diff options
Diffstat (limited to 'net/ipv4')
32 files changed, 3850 insertions, 1651 deletions
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 91d3d96805d0..b12dae2b0b2d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1029,6 +1029,11 @@ skip: | |||
1029 | } | 1029 | } |
1030 | } | 1030 | } |
1031 | 1031 | ||
1032 | static inline bool inetdev_valid_mtu(unsigned mtu) | ||
1033 | { | ||
1034 | return mtu >= 68; | ||
1035 | } | ||
1036 | |||
1032 | /* Called only under RTNL semaphore */ | 1037 | /* Called only under RTNL semaphore */ |
1033 | 1038 | ||
1034 | static int inetdev_event(struct notifier_block *this, unsigned long event, | 1039 | static int inetdev_event(struct notifier_block *this, unsigned long event, |
@@ -1048,6 +1053,10 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, | |||
1048 | IN_DEV_CONF_SET(in_dev, NOXFRM, 1); | 1053 | IN_DEV_CONF_SET(in_dev, NOXFRM, 1); |
1049 | IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); | 1054 | IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); |
1050 | } | 1055 | } |
1056 | } else if (event == NETDEV_CHANGEMTU) { | ||
1057 | /* Re-enabling IP */ | ||
1058 | if (inetdev_valid_mtu(dev->mtu)) | ||
1059 | in_dev = inetdev_init(dev); | ||
1051 | } | 1060 | } |
1052 | goto out; | 1061 | goto out; |
1053 | } | 1062 | } |
@@ -1058,7 +1067,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, | |||
1058 | dev->ip_ptr = NULL; | 1067 | dev->ip_ptr = NULL; |
1059 | break; | 1068 | break; |
1060 | case NETDEV_UP: | 1069 | case NETDEV_UP: |
1061 | if (dev->mtu < 68) | 1070 | if (!inetdev_valid_mtu(dev->mtu)) |
1062 | break; | 1071 | break; |
1063 | if (dev->flags & IFF_LOOPBACK) { | 1072 | if (dev->flags & IFF_LOOPBACK) { |
1064 | struct in_ifaddr *ifa; | 1073 | struct in_ifaddr *ifa; |
@@ -1080,9 +1089,9 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, | |||
1080 | ip_mc_down(in_dev); | 1089 | ip_mc_down(in_dev); |
1081 | break; | 1090 | break; |
1082 | case NETDEV_CHANGEMTU: | 1091 | case NETDEV_CHANGEMTU: |
1083 | if (dev->mtu >= 68) | 1092 | if (inetdev_valid_mtu(dev->mtu)) |
1084 | break; | 1093 | break; |
1085 | /* MTU falled under 68, disable IP */ | 1094 | /* disable IP when MTU is not enough */ |
1086 | case NETDEV_UNREGISTER: | 1095 | case NETDEV_UNREGISTER: |
1087 | inetdev_destroy(in_dev); | 1096 | inetdev_destroy(in_dev); |
1088 | break; | 1097 | break; |
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index c10036e7a463..89cb047ab314 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -782,11 +782,15 @@ skip_listen_ht: | |||
782 | struct sock *sk; | 782 | struct sock *sk; |
783 | struct hlist_node *node; | 783 | struct hlist_node *node; |
784 | 784 | ||
785 | num = 0; | ||
786 | |||
787 | if (hlist_empty(&head->chain) && hlist_empty(&head->twchain)) | ||
788 | continue; | ||
789 | |||
785 | if (i > s_i) | 790 | if (i > s_i) |
786 | s_num = 0; | 791 | s_num = 0; |
787 | 792 | ||
788 | read_lock_bh(lock); | 793 | read_lock_bh(lock); |
789 | num = 0; | ||
790 | sk_for_each(sk, node, &head->chain) { | 794 | sk_for_each(sk, node, &head->chain) { |
791 | struct inet_sock *inet = inet_sk(sk); | 795 | struct inet_sock *inet = inet_sk(sk); |
792 | 796 | ||
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index d985bd613d25..743f011b9a84 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -409,3 +409,38 @@ out: | |||
409 | } | 409 | } |
410 | 410 | ||
411 | EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); | 411 | EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); |
412 | |||
413 | void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, | ||
414 | struct inet_timewait_death_row *twdr, int family) | ||
415 | { | ||
416 | struct inet_timewait_sock *tw; | ||
417 | struct sock *sk; | ||
418 | struct hlist_node *node; | ||
419 | int h; | ||
420 | |||
421 | local_bh_disable(); | ||
422 | for (h = 0; h < (hashinfo->ehash_size); h++) { | ||
423 | struct inet_ehash_bucket *head = | ||
424 | inet_ehash_bucket(hashinfo, h); | ||
425 | rwlock_t *lock = inet_ehash_lockp(hashinfo, h); | ||
426 | restart: | ||
427 | write_lock(lock); | ||
428 | sk_for_each(sk, node, &head->twchain) { | ||
429 | |||
430 | tw = inet_twsk(sk); | ||
431 | if (!net_eq(twsk_net(tw), net) || | ||
432 | tw->tw_family != family) | ||
433 | continue; | ||
434 | |||
435 | atomic_inc(&tw->tw_refcnt); | ||
436 | write_unlock(lock); | ||
437 | inet_twsk_deschedule(tw, twdr); | ||
438 | inet_twsk_put(tw); | ||
439 | |||
440 | goto restart; | ||
441 | } | ||
442 | write_unlock(lock); | ||
443 | } | ||
444 | local_bh_enable(); | ||
445 | } | ||
446 | EXPORT_SYMBOL_GPL(inet_twsk_purge); | ||
diff --git a/net/ipv4/ipvs/Kconfig b/net/ipv4/ipvs/Kconfig
index 09d0c3f35669..de6004de80bc 100644
--- a/net/ipv4/ipvs/Kconfig
+++ b/net/ipv4/ipvs/Kconfig
@@ -24,6 +24,14 @@ menuconfig IP_VS | |||
24 | 24 | ||
25 | if IP_VS | 25 | if IP_VS |
26 | 26 | ||
27 | config IP_VS_IPV6 | ||
28 | bool "IPv6 support for IPVS (DANGEROUS)" | ||
29 | depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6) | ||
30 | ---help--- | ||
31 | Add IPv6 support to IPVS. This is incomplete and might be dangerous. | ||
32 | |||
33 | Say N if unsure. | ||
34 | |||
27 | config IP_VS_DEBUG | 35 | config IP_VS_DEBUG |
28 | bool "IP virtual server debugging" | 36 | bool "IP virtual server debugging" |
29 | ---help--- | 37 | ---help--- |
@@ -33,7 +41,8 @@ config IP_VS_DEBUG | |||
33 | 41 | ||
34 | config IP_VS_TAB_BITS | 42 | config IP_VS_TAB_BITS |
35 | int "IPVS connection table size (the Nth power of 2)" | 43 | int "IPVS connection table size (the Nth power of 2)" |
36 | default "12" | 44 | range 8 20 |
45 | default 12 | ||
37 | ---help--- | 46 | ---help--- |
38 | The IPVS connection hash table uses the chaining scheme to handle | 47 | The IPVS connection hash table uses the chaining scheme to handle |
39 | hash collisions. Using a big IPVS connection hash table will greatly | 48 | hash collisions. Using a big IPVS connection hash table will greatly |
@@ -71,14 +80,20 @@ config IP_VS_PROTO_UDP | |||
71 | This option enables support for load balancing UDP transport | 80 | This option enables support for load balancing UDP transport |
72 | protocol. Say Y if unsure. | 81 | protocol. Say Y if unsure. |
73 | 82 | ||
83 | config IP_VS_PROTO_AH_ESP | ||
84 | bool | ||
85 | depends on UNDEFINED | ||
86 | |||
74 | config IP_VS_PROTO_ESP | 87 | config IP_VS_PROTO_ESP |
75 | bool "ESP load balancing support" | 88 | bool "ESP load balancing support" |
89 | select IP_VS_PROTO_AH_ESP | ||
76 | ---help--- | 90 | ---help--- |
77 | This option enables support for load balancing ESP (Encapsulation | 91 | This option enables support for load balancing ESP (Encapsulation |
78 | Security Payload) transport protocol. Say Y if unsure. | 92 | Security Payload) transport protocol. Say Y if unsure. |
79 | 93 | ||
80 | config IP_VS_PROTO_AH | 94 | config IP_VS_PROTO_AH |
81 | bool "AH load balancing support" | 95 | bool "AH load balancing support" |
96 | select IP_VS_PROTO_AH_ESP | ||
82 | ---help--- | 97 | ---help--- |
83 | This option enables support for load balancing AH (Authentication | 98 | This option enables support for load balancing AH (Authentication |
84 | Header) transport protocol. Say Y if unsure. | 99 | Header) transport protocol. Say Y if unsure. |
diff --git a/net/ipv4/ipvs/Makefile b/net/ipv4/ipvs/Makefile
index 30e85de9ffff..73a46fe1fe4c 100644
--- a/net/ipv4/ipvs/Makefile
+++ b/net/ipv4/ipvs/Makefile
@@ -6,8 +6,7 @@ | |||
6 | ip_vs_proto-objs-y := | 6 | ip_vs_proto-objs-y := |
7 | ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o | 7 | ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o |
8 | ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o | 8 | ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o |
9 | ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_ESP) += ip_vs_proto_esp.o | 9 | ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o |
10 | ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o | ||
11 | 10 | ||
12 | ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ | 11 | ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ |
13 | ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ | 12 | ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ |
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 44a6872dc245..9a24332fbed8 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -114,9 +114,18 @@ static inline void ct_write_unlock_bh(unsigned key) | |||
114 | /* | 114 | /* |
115 | * Returns hash value for IPVS connection entry | 115 | * Returns hash value for IPVS connection entry |
116 | */ | 116 | */ |
117 | static unsigned int ip_vs_conn_hashkey(unsigned proto, __be32 addr, __be16 port) | 117 | static unsigned int ip_vs_conn_hashkey(int af, unsigned proto, |
118 | const union nf_inet_addr *addr, | ||
119 | __be16 port) | ||
118 | { | 120 | { |
119 | return jhash_3words((__force u32)addr, (__force u32)port, proto, ip_vs_conn_rnd) | 121 | #ifdef CONFIG_IP_VS_IPV6 |
122 | if (af == AF_INET6) | ||
123 | return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), | ||
124 | (__force u32)port, proto, ip_vs_conn_rnd) | ||
125 | & IP_VS_CONN_TAB_MASK; | ||
126 | #endif | ||
127 | return jhash_3words((__force u32)addr->ip, (__force u32)port, proto, | ||
128 | ip_vs_conn_rnd) | ||
120 | & IP_VS_CONN_TAB_MASK; | 129 | & IP_VS_CONN_TAB_MASK; |
121 | } | 130 | } |
122 | 131 | ||
@@ -131,7 +140,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) | |||
131 | int ret; | 140 | int ret; |
132 | 141 | ||
133 | /* Hash by protocol, client address and port */ | 142 | /* Hash by protocol, client address and port */ |
134 | hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport); | 143 | hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); |
135 | 144 | ||
136 | ct_write_lock(hash); | 145 | ct_write_lock(hash); |
137 | 146 | ||
@@ -162,7 +171,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) | |||
162 | int ret; | 171 | int ret; |
163 | 172 | ||
164 | /* unhash it and decrease its reference counter */ | 173 | /* unhash it and decrease its reference counter */ |
165 | hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport); | 174 | hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); |
166 | 175 | ||
167 | ct_write_lock(hash); | 176 | ct_write_lock(hash); |
168 | 177 | ||
@@ -187,20 +196,23 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) | |||
187 | * d_addr, d_port: pkt dest address (load balancer) | 196 | * d_addr, d_port: pkt dest address (load balancer) |
188 | */ | 197 | */ |
189 | static inline struct ip_vs_conn *__ip_vs_conn_in_get | 198 | static inline struct ip_vs_conn *__ip_vs_conn_in_get |
190 | (int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) | 199 | (int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, |
200 | const union nf_inet_addr *d_addr, __be16 d_port) | ||
191 | { | 201 | { |
192 | unsigned hash; | 202 | unsigned hash; |
193 | struct ip_vs_conn *cp; | 203 | struct ip_vs_conn *cp; |
194 | 204 | ||
195 | hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); | 205 | hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port); |
196 | 206 | ||
197 | ct_read_lock(hash); | 207 | ct_read_lock(hash); |
198 | 208 | ||
199 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { | 209 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { |
200 | if (s_addr==cp->caddr && s_port==cp->cport && | 210 | if (cp->af == af && |
201 | d_port==cp->vport && d_addr==cp->vaddr && | 211 | ip_vs_addr_equal(af, s_addr, &cp->caddr) && |
212 | ip_vs_addr_equal(af, d_addr, &cp->vaddr) && | ||
213 | s_port == cp->cport && d_port == cp->vport && | ||
202 | ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && | 214 | ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && |
203 | protocol==cp->protocol) { | 215 | protocol == cp->protocol) { |
204 | /* HIT */ | 216 | /* HIT */ |
205 | atomic_inc(&cp->refcnt); | 217 | atomic_inc(&cp->refcnt); |
206 | ct_read_unlock(hash); | 218 | ct_read_unlock(hash); |
@@ -214,39 +226,44 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get | |||
214 | } | 226 | } |
215 | 227 | ||
216 | struct ip_vs_conn *ip_vs_conn_in_get | 228 | struct ip_vs_conn *ip_vs_conn_in_get |
217 | (int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) | 229 | (int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, |
230 | const union nf_inet_addr *d_addr, __be16 d_port) | ||
218 | { | 231 | { |
219 | struct ip_vs_conn *cp; | 232 | struct ip_vs_conn *cp; |
220 | 233 | ||
221 | cp = __ip_vs_conn_in_get(protocol, s_addr, s_port, d_addr, d_port); | 234 | cp = __ip_vs_conn_in_get(af, protocol, s_addr, s_port, d_addr, d_port); |
222 | if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) | 235 | if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) |
223 | cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port); | 236 | cp = __ip_vs_conn_in_get(af, protocol, s_addr, 0, d_addr, |
237 | d_port); | ||
224 | 238 | ||
225 | IP_VS_DBG(9, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", | 239 | IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n", |
226 | ip_vs_proto_name(protocol), | 240 | ip_vs_proto_name(protocol), |
227 | NIPQUAD(s_addr), ntohs(s_port), | 241 | IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), |
228 | NIPQUAD(d_addr), ntohs(d_port), | 242 | IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), |
229 | cp?"hit":"not hit"); | 243 | cp ? "hit" : "not hit"); |
230 | 244 | ||
231 | return cp; | 245 | return cp; |
232 | } | 246 | } |
233 | 247 | ||
234 | /* Get reference to connection template */ | 248 | /* Get reference to connection template */ |
235 | struct ip_vs_conn *ip_vs_ct_in_get | 249 | struct ip_vs_conn *ip_vs_ct_in_get |
236 | (int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) | 250 | (int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, |
251 | const union nf_inet_addr *d_addr, __be16 d_port) | ||
237 | { | 252 | { |
238 | unsigned hash; | 253 | unsigned hash; |
239 | struct ip_vs_conn *cp; | 254 | struct ip_vs_conn *cp; |
240 | 255 | ||
241 | hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); | 256 | hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port); |
242 | 257 | ||
243 | ct_read_lock(hash); | 258 | ct_read_lock(hash); |
244 | 259 | ||
245 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { | 260 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { |
246 | if (s_addr==cp->caddr && s_port==cp->cport && | 261 | if (cp->af == af && |
247 | d_port==cp->vport && d_addr==cp->vaddr && | 262 | ip_vs_addr_equal(af, s_addr, &cp->caddr) && |
263 | ip_vs_addr_equal(af, d_addr, &cp->vaddr) && | ||
264 | s_port == cp->cport && d_port == cp->vport && | ||
248 | cp->flags & IP_VS_CONN_F_TEMPLATE && | 265 | cp->flags & IP_VS_CONN_F_TEMPLATE && |
249 | protocol==cp->protocol) { | 266 | protocol == cp->protocol) { |
250 | /* HIT */ | 267 | /* HIT */ |
251 | atomic_inc(&cp->refcnt); | 268 | atomic_inc(&cp->refcnt); |
252 | goto out; | 269 | goto out; |
@@ -257,11 +274,11 @@ struct ip_vs_conn *ip_vs_ct_in_get | |||
257 | out: | 274 | out: |
258 | ct_read_unlock(hash); | 275 | ct_read_unlock(hash); |
259 | 276 | ||
260 | IP_VS_DBG(9, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", | 277 | IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n", |
261 | ip_vs_proto_name(protocol), | 278 | ip_vs_proto_name(protocol), |
262 | NIPQUAD(s_addr), ntohs(s_port), | 279 | IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), |
263 | NIPQUAD(d_addr), ntohs(d_port), | 280 | IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), |
264 | cp?"hit":"not hit"); | 281 | cp ? "hit" : "not hit"); |
265 | 282 | ||
266 | return cp; | 283 | return cp; |
267 | } | 284 | } |
@@ -273,7 +290,8 @@ struct ip_vs_conn *ip_vs_ct_in_get | |||
273 | * d_addr, d_port: pkt dest address (foreign host) | 290 | * d_addr, d_port: pkt dest address (foreign host) |
274 | */ | 291 | */ |
275 | struct ip_vs_conn *ip_vs_conn_out_get | 292 | struct ip_vs_conn *ip_vs_conn_out_get |
276 | (int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) | 293 | (int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, |
294 | const union nf_inet_addr *d_addr, __be16 d_port) | ||
277 | { | 295 | { |
278 | unsigned hash; | 296 | unsigned hash; |
279 | struct ip_vs_conn *cp, *ret=NULL; | 297 | struct ip_vs_conn *cp, *ret=NULL; |
@@ -281,13 +299,15 @@ struct ip_vs_conn *ip_vs_conn_out_get | |||
281 | /* | 299 | /* |
282 | * Check for "full" addressed entries | 300 | * Check for "full" addressed entries |
283 | */ | 301 | */ |
284 | hash = ip_vs_conn_hashkey(protocol, d_addr, d_port); | 302 | hash = ip_vs_conn_hashkey(af, protocol, d_addr, d_port); |
285 | 303 | ||
286 | ct_read_lock(hash); | 304 | ct_read_lock(hash); |
287 | 305 | ||
288 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { | 306 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { |
289 | if (d_addr == cp->caddr && d_port == cp->cport && | 307 | if (cp->af == af && |
290 | s_port == cp->dport && s_addr == cp->daddr && | 308 | ip_vs_addr_equal(af, d_addr, &cp->caddr) && |
309 | ip_vs_addr_equal(af, s_addr, &cp->daddr) && | ||
310 | d_port == cp->cport && s_port == cp->dport && | ||
291 | protocol == cp->protocol) { | 311 | protocol == cp->protocol) { |
292 | /* HIT */ | 312 | /* HIT */ |
293 | atomic_inc(&cp->refcnt); | 313 | atomic_inc(&cp->refcnt); |
@@ -298,11 +318,11 @@ struct ip_vs_conn *ip_vs_conn_out_get | |||
298 | 318 | ||
299 | ct_read_unlock(hash); | 319 | ct_read_unlock(hash); |
300 | 320 | ||
301 | IP_VS_DBG(9, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", | 321 | IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n", |
302 | ip_vs_proto_name(protocol), | 322 | ip_vs_proto_name(protocol), |
303 | NIPQUAD(s_addr), ntohs(s_port), | 323 | IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), |
304 | NIPQUAD(d_addr), ntohs(d_port), | 324 | IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), |
305 | ret?"hit":"not hit"); | 325 | ret ? "hit" : "not hit"); |
306 | 326 | ||
307 | return ret; | 327 | return ret; |
308 | } | 328 | } |
@@ -369,6 +389,33 @@ static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp) | |||
369 | } | 389 | } |
370 | } | 390 | } |
371 | 391 | ||
392 | #ifdef CONFIG_IP_VS_IPV6 | ||
393 | static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp) | ||
394 | { | ||
395 | switch (IP_VS_FWD_METHOD(cp)) { | ||
396 | case IP_VS_CONN_F_MASQ: | ||
397 | cp->packet_xmit = ip_vs_nat_xmit_v6; | ||
398 | break; | ||
399 | |||
400 | case IP_VS_CONN_F_TUNNEL: | ||
401 | cp->packet_xmit = ip_vs_tunnel_xmit_v6; | ||
402 | break; | ||
403 | |||
404 | case IP_VS_CONN_F_DROUTE: | ||
405 | cp->packet_xmit = ip_vs_dr_xmit_v6; | ||
406 | break; | ||
407 | |||
408 | case IP_VS_CONN_F_LOCALNODE: | ||
409 | cp->packet_xmit = ip_vs_null_xmit; | ||
410 | break; | ||
411 | |||
412 | case IP_VS_CONN_F_BYPASS: | ||
413 | cp->packet_xmit = ip_vs_bypass_xmit_v6; | ||
414 | break; | ||
415 | } | ||
416 | } | ||
417 | #endif | ||
418 | |||
372 | 419 | ||
373 | static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest) | 420 | static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest) |
374 | { | 421 | { |
@@ -402,16 +449,16 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) | |||
402 | cp->flags |= atomic_read(&dest->conn_flags); | 449 | cp->flags |= atomic_read(&dest->conn_flags); |
403 | cp->dest = dest; | 450 | cp->dest = dest; |
404 | 451 | ||
405 | IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " | 452 | IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d " |
406 | "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " | 453 | "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " |
407 | "dest->refcnt:%d\n", | 454 | "dest->refcnt:%d\n", |
408 | ip_vs_proto_name(cp->protocol), | 455 | ip_vs_proto_name(cp->protocol), |
409 | NIPQUAD(cp->caddr), ntohs(cp->cport), | 456 | IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), |
410 | NIPQUAD(cp->vaddr), ntohs(cp->vport), | 457 | IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), |
411 | NIPQUAD(cp->daddr), ntohs(cp->dport), | 458 | IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport), |
412 | ip_vs_fwd_tag(cp), cp->state, | 459 | ip_vs_fwd_tag(cp), cp->state, |
413 | cp->flags, atomic_read(&cp->refcnt), | 460 | cp->flags, atomic_read(&cp->refcnt), |
414 | atomic_read(&dest->refcnt)); | 461 | atomic_read(&dest->refcnt)); |
415 | 462 | ||
416 | /* Update the connection counters */ | 463 | /* Update the connection counters */ |
417 | if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { | 464 | if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { |
@@ -444,8 +491,9 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) | |||
444 | struct ip_vs_dest *dest; | 491 | struct ip_vs_dest *dest; |
445 | 492 | ||
446 | if ((cp) && (!cp->dest)) { | 493 | if ((cp) && (!cp->dest)) { |
447 | dest = ip_vs_find_dest(cp->daddr, cp->dport, | 494 | dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport, |
448 | cp->vaddr, cp->vport, cp->protocol); | 495 | &cp->vaddr, cp->vport, |
496 | cp->protocol); | ||
449 | ip_vs_bind_dest(cp, dest); | 497 | ip_vs_bind_dest(cp, dest); |
450 | return dest; | 498 | return dest; |
451 | } else | 499 | } else |
@@ -464,16 +512,16 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) | |||
464 | if (!dest) | 512 | if (!dest) |
465 | return; | 513 | return; |
466 | 514 | ||
467 | IP_VS_DBG(7, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " | 515 | IP_VS_DBG_BUF(7, "Unbind-dest %s c:%s:%d v:%s:%d " |
468 | "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " | 516 | "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " |
469 | "dest->refcnt:%d\n", | 517 | "dest->refcnt:%d\n", |
470 | ip_vs_proto_name(cp->protocol), | 518 | ip_vs_proto_name(cp->protocol), |
471 | NIPQUAD(cp->caddr), ntohs(cp->cport), | 519 | IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), |
472 | NIPQUAD(cp->vaddr), ntohs(cp->vport), | 520 | IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), |
473 | NIPQUAD(cp->daddr), ntohs(cp->dport), | 521 | IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport), |
474 | ip_vs_fwd_tag(cp), cp->state, | 522 | ip_vs_fwd_tag(cp), cp->state, |
475 | cp->flags, atomic_read(&cp->refcnt), | 523 | cp->flags, atomic_read(&cp->refcnt), |
476 | atomic_read(&dest->refcnt)); | 524 | atomic_read(&dest->refcnt)); |
477 | 525 | ||
478 | /* Update the connection counters */ | 526 | /* Update the connection counters */ |
479 | if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { | 527 | if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { |
@@ -526,13 +574,16 @@ int ip_vs_check_template(struct ip_vs_conn *ct) | |||
526 | !(dest->flags & IP_VS_DEST_F_AVAILABLE) || | 574 | !(dest->flags & IP_VS_DEST_F_AVAILABLE) || |
527 | (sysctl_ip_vs_expire_quiescent_template && | 575 | (sysctl_ip_vs_expire_quiescent_template && |
528 | (atomic_read(&dest->weight) == 0))) { | 576 | (atomic_read(&dest->weight) == 0))) { |
529 | IP_VS_DBG(9, "check_template: dest not available for " | 577 | IP_VS_DBG_BUF(9, "check_template: dest not available for " |
530 | "protocol %s s:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " | 578 | "protocol %s s:%s:%d v:%s:%d " |
531 | "-> d:%u.%u.%u.%u:%d\n", | 579 | "-> d:%s:%d\n", |
532 | ip_vs_proto_name(ct->protocol), | 580 | ip_vs_proto_name(ct->protocol), |
533 | NIPQUAD(ct->caddr), ntohs(ct->cport), | 581 | IP_VS_DBG_ADDR(ct->af, &ct->caddr), |
534 | NIPQUAD(ct->vaddr), ntohs(ct->vport), | 582 | ntohs(ct->cport), |
535 | NIPQUAD(ct->daddr), ntohs(ct->dport)); | 583 | IP_VS_DBG_ADDR(ct->af, &ct->vaddr), |
584 | ntohs(ct->vport), | ||
585 | IP_VS_DBG_ADDR(ct->af, &ct->daddr), | ||
586 | ntohs(ct->dport)); | ||
536 | 587 | ||
537 | /* | 588 | /* |
538 | * Invalidate the connection template | 589 | * Invalidate the connection template |
@@ -625,8 +676,9 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp) | |||
625 | * Create a new connection entry and hash it into the ip_vs_conn_tab | 676 | * Create a new connection entry and hash it into the ip_vs_conn_tab |
626 | */ | 677 | */ |
627 | struct ip_vs_conn * | 678 | struct ip_vs_conn * |
628 | ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport, | 679 | ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, |
629 | __be32 daddr, __be16 dport, unsigned flags, | 680 | const union nf_inet_addr *vaddr, __be16 vport, |
681 | const union nf_inet_addr *daddr, __be16 dport, unsigned flags, | ||
630 | struct ip_vs_dest *dest) | 682 | struct ip_vs_dest *dest) |
631 | { | 683 | { |
632 | struct ip_vs_conn *cp; | 684 | struct ip_vs_conn *cp; |
@@ -640,12 +692,13 @@ ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport | |||
640 | 692 | ||
641 | INIT_LIST_HEAD(&cp->c_list); | 693 | INIT_LIST_HEAD(&cp->c_list); |
642 | setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); | 694 | setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); |
695 | cp->af = af; | ||
643 | cp->protocol = proto; | 696 | cp->protocol = proto; |
644 | cp->caddr = caddr; | 697 | ip_vs_addr_copy(af, &cp->caddr, caddr); |
645 | cp->cport = cport; | 698 | cp->cport = cport; |
646 | cp->vaddr = vaddr; | 699 | ip_vs_addr_copy(af, &cp->vaddr, vaddr); |
647 | cp->vport = vport; | 700 | cp->vport = vport; |
648 | cp->daddr = daddr; | 701 | ip_vs_addr_copy(af, &cp->daddr, daddr); |
649 | cp->dport = dport; | 702 | cp->dport = dport; |
650 | cp->flags = flags; | 703 | cp->flags = flags; |
651 | spin_lock_init(&cp->lock); | 704 | spin_lock_init(&cp->lock); |
@@ -672,7 +725,12 @@ ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport | |||
672 | cp->timeout = 3*HZ; | 725 | cp->timeout = 3*HZ; |
673 | 726 | ||
674 | /* Bind its packet transmitter */ | 727 | /* Bind its packet transmitter */ |
675 | ip_vs_bind_xmit(cp); | 728 | #ifdef CONFIG_IP_VS_IPV6 |
729 | if (af == AF_INET6) | ||
730 | ip_vs_bind_xmit_v6(cp); | ||
731 | else | ||
732 | #endif | ||
733 | ip_vs_bind_xmit(cp); | ||
676 | 734 | ||
677 | if (unlikely(pp && atomic_read(&pp->appcnt))) | 735 | if (unlikely(pp && atomic_read(&pp->appcnt))) |
678 | ip_vs_bind_app(cp, pp); | 736 | ip_vs_bind_app(cp, pp); |
@@ -760,12 +818,26 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) | |||
760 | else { | 818 | else { |
761 | const struct ip_vs_conn *cp = v; | 819 | const struct ip_vs_conn *cp = v; |
762 | 820 | ||
763 | seq_printf(seq, | 821 | #ifdef CONFIG_IP_VS_IPV6 |
764 | "%-3s %08X %04X %08X %04X %08X %04X %-11s %7lu\n", | 822 | if (cp->af == AF_INET6) |
823 | seq_printf(seq, | ||
824 | "%-3s " NIP6_FMT " %04X " NIP6_FMT | ||
825 | " %04X " NIP6_FMT " %04X %-11s %7lu\n", | ||
826 | ip_vs_proto_name(cp->protocol), | ||
827 | NIP6(cp->caddr.in6), ntohs(cp->cport), | ||
828 | NIP6(cp->vaddr.in6), ntohs(cp->vport), | ||
829 | NIP6(cp->daddr.in6), ntohs(cp->dport), | ||
830 | ip_vs_state_name(cp->protocol, cp->state), | ||
831 | (cp->timer.expires-jiffies)/HZ); | ||
832 | else | ||
833 | #endif | ||
834 | seq_printf(seq, | ||
835 | "%-3s %08X %04X %08X %04X" | ||
836 | " %08X %04X %-11s %7lu\n", | ||
765 | ip_vs_proto_name(cp->protocol), | 837 | ip_vs_proto_name(cp->protocol), |
766 | ntohl(cp->caddr), ntohs(cp->cport), | 838 | ntohl(cp->caddr.ip), ntohs(cp->cport), |
767 | ntohl(cp->vaddr), ntohs(cp->vport), | 839 | ntohl(cp->vaddr.ip), ntohs(cp->vport), |
768 | ntohl(cp->daddr), ntohs(cp->dport), | 840 | ntohl(cp->daddr.ip), ntohs(cp->dport), |
769 | ip_vs_state_name(cp->protocol, cp->state), | 841 | ip_vs_state_name(cp->protocol, cp->state), |
770 | (cp->timer.expires-jiffies)/HZ); | 842 | (cp->timer.expires-jiffies)/HZ); |
771 | } | 843 | } |
@@ -809,12 +881,27 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) | |||
809 | else { | 881 | else { |
810 | const struct ip_vs_conn *cp = v; | 882 | const struct ip_vs_conn *cp = v; |
811 | 883 | ||
812 | seq_printf(seq, | 884 | #ifdef CONFIG_IP_VS_IPV6 |
813 | "%-3s %08X %04X %08X %04X %08X %04X %-11s %-6s %7lu\n", | 885 | if (cp->af == AF_INET6) |
886 | seq_printf(seq, | ||
887 | "%-3s " NIP6_FMT " %04X " NIP6_FMT | ||
888 | " %04X " NIP6_FMT " %04X %-11s %-6s %7lu\n", | ||
889 | ip_vs_proto_name(cp->protocol), | ||
890 | NIP6(cp->caddr.in6), ntohs(cp->cport), | ||
891 | NIP6(cp->vaddr.in6), ntohs(cp->vport), | ||
892 | NIP6(cp->daddr.in6), ntohs(cp->dport), | ||
893 | ip_vs_state_name(cp->protocol, cp->state), | ||
894 | ip_vs_origin_name(cp->flags), | ||
895 | (cp->timer.expires-jiffies)/HZ); | ||
896 | else | ||
897 | #endif | ||
898 | seq_printf(seq, | ||
899 | "%-3s %08X %04X %08X %04X " | ||
900 | "%08X %04X %-11s %-6s %7lu\n", | ||
814 | ip_vs_proto_name(cp->protocol), | 901 | ip_vs_proto_name(cp->protocol), |
815 | ntohl(cp->caddr), ntohs(cp->cport), | 902 | ntohl(cp->caddr.ip), ntohs(cp->cport), |
816 | ntohl(cp->vaddr), ntohs(cp->vport), | 903 | ntohl(cp->vaddr.ip), ntohs(cp->vport), |
817 | ntohl(cp->daddr), ntohs(cp->dport), | 904 | ntohl(cp->daddr.ip), ntohs(cp->dport), |
818 | ip_vs_state_name(cp->protocol, cp->state), | 905 | ip_vs_state_name(cp->protocol, cp->state), |
819 | ip_vs_origin_name(cp->flags), | 906 | ip_vs_origin_name(cp->flags), |
820 | (cp->timer.expires-jiffies)/HZ); | 907 | (cp->timer.expires-jiffies)/HZ); |
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index a7879eafc3b5..958abf3e5f8c 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -39,6 +39,11 @@ | |||
39 | #include <linux/netfilter.h> | 39 | #include <linux/netfilter.h> |
40 | #include <linux/netfilter_ipv4.h> | 40 | #include <linux/netfilter_ipv4.h> |
41 | 41 | ||
42 | #ifdef CONFIG_IP_VS_IPV6 | ||
43 | #include <net/ipv6.h> | ||
44 | #include <linux/netfilter_ipv6.h> | ||
45 | #endif | ||
46 | |||
42 | #include <net/ip_vs.h> | 47 | #include <net/ip_vs.h> |
43 | 48 | ||
44 | 49 | ||
@@ -60,6 +65,7 @@ EXPORT_SYMBOL(ip_vs_get_debug_level); | |||
60 | 65 | ||
61 | /* ID used in ICMP lookups */ | 66 | /* ID used in ICMP lookups */ |
62 | #define icmp_id(icmph) (((icmph)->un).echo.id) | 67 | #define icmp_id(icmph) (((icmph)->un).echo.id) |
68 | #define icmpv6_id(icmph) (icmph->icmp6_dataun.u_echo.identifier) | ||
63 | 69 | ||
64 | const char *ip_vs_proto_name(unsigned proto) | 70 | const char *ip_vs_proto_name(unsigned proto) |
65 | { | 71 | { |
@@ -74,6 +80,10 @@ const char *ip_vs_proto_name(unsigned proto) | |||
74 | return "TCP"; | 80 | return "TCP"; |
75 | case IPPROTO_ICMP: | 81 | case IPPROTO_ICMP: |
76 | return "ICMP"; | 82 | return "ICMP"; |
83 | #ifdef CONFIG_IP_VS_IPV6 | ||
84 | case IPPROTO_ICMPV6: | ||
85 | return "ICMPv6"; | ||
86 | #endif | ||
77 | default: | 87 | default: |
78 | sprintf(buf, "IP_%d", proto); | 88 | sprintf(buf, "IP_%d", proto); |
79 | return buf; | 89 | return buf; |
@@ -92,18 +102,18 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) | |||
92 | struct ip_vs_dest *dest = cp->dest; | 102 | struct ip_vs_dest *dest = cp->dest; |
93 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { | 103 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { |
94 | spin_lock(&dest->stats.lock); | 104 | spin_lock(&dest->stats.lock); |
95 | dest->stats.inpkts++; | 105 | dest->stats.ustats.inpkts++; |
96 | dest->stats.inbytes += skb->len; | 106 | dest->stats.ustats.inbytes += skb->len; |
97 | spin_unlock(&dest->stats.lock); | 107 | spin_unlock(&dest->stats.lock); |
98 | 108 | ||
99 | spin_lock(&dest->svc->stats.lock); | 109 | spin_lock(&dest->svc->stats.lock); |
100 | dest->svc->stats.inpkts++; | 110 | dest->svc->stats.ustats.inpkts++; |
101 | dest->svc->stats.inbytes += skb->len; | 111 | dest->svc->stats.ustats.inbytes += skb->len; |
102 | spin_unlock(&dest->svc->stats.lock); | 112 | spin_unlock(&dest->svc->stats.lock); |
103 | 113 | ||
104 | spin_lock(&ip_vs_stats.lock); | 114 | spin_lock(&ip_vs_stats.lock); |
105 | ip_vs_stats.inpkts++; | 115 | ip_vs_stats.ustats.inpkts++; |
106 | ip_vs_stats.inbytes += skb->len; | 116 | ip_vs_stats.ustats.inbytes += skb->len; |
107 | spin_unlock(&ip_vs_stats.lock); | 117 | spin_unlock(&ip_vs_stats.lock); |
108 | } | 118 | } |
109 | } | 119 | } |
@@ -115,18 +125,18 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) | |||
115 | struct ip_vs_dest *dest = cp->dest; | 125 | struct ip_vs_dest *dest = cp->dest; |
116 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { | 126 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { |
117 | spin_lock(&dest->stats.lock); | 127 | spin_lock(&dest->stats.lock); |
118 | dest->stats.outpkts++; | 128 | dest->stats.ustats.outpkts++; |
119 | dest->stats.outbytes += skb->len; | 129 | dest->stats.ustats.outbytes += skb->len; |
120 | spin_unlock(&dest->stats.lock); | 130 | spin_unlock(&dest->stats.lock); |
121 | 131 | ||
122 | spin_lock(&dest->svc->stats.lock); | 132 | spin_lock(&dest->svc->stats.lock); |
123 | dest->svc->stats.outpkts++; | 133 | dest->svc->stats.ustats.outpkts++; |
124 | dest->svc->stats.outbytes += skb->len; | 134 | dest->svc->stats.ustats.outbytes += skb->len; |
125 | spin_unlock(&dest->svc->stats.lock); | 135 | spin_unlock(&dest->svc->stats.lock); |
126 | 136 | ||
127 | spin_lock(&ip_vs_stats.lock); | 137 | spin_lock(&ip_vs_stats.lock); |
128 | ip_vs_stats.outpkts++; | 138 | ip_vs_stats.ustats.outpkts++; |
129 | ip_vs_stats.outbytes += skb->len; | 139 | ip_vs_stats.ustats.outbytes += skb->len; |
130 | spin_unlock(&ip_vs_stats.lock); | 140 | spin_unlock(&ip_vs_stats.lock); |
131 | } | 141 | } |
132 | } | 142 | } |
@@ -136,15 +146,15 @@ static inline void | |||
136 | ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) | 146 | ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) |
137 | { | 147 | { |
138 | spin_lock(&cp->dest->stats.lock); | 148 | spin_lock(&cp->dest->stats.lock); |
139 | cp->dest->stats.conns++; | 149 | cp->dest->stats.ustats.conns++; |
140 | spin_unlock(&cp->dest->stats.lock); | 150 | spin_unlock(&cp->dest->stats.lock); |
141 | 151 | ||
142 | spin_lock(&svc->stats.lock); | 152 | spin_lock(&svc->stats.lock); |
143 | svc->stats.conns++; | 153 | svc->stats.ustats.conns++; |
144 | spin_unlock(&svc->stats.lock); | 154 | spin_unlock(&svc->stats.lock); |
145 | 155 | ||
146 | spin_lock(&ip_vs_stats.lock); | 156 | spin_lock(&ip_vs_stats.lock); |
147 | ip_vs_stats.conns++; | 157 | ip_vs_stats.ustats.conns++; |
148 | spin_unlock(&ip_vs_stats.lock); | 158 | spin_unlock(&ip_vs_stats.lock); |
149 | } | 159 | } |
150 | 160 | ||
@@ -173,20 +183,28 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
173 | __be16 ports[2]) | 183 | __be16 ports[2]) |
174 | { | 184 | { |
175 | struct ip_vs_conn *cp = NULL; | 185 | struct ip_vs_conn *cp = NULL; |
176 | struct iphdr *iph = ip_hdr(skb); | 186 | struct ip_vs_iphdr iph; |
177 | struct ip_vs_dest *dest; | 187 | struct ip_vs_dest *dest; |
178 | struct ip_vs_conn *ct; | 188 | struct ip_vs_conn *ct; |
179 | __be16 dport; /* destination port to forward */ | 189 | __be16 dport; /* destination port to forward */ |
180 | __be32 snet; /* source network of the client, after masking */ | 190 | union nf_inet_addr snet; /* source network of the client, |
191 | after masking */ | ||
192 | |||
193 | ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); | ||
181 | 194 | ||
182 | /* Mask saddr with the netmask to adjust template granularity */ | 195 | /* Mask saddr with the netmask to adjust template granularity */ |
183 | snet = iph->saddr & svc->netmask; | 196 | #ifdef CONFIG_IP_VS_IPV6 |
197 | if (svc->af == AF_INET6) | ||
198 | ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask); | ||
199 | else | ||
200 | #endif | ||
201 | snet.ip = iph.saddr.ip & svc->netmask; | ||
184 | 202 | ||
185 | IP_VS_DBG(6, "p-schedule: src %u.%u.%u.%u:%u dest %u.%u.%u.%u:%u " | 203 | IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " |
186 | "mnet %u.%u.%u.%u\n", | 204 | "mnet %s\n", |
187 | NIPQUAD(iph->saddr), ntohs(ports[0]), | 205 | IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]), |
188 | NIPQUAD(iph->daddr), ntohs(ports[1]), | 206 | IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]), |
189 | NIPQUAD(snet)); | 207 | IP_VS_DBG_ADDR(svc->af, &snet)); |
190 | 208 | ||
191 | /* | 209 | /* |
192 | * As far as we know, FTP is a very complicated network protocol, and | 210 | * As far as we know, FTP is a very complicated network protocol, and |
@@ -204,11 +222,11 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
204 | if (ports[1] == svc->port) { | 222 | if (ports[1] == svc->port) { |
205 | /* Check if a template already exists */ | 223 | /* Check if a template already exists */ |
206 | if (svc->port != FTPPORT) | 224 | if (svc->port != FTPPORT) |
207 | ct = ip_vs_ct_in_get(iph->protocol, snet, 0, | 225 | ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0, |
208 | iph->daddr, ports[1]); | 226 | &iph.daddr, ports[1]); |
209 | else | 227 | else |
210 | ct = ip_vs_ct_in_get(iph->protocol, snet, 0, | 228 | ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0, |
211 | iph->daddr, 0); | 229 | &iph.daddr, 0); |
212 | 230 | ||
213 | if (!ct || !ip_vs_check_template(ct)) { | 231 | if (!ct || !ip_vs_check_template(ct)) { |
214 | /* | 232 | /* |
@@ -228,18 +246,18 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
228 | * for ftp service. | 246 | * for ftp service. |
229 | */ | 247 | */ |
230 | if (svc->port != FTPPORT) | 248 | if (svc->port != FTPPORT) |
231 | ct = ip_vs_conn_new(iph->protocol, | 249 | ct = ip_vs_conn_new(svc->af, iph.protocol, |
232 | snet, 0, | 250 | &snet, 0, |
233 | iph->daddr, | 251 | &iph.daddr, |
234 | ports[1], | 252 | ports[1], |
235 | dest->addr, dest->port, | 253 | &dest->addr, dest->port, |
236 | IP_VS_CONN_F_TEMPLATE, | 254 | IP_VS_CONN_F_TEMPLATE, |
237 | dest); | 255 | dest); |
238 | else | 256 | else |
239 | ct = ip_vs_conn_new(iph->protocol, | 257 | ct = ip_vs_conn_new(svc->af, iph.protocol, |
240 | snet, 0, | 258 | &snet, 0, |
241 | iph->daddr, 0, | 259 | &iph.daddr, 0, |
242 | dest->addr, 0, | 260 | &dest->addr, 0, |
243 | IP_VS_CONN_F_TEMPLATE, | 261 | IP_VS_CONN_F_TEMPLATE, |
244 | dest); | 262 | dest); |
245 | if (ct == NULL) | 263 | if (ct == NULL) |
@@ -258,12 +276,16 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
258 | * fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0> | 276 | * fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0> |
259 | * port zero template: <protocol,caddr,0,vaddr,0,daddr,0> | 277 | * port zero template: <protocol,caddr,0,vaddr,0,daddr,0> |
260 | */ | 278 | */ |
261 | if (svc->fwmark) | 279 | if (svc->fwmark) { |
262 | ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0, | 280 | union nf_inet_addr fwmark = { |
263 | htonl(svc->fwmark), 0); | 281 | .all = { 0, 0, 0, htonl(svc->fwmark) } |
264 | else | 282 | }; |
265 | ct = ip_vs_ct_in_get(iph->protocol, snet, 0, | 283 | |
266 | iph->daddr, 0); | 284 | ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0, |
285 | &fwmark, 0); | ||
286 | } else | ||
287 | ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0, | ||
288 | &iph.daddr, 0); | ||
267 | 289 | ||
268 | if (!ct || !ip_vs_check_template(ct)) { | 290 | if (!ct || !ip_vs_check_template(ct)) { |
269 | /* | 291 | /* |
@@ -282,18 +304,22 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
282 | /* | 304 | /* |
283 | * Create a template according to the service | 305 | * Create a template according to the service |
284 | */ | 306 | */ |
285 | if (svc->fwmark) | 307 | if (svc->fwmark) { |
286 | ct = ip_vs_conn_new(IPPROTO_IP, | 308 | union nf_inet_addr fwmark = { |
287 | snet, 0, | 309 | .all = { 0, 0, 0, htonl(svc->fwmark) } |
288 | htonl(svc->fwmark), 0, | 310 | }; |
289 | dest->addr, 0, | 311 | |
312 | ct = ip_vs_conn_new(svc->af, IPPROTO_IP, | ||
313 | &snet, 0, | ||
314 | &fwmark, 0, | ||
315 | &dest->addr, 0, | ||
290 | IP_VS_CONN_F_TEMPLATE, | 316 | IP_VS_CONN_F_TEMPLATE, |
291 | dest); | 317 | dest); |
292 | else | 318 | } else |
293 | ct = ip_vs_conn_new(iph->protocol, | 319 | ct = ip_vs_conn_new(svc->af, iph.protocol, |
294 | snet, 0, | 320 | &snet, 0, |
295 | iph->daddr, 0, | 321 | &iph.daddr, 0, |
296 | dest->addr, 0, | 322 | &dest->addr, 0, |
297 | IP_VS_CONN_F_TEMPLATE, | 323 | IP_VS_CONN_F_TEMPLATE, |
298 | dest); | 324 | dest); |
299 | if (ct == NULL) | 325 | if (ct == NULL) |
@@ -310,10 +336,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
310 | /* | 336 | /* |
311 | * Create a new connection according to the template | 337 | * Create a new connection according to the template |
312 | */ | 338 | */ |
313 | cp = ip_vs_conn_new(iph->protocol, | 339 | cp = ip_vs_conn_new(svc->af, iph.protocol, |
314 | iph->saddr, ports[0], | 340 | &iph.saddr, ports[0], |
315 | iph->daddr, ports[1], | 341 | &iph.daddr, ports[1], |
316 | dest->addr, dport, | 342 | &dest->addr, dport, |
317 | 0, | 343 | 0, |
318 | dest); | 344 | dest); |
319 | if (cp == NULL) { | 345 | if (cp == NULL) { |
@@ -342,12 +368,12 @@ struct ip_vs_conn * | |||
342 | ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | 368 | ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) |
343 | { | 369 | { |
344 | struct ip_vs_conn *cp = NULL; | 370 | struct ip_vs_conn *cp = NULL; |
345 | struct iphdr *iph = ip_hdr(skb); | 371 | struct ip_vs_iphdr iph; |
346 | struct ip_vs_dest *dest; | 372 | struct ip_vs_dest *dest; |
347 | __be16 _ports[2], *pptr; | 373 | __be16 _ports[2], *pptr; |
348 | 374 | ||
349 | pptr = skb_header_pointer(skb, iph->ihl*4, | 375 | ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); |
350 | sizeof(_ports), _ports); | 376 | pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); |
351 | if (pptr == NULL) | 377 | if (pptr == NULL) |
352 | return NULL; | 378 | return NULL; |
353 | 379 | ||
@@ -377,22 +403,22 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | |||
377 | /* | 403 | /* |
378 | * Create a connection entry. | 404 | * Create a connection entry. |
379 | */ | 405 | */ |
380 | cp = ip_vs_conn_new(iph->protocol, | 406 | cp = ip_vs_conn_new(svc->af, iph.protocol, |
381 | iph->saddr, pptr[0], | 407 | &iph.saddr, pptr[0], |
382 | iph->daddr, pptr[1], | 408 | &iph.daddr, pptr[1], |
383 | dest->addr, dest->port?dest->port:pptr[1], | 409 | &dest->addr, dest->port ? dest->port : pptr[1], |
384 | 0, | 410 | 0, |
385 | dest); | 411 | dest); |
386 | if (cp == NULL) | 412 | if (cp == NULL) |
387 | return NULL; | 413 | return NULL; |
388 | 414 | ||
389 | IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u " | 415 | IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u " |
390 | "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n", | 416 | "d:%s:%u conn->flags:%X conn->refcnt:%d\n", |
391 | ip_vs_fwd_tag(cp), | 417 | ip_vs_fwd_tag(cp), |
392 | NIPQUAD(cp->caddr), ntohs(cp->cport), | 418 | IP_VS_DBG_ADDR(svc->af, &cp->caddr), ntohs(cp->cport), |
393 | NIPQUAD(cp->vaddr), ntohs(cp->vport), | 419 | IP_VS_DBG_ADDR(svc->af, &cp->vaddr), ntohs(cp->vport), |
394 | NIPQUAD(cp->daddr), ntohs(cp->dport), | 420 | IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport), |
395 | cp->flags, atomic_read(&cp->refcnt)); | 421 | cp->flags, atomic_read(&cp->refcnt)); |
396 | 422 | ||
397 | ip_vs_conn_stats(cp, svc); | 423 | ip_vs_conn_stats(cp, svc); |
398 | return cp; | 424 | return cp; |
@@ -408,31 +434,39 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, | |||
408 | struct ip_vs_protocol *pp) | 434 | struct ip_vs_protocol *pp) |
409 | { | 435 | { |
410 | __be16 _ports[2], *pptr; | 436 | __be16 _ports[2], *pptr; |
411 | struct iphdr *iph = ip_hdr(skb); | 437 | struct ip_vs_iphdr iph; |
438 | int unicast; | ||
439 | ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); | ||
412 | 440 | ||
413 | pptr = skb_header_pointer(skb, iph->ihl*4, | 441 | pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); |
414 | sizeof(_ports), _ports); | ||
415 | if (pptr == NULL) { | 442 | if (pptr == NULL) { |
416 | ip_vs_service_put(svc); | 443 | ip_vs_service_put(svc); |
417 | return NF_DROP; | 444 | return NF_DROP; |
418 | } | 445 | } |
419 | 446 | ||
447 | #ifdef CONFIG_IP_VS_IPV6 | ||
448 | if (svc->af == AF_INET6) | ||
449 | unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST; | ||
450 | else | ||
451 | #endif | ||
452 | unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST); | ||
453 | |||
420 | /* if it is fwmark-based service, the cache_bypass sysctl is up | 454 | /* if it is fwmark-based service, the cache_bypass sysctl is up |
421 | and the destination is RTN_UNICAST (and not local), then create | 455 | and the destination is a non-local unicast, then create |
422 | a cache_bypass connection entry */ | 456 | a cache_bypass connection entry */ |
423 | if (sysctl_ip_vs_cache_bypass && svc->fwmark | 457 | if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { |
424 | && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) { | ||
425 | int ret, cs; | 458 | int ret, cs; |
426 | struct ip_vs_conn *cp; | 459 | struct ip_vs_conn *cp; |
460 | union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; | ||
427 | 461 | ||
428 | ip_vs_service_put(svc); | 462 | ip_vs_service_put(svc); |
429 | 463 | ||
430 | /* create a new connection entry */ | 464 | /* create a new connection entry */ |
431 | IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n"); | 465 | IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n"); |
432 | cp = ip_vs_conn_new(iph->protocol, | 466 | cp = ip_vs_conn_new(svc->af, iph.protocol, |
433 | iph->saddr, pptr[0], | 467 | &iph.saddr, pptr[0], |
434 | iph->daddr, pptr[1], | 468 | &iph.daddr, pptr[1], |
435 | 0, 0, | 469 | &daddr, 0, |
436 | IP_VS_CONN_F_BYPASS, | 470 | IP_VS_CONN_F_BYPASS, |
437 | NULL); | 471 | NULL); |
438 | if (cp == NULL) | 472 | if (cp == NULL) |
@@ -473,7 +507,14 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, | |||
473 | * created, the TCP RST packet cannot be sent, instead that | 507 | * created, the TCP RST packet cannot be sent, instead that |
474 | * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ | 508 | * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ |
475 | */ | 509 | */ |
476 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); | 510 | #ifdef CONFIG_IP_VS_IPV6 |
511 | if (svc->af == AF_INET6) | ||
512 | icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, | ||
513 | skb->dev); | ||
514 | else | ||
515 | #endif | ||
516 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); | ||
517 | |||
477 | return NF_DROP; | 518 | return NF_DROP; |
478 | } | 519 | } |
479 | 520 | ||
@@ -512,6 +553,14 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) | |||
512 | return err; | 553 | return err; |
513 | } | 554 | } |
514 | 555 | ||
556 | #ifdef CONFIG_IP_VS_IPV6 | ||
557 | static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user) | ||
558 | { | ||
559 | /* TODO IPv6: Find out what to do here for IPv6 */ | ||
560 | return 0; | ||
561 | } | ||
562 | #endif | ||
563 | |||
515 | /* | 564 | /* |
516 | * Packet has been made sufficiently writable in caller | 565 | * Packet has been made sufficiently writable in caller |
517 | * - inout: 1=in->out, 0=out->in | 566 | * - inout: 1=in->out, 0=out->in |
@@ -526,14 +575,14 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
526 | struct iphdr *ciph = (struct iphdr *)(icmph + 1); | 575 | struct iphdr *ciph = (struct iphdr *)(icmph + 1); |
527 | 576 | ||
528 | if (inout) { | 577 | if (inout) { |
529 | iph->saddr = cp->vaddr; | 578 | iph->saddr = cp->vaddr.ip; |
530 | ip_send_check(iph); | 579 | ip_send_check(iph); |
531 | ciph->daddr = cp->vaddr; | 580 | ciph->daddr = cp->vaddr.ip; |
532 | ip_send_check(ciph); | 581 | ip_send_check(ciph); |
533 | } else { | 582 | } else { |
534 | iph->daddr = cp->daddr; | 583 | iph->daddr = cp->daddr.ip; |
535 | ip_send_check(iph); | 584 | ip_send_check(iph); |
536 | ciph->saddr = cp->daddr; | 585 | ciph->saddr = cp->daddr.ip; |
537 | ip_send_check(ciph); | 586 | ip_send_check(ciph); |
538 | } | 587 | } |
539 | 588 | ||
@@ -560,21 +609,112 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
560 | "Forwarding altered incoming ICMP"); | 609 | "Forwarding altered incoming ICMP"); |
561 | } | 610 | } |
562 | 611 | ||
612 | #ifdef CONFIG_IP_VS_IPV6 | ||
613 | void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, | ||
614 | struct ip_vs_conn *cp, int inout) | ||
615 | { | ||
616 | struct ipv6hdr *iph = ipv6_hdr(skb); | ||
617 | unsigned int icmp_offset = sizeof(struct ipv6hdr); | ||
618 | struct icmp6hdr *icmph = (struct icmp6hdr *)(skb_network_header(skb) + | ||
619 | icmp_offset); | ||
620 | struct ipv6hdr *ciph = (struct ipv6hdr *)(icmph + 1); | ||
621 | |||
622 | if (inout) { | ||
623 | iph->saddr = cp->vaddr.in6; | ||
624 | ciph->daddr = cp->vaddr.in6; | ||
625 | } else { | ||
626 | iph->daddr = cp->daddr.in6; | ||
627 | ciph->saddr = cp->daddr.in6; | ||
628 | } | ||
629 | |||
630 | /* the TCP/UDP port */ | ||
631 | if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr) { | ||
632 | __be16 *ports = (void *)ciph + sizeof(struct ipv6hdr); | ||
633 | |||
634 | if (inout) | ||
635 | ports[1] = cp->vport; | ||
636 | else | ||
637 | ports[0] = cp->dport; | ||
638 | } | ||
639 | |||
640 | /* And finally the ICMP checksum */ | ||
641 | icmph->icmp6_cksum = 0; | ||
642 | /* TODO IPv6: is this correct for ICMPv6? */ | ||
643 | ip_vs_checksum_complete(skb, icmp_offset); | ||
644 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
645 | |||
646 | if (inout) | ||
647 | IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, | ||
648 | "Forwarding altered outgoing ICMPv6"); | ||
649 | else | ||
650 | IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, | ||
651 | "Forwarding altered incoming ICMPv6"); | ||
652 | } | ||
653 | #endif | ||
654 | |||
655 | /* Handle relevant response ICMP messages - forward to the right | ||
656 | * destination host. Used for NAT and local client. | ||
657 | */ | ||
658 | static int handle_response_icmp(int af, struct sk_buff *skb, | ||
659 | union nf_inet_addr *snet, | ||
660 | __u8 protocol, struct ip_vs_conn *cp, | ||
661 | struct ip_vs_protocol *pp, | ||
662 | unsigned int offset, unsigned int ihl) | ||
663 | { | ||
664 | unsigned int verdict = NF_DROP; | ||
665 | |||
666 | if (IP_VS_FWD_METHOD(cp) != 0) { | ||
667 | IP_VS_ERR("shouldn't reach here, because the box is on the " | ||
668 | "half connection in the tun/dr module.\n"); | ||
669 | } | ||
670 | |||
671 | /* Ensure the checksum is correct */ | ||
672 | if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { | ||
673 | /* Failed checksum! */ | ||
674 | IP_VS_DBG_BUF(1, "Forward ICMP: failed checksum from %s!\n", | ||
675 | IP_VS_DBG_ADDR(af, snet)); | ||
676 | goto out; | ||
677 | } | ||
678 | |||
679 | if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol) | ||
680 | offset += 2 * sizeof(__u16); | ||
681 | if (!skb_make_writable(skb, offset)) | ||
682 | goto out; | ||
683 | |||
684 | #ifdef CONFIG_IP_VS_IPV6 | ||
685 | if (af == AF_INET6) | ||
686 | ip_vs_nat_icmp_v6(skb, pp, cp, 1); | ||
687 | else | ||
688 | #endif | ||
689 | ip_vs_nat_icmp(skb, pp, cp, 1); | ||
690 | |||
691 | /* do the statistics and put it back */ | ||
692 | ip_vs_out_stats(cp, skb); | ||
693 | |||
694 | skb->ipvs_property = 1; | ||
695 | verdict = NF_ACCEPT; | ||
696 | |||
697 | out: | ||
698 | __ip_vs_conn_put(cp); | ||
699 | |||
700 | return verdict; | ||
701 | } | ||
702 | |||
563 | /* | 703 | /* |
564 | * Handle ICMP messages in the inside-to-outside direction (outgoing). | 704 | * Handle ICMP messages in the inside-to-outside direction (outgoing). |
565 | * Find any that might be relevant, check against existing connections, | 705 | * Find any that might be relevant, check against existing connections. |
566 | * forward to the right destination host if relevant. | ||
567 | * Currently handles error types - unreachable, quench, ttl exceeded. | 706 | * Currently handles error types - unreachable, quench, ttl exceeded. |
568 | * (Only used in VS/NAT) | ||
569 | */ | 707 | */ |
570 | static int ip_vs_out_icmp(struct sk_buff *skb, int *related) | 708 | static int ip_vs_out_icmp(struct sk_buff *skb, int *related) |
571 | { | 709 | { |
572 | struct iphdr *iph; | 710 | struct iphdr *iph; |
573 | struct icmphdr _icmph, *ic; | 711 | struct icmphdr _icmph, *ic; |
574 | struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ | 712 | struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ |
713 | struct ip_vs_iphdr ciph; | ||
575 | struct ip_vs_conn *cp; | 714 | struct ip_vs_conn *cp; |
576 | struct ip_vs_protocol *pp; | 715 | struct ip_vs_protocol *pp; |
577 | unsigned int offset, ihl, verdict; | 716 | unsigned int offset, ihl; |
717 | union nf_inet_addr snet; | ||
578 | 718 | ||
579 | *related = 1; | 719 | *related = 1; |
580 | 720 | ||
@@ -627,102 +767,231 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related) | |||
627 | 767 | ||
628 | offset += cih->ihl * 4; | 768 | offset += cih->ihl * 4; |
629 | 769 | ||
770 | ip_vs_fill_iphdr(AF_INET, cih, &ciph); | ||
630 | /* The embedded headers contain source and dest in reverse order */ | 771 | /* The embedded headers contain source and dest in reverse order */ |
631 | cp = pp->conn_out_get(skb, pp, cih, offset, 1); | 772 | cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); |
632 | if (!cp) | 773 | if (!cp) |
633 | return NF_ACCEPT; | 774 | return NF_ACCEPT; |
634 | 775 | ||
635 | verdict = NF_DROP; | 776 | snet.ip = iph->saddr; |
777 | return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp, | ||
778 | pp, offset, ihl); | ||
779 | } | ||
636 | 780 | ||
637 | if (IP_VS_FWD_METHOD(cp) != 0) { | 781 | #ifdef CONFIG_IP_VS_IPV6 |
638 | IP_VS_ERR("shouldn't reach here, because the box is on the " | 782 | static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related) |
639 | "half connection in the tun/dr module.\n"); | 783 | { |
784 | struct ipv6hdr *iph; | ||
785 | struct icmp6hdr _icmph, *ic; | ||
786 | struct ipv6hdr _ciph, *cih; /* The ip header contained | ||
787 | within the ICMP */ | ||
788 | struct ip_vs_iphdr ciph; | ||
789 | struct ip_vs_conn *cp; | ||
790 | struct ip_vs_protocol *pp; | ||
791 | unsigned int offset; | ||
792 | union nf_inet_addr snet; | ||
793 | |||
794 | *related = 1; | ||
795 | |||
796 | /* reassemble IP fragments */ | ||
797 | if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { | ||
798 | if (ip_vs_gather_frags_v6(skb, IP_DEFRAG_VS_OUT)) | ||
799 | return NF_STOLEN; | ||
640 | } | 800 | } |
641 | 801 | ||
642 | /* Ensure the checksum is correct */ | 802 | iph = ipv6_hdr(skb); |
643 | if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { | 803 | offset = sizeof(struct ipv6hdr); |
644 | /* Failed checksum! */ | 804 | ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); |
645 | IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n", | 805 | if (ic == NULL) |
646 | NIPQUAD(iph->saddr)); | 806 | return NF_DROP; |
647 | goto out; | 807 | |
808 | IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n", | ||
809 | ic->icmp6_type, ntohs(icmpv6_id(ic)), | ||
810 | NIP6(iph->saddr), NIP6(iph->daddr)); | ||
811 | |||
812 | /* | ||
813 | * Work through seeing if this is for us. | ||
814 | * These checks are supposed to be in an order that means easy | ||
815 | * things are checked first to speed up processing.... however | ||
816 | * this means that some packets will manage to get a long way | ||
817 | * down this stack and then be rejected, but that's life. | ||
818 | */ | ||
819 | if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) && | ||
820 | (ic->icmp6_type != ICMPV6_PKT_TOOBIG) && | ||
821 | (ic->icmp6_type != ICMPV6_TIME_EXCEED)) { | ||
822 | *related = 0; | ||
823 | return NF_ACCEPT; | ||
648 | } | 824 | } |
649 | 825 | ||
650 | if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) | 826 | /* Now find the contained IP header */ |
651 | offset += 2 * sizeof(__u16); | 827 | offset += sizeof(_icmph); |
652 | if (!skb_make_writable(skb, offset)) | 828 | cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); |
653 | goto out; | 829 | if (cih == NULL) |
830 | return NF_ACCEPT; /* The packet looks wrong, ignore */ | ||
654 | 831 | ||
655 | ip_vs_nat_icmp(skb, pp, cp, 1); | 832 | pp = ip_vs_proto_get(cih->nexthdr); |
833 | if (!pp) | ||
834 | return NF_ACCEPT; | ||
656 | 835 | ||
657 | /* do the statistics and put it back */ | 836 | /* Is the embedded protocol header present? */ |
658 | ip_vs_out_stats(cp, skb); | 837 | /* TODO: we don't support fragmentation at the moment anyways */ |
838 | if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) | ||
839 | return NF_ACCEPT; | ||
659 | 840 | ||
660 | skb->ipvs_property = 1; | 841 | IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMPv6 for"); |
661 | verdict = NF_ACCEPT; | ||
662 | 842 | ||
663 | out: | 843 | offset += sizeof(struct ipv6hdr); |
664 | __ip_vs_conn_put(cp); | ||
665 | 844 | ||
666 | return verdict; | 845 | ip_vs_fill_iphdr(AF_INET6, cih, &ciph); |
846 | /* The embedded headers contain source and dest in reverse order */ | ||
847 | cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); | ||
848 | if (!cp) | ||
849 | return NF_ACCEPT; | ||
850 | |||
851 | ipv6_addr_copy(&snet.in6, &iph->saddr); | ||
852 | return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp, | ||
853 | pp, offset, sizeof(struct ipv6hdr)); | ||
667 | } | 854 | } |
855 | #endif | ||
668 | 856 | ||
669 | static inline int is_tcp_reset(const struct sk_buff *skb) | 857 | static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len) |
670 | { | 858 | { |
671 | struct tcphdr _tcph, *th; | 859 | struct tcphdr _tcph, *th; |
672 | 860 | ||
673 | th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); | 861 | th = skb_header_pointer(skb, nh_len, sizeof(_tcph), &_tcph); |
674 | if (th == NULL) | 862 | if (th == NULL) |
675 | return 0; | 863 | return 0; |
676 | return th->rst; | 864 | return th->rst; |
677 | } | 865 | } |
678 | 866 | ||
867 | /* Handle response packets: rewrite addresses and send away... | ||
868 | * Used for NAT and local client. | ||
869 | */ | ||
870 | static unsigned int | ||
871 | handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | ||
872 | struct ip_vs_conn *cp, int ihl) | ||
873 | { | ||
874 | IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); | ||
875 | |||
876 | if (!skb_make_writable(skb, ihl)) | ||
877 | goto drop; | ||
878 | |||
879 | /* mangle the packet */ | ||
880 | if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) | ||
881 | goto drop; | ||
882 | |||
883 | #ifdef CONFIG_IP_VS_IPV6 | ||
884 | if (af == AF_INET6) | ||
885 | ipv6_hdr(skb)->saddr = cp->vaddr.in6; | ||
886 | else | ||
887 | #endif | ||
888 | { | ||
889 | ip_hdr(skb)->saddr = cp->vaddr.ip; | ||
890 | ip_send_check(ip_hdr(skb)); | ||
891 | } | ||
892 | |||
893 | /* For policy routing, packets originating from this | ||
894 | * machine itself may be routed differently to packets | ||
895 | * passing through. We want this packet to be routed as | ||
896 | * if it came from this machine itself. So re-compute | ||
897 | * the routing information. | ||
898 | */ | ||
899 | #ifdef CONFIG_IP_VS_IPV6 | ||
900 | if (af == AF_INET6) { | ||
901 | if (ip6_route_me_harder(skb) != 0) | ||
902 | goto drop; | ||
903 | } else | ||
904 | #endif | ||
905 | if (ip_route_me_harder(skb, RTN_LOCAL) != 0) | ||
906 | goto drop; | ||
907 | |||
908 | IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); | ||
909 | |||
910 | ip_vs_out_stats(cp, skb); | ||
911 | ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); | ||
912 | ip_vs_conn_put(cp); | ||
913 | |||
914 | skb->ipvs_property = 1; | ||
915 | |||
916 | LeaveFunction(11); | ||
917 | return NF_ACCEPT; | ||
918 | |||
919 | drop: | ||
920 | ip_vs_conn_put(cp); | ||
921 | kfree_skb(skb); | ||
922 | return NF_STOLEN; | ||
923 | } | ||
924 | |||
679 | /* | 925 | /* |
680 | * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT. | 926 | * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT. |
681 | * Check if outgoing packet belongs to the established ip_vs_conn, | 927 | * Check if outgoing packet belongs to the established ip_vs_conn. |
682 | * rewrite addresses of the packet and send it on its way... | ||
683 | */ | 928 | */ |
684 | static unsigned int | 929 | static unsigned int |
685 | ip_vs_out(unsigned int hooknum, struct sk_buff *skb, | 930 | ip_vs_out(unsigned int hooknum, struct sk_buff *skb, |
686 | const struct net_device *in, const struct net_device *out, | 931 | const struct net_device *in, const struct net_device *out, |
687 | int (*okfn)(struct sk_buff *)) | 932 | int (*okfn)(struct sk_buff *)) |
688 | { | 933 | { |
689 | struct iphdr *iph; | 934 | struct ip_vs_iphdr iph; |
690 | struct ip_vs_protocol *pp; | 935 | struct ip_vs_protocol *pp; |
691 | struct ip_vs_conn *cp; | 936 | struct ip_vs_conn *cp; |
692 | int ihl; | 937 | int af; |
693 | 938 | ||
694 | EnterFunction(11); | 939 | EnterFunction(11); |
695 | 940 | ||
941 | af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; | ||
942 | |||
696 | if (skb->ipvs_property) | 943 | if (skb->ipvs_property) |
697 | return NF_ACCEPT; | 944 | return NF_ACCEPT; |
698 | 945 | ||
699 | iph = ip_hdr(skb); | 946 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); |
700 | if (unlikely(iph->protocol == IPPROTO_ICMP)) { | 947 | #ifdef CONFIG_IP_VS_IPV6 |
701 | int related, verdict = ip_vs_out_icmp(skb, &related); | 948 | if (af == AF_INET6) { |
949 | if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { | ||
950 | int related, verdict = ip_vs_out_icmp_v6(skb, &related); | ||
702 | 951 | ||
703 | if (related) | 952 | if (related) |
704 | return verdict; | 953 | return verdict; |
705 | iph = ip_hdr(skb); | 954 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); |
706 | } | 955 | } |
956 | } else | ||
957 | #endif | ||
958 | if (unlikely(iph.protocol == IPPROTO_ICMP)) { | ||
959 | int related, verdict = ip_vs_out_icmp(skb, &related); | ||
707 | 960 | ||
708 | pp = ip_vs_proto_get(iph->protocol); | 961 | if (related) |
962 | return verdict; | ||
963 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | ||
964 | } | ||
965 | |||
966 | pp = ip_vs_proto_get(iph.protocol); | ||
709 | if (unlikely(!pp)) | 967 | if (unlikely(!pp)) |
710 | return NF_ACCEPT; | 968 | return NF_ACCEPT; |
711 | 969 | ||
712 | /* reassemble IP fragments */ | 970 | /* reassemble IP fragments */ |
713 | if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) && | 971 | #ifdef CONFIG_IP_VS_IPV6 |
714 | !pp->dont_defrag)) { | 972 | if (af == AF_INET6) { |
715 | if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) | 973 | if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { |
716 | return NF_STOLEN; | 974 | int related, verdict = ip_vs_out_icmp_v6(skb, &related); |
717 | iph = ip_hdr(skb); | 975 | |
718 | } | 976 | if (related) |
977 | return verdict; | ||
719 | 978 | ||
720 | ihl = iph->ihl << 2; | 979 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); |
980 | } | ||
981 | } else | ||
982 | #endif | ||
983 | if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) && | ||
984 | !pp->dont_defrag)) { | ||
985 | if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) | ||
986 | return NF_STOLEN; | ||
987 | |||
988 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | ||
989 | } | ||
721 | 990 | ||
722 | /* | 991 | /* |
723 | * Check if the packet belongs to an existing entry | 992 | * Check if the packet belongs to an existing entry |
724 | */ | 993 | */ |
725 | cp = pp->conn_out_get(skb, pp, iph, ihl, 0); | 994 | cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); |
726 | 995 | ||
727 | if (unlikely(!cp)) { | 996 | if (unlikely(!cp)) { |
728 | if (sysctl_ip_vs_nat_icmp_send && | 997 | if (sysctl_ip_vs_nat_icmp_send && |
@@ -730,21 +999,31 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, | |||
730 | pp->protocol == IPPROTO_UDP)) { | 999 | pp->protocol == IPPROTO_UDP)) { |
731 | __be16 _ports[2], *pptr; | 1000 | __be16 _ports[2], *pptr; |
732 | 1001 | ||
733 | pptr = skb_header_pointer(skb, ihl, | 1002 | pptr = skb_header_pointer(skb, iph.len, |
734 | sizeof(_ports), _ports); | 1003 | sizeof(_ports), _ports); |
735 | if (pptr == NULL) | 1004 | if (pptr == NULL) |
736 | return NF_ACCEPT; /* Not for me */ | 1005 | return NF_ACCEPT; /* Not for me */ |
737 | if (ip_vs_lookup_real_service(iph->protocol, | 1006 | if (ip_vs_lookup_real_service(af, iph.protocol, |
738 | iph->saddr, pptr[0])) { | 1007 | &iph.saddr, |
1008 | pptr[0])) { | ||
739 | /* | 1009 | /* |
740 | * Notify the real server: there is no | 1010 | * Notify the real server: there is no |
741 | * existing entry if it is not RST | 1011 | * existing entry if it is not RST |
742 | * packet or not TCP packet. | 1012 | * packet or not TCP packet. |
743 | */ | 1013 | */ |
744 | if (iph->protocol != IPPROTO_TCP | 1014 | if (iph.protocol != IPPROTO_TCP |
745 | || !is_tcp_reset(skb)) { | 1015 | || !is_tcp_reset(skb, iph.len)) { |
746 | icmp_send(skb,ICMP_DEST_UNREACH, | 1016 | #ifdef CONFIG_IP_VS_IPV6 |
747 | ICMP_PORT_UNREACH, 0); | 1017 | if (af == AF_INET6) |
1018 | icmpv6_send(skb, | ||
1019 | ICMPV6_DEST_UNREACH, | ||
1020 | ICMPV6_PORT_UNREACH, | ||
1021 | 0, skb->dev); | ||
1022 | else | ||
1023 | #endif | ||
1024 | icmp_send(skb, | ||
1025 | ICMP_DEST_UNREACH, | ||
1026 | ICMP_PORT_UNREACH, 0); | ||
748 | return NF_DROP; | 1027 | return NF_DROP; |
749 | } | 1028 | } |
750 | } | 1029 | } |
@@ -754,41 +1033,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, | |||
754 | return NF_ACCEPT; | 1033 | return NF_ACCEPT; |
755 | } | 1034 | } |
756 | 1035 | ||
757 | IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); | 1036 | return handle_response(af, skb, pp, cp, iph.len); |
758 | |||
759 | if (!skb_make_writable(skb, ihl)) | ||
760 | goto drop; | ||
761 | |||
762 | /* mangle the packet */ | ||
763 | if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) | ||
764 | goto drop; | ||
765 | ip_hdr(skb)->saddr = cp->vaddr; | ||
766 | ip_send_check(ip_hdr(skb)); | ||
767 | |||
768 | /* For policy routing, packets originating from this | ||
769 | * machine itself may be routed differently to packets | ||
770 | * passing through. We want this packet to be routed as | ||
771 | * if it came from this machine itself. So re-compute | ||
772 | * the routing information. | ||
773 | */ | ||
774 | if (ip_route_me_harder(skb, RTN_LOCAL) != 0) | ||
775 | goto drop; | ||
776 | |||
777 | IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); | ||
778 | |||
779 | ip_vs_out_stats(cp, skb); | ||
780 | ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); | ||
781 | ip_vs_conn_put(cp); | ||
782 | |||
783 | skb->ipvs_property = 1; | ||
784 | |||
785 | LeaveFunction(11); | ||
786 | return NF_ACCEPT; | ||
787 | |||
788 | drop: | ||
789 | ip_vs_conn_put(cp); | ||
790 | kfree_skb(skb); | ||
791 | return NF_STOLEN; | ||
792 | } | 1037 | } |
793 | 1038 | ||
794 | 1039 | ||
@@ -804,9 +1049,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
804 | struct iphdr *iph; | 1049 | struct iphdr *iph; |
805 | struct icmphdr _icmph, *ic; | 1050 | struct icmphdr _icmph, *ic; |
806 | struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ | 1051 | struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ |
1052 | struct ip_vs_iphdr ciph; | ||
807 | struct ip_vs_conn *cp; | 1053 | struct ip_vs_conn *cp; |
808 | struct ip_vs_protocol *pp; | 1054 | struct ip_vs_protocol *pp; |
809 | unsigned int offset, ihl, verdict; | 1055 | unsigned int offset, ihl, verdict; |
1056 | union nf_inet_addr snet; | ||
810 | 1057 | ||
811 | *related = 1; | 1058 | *related = 1; |
812 | 1059 | ||
@@ -860,10 +1107,20 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
860 | 1107 | ||
861 | offset += cih->ihl * 4; | 1108 | offset += cih->ihl * 4; |
862 | 1109 | ||
1110 | ip_vs_fill_iphdr(AF_INET, cih, &ciph); | ||
863 | /* The embedded headers contain source and dest in reverse order */ | 1111 | /* The embedded headers contain source and dest in reverse order */ |
864 | cp = pp->conn_in_get(skb, pp, cih, offset, 1); | 1112 | cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1); |
865 | if (!cp) | 1113 | if (!cp) { |
1114 | /* The packet could also belong to a local client */ | ||
1115 | cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); | ||
1116 | if (cp) { | ||
1117 | snet.ip = iph->saddr; | ||
1118 | return handle_response_icmp(AF_INET, skb, &snet, | ||
1119 | cih->protocol, cp, pp, | ||
1120 | offset, ihl); | ||
1121 | } | ||
866 | return NF_ACCEPT; | 1122 | return NF_ACCEPT; |
1123 | } | ||
867 | 1124 | ||
868 | verdict = NF_DROP; | 1125 | verdict = NF_DROP; |
869 | 1126 | ||
@@ -888,6 +1145,105 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
888 | return verdict; | 1145 | return verdict; |
889 | } | 1146 | } |
890 | 1147 | ||
1148 | #ifdef CONFIG_IP_VS_IPV6 | ||
1149 | static int | ||
1150 | ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) | ||
1151 | { | ||
1152 | struct ipv6hdr *iph; | ||
1153 | struct icmp6hdr _icmph, *ic; | ||
1154 | struct ipv6hdr _ciph, *cih; /* The ip header contained | ||
1155 | within the ICMP */ | ||
1156 | struct ip_vs_iphdr ciph; | ||
1157 | struct ip_vs_conn *cp; | ||
1158 | struct ip_vs_protocol *pp; | ||
1159 | unsigned int offset, verdict; | ||
1160 | union nf_inet_addr snet; | ||
1161 | |||
1162 | *related = 1; | ||
1163 | |||
1164 | /* reassemble IP fragments */ | ||
1165 | if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { | ||
1166 | if (ip_vs_gather_frags_v6(skb, hooknum == NF_INET_LOCAL_IN ? | ||
1167 | IP_DEFRAG_VS_IN : | ||
1168 | IP_DEFRAG_VS_FWD)) | ||
1169 | return NF_STOLEN; | ||
1170 | } | ||
1171 | |||
1172 | iph = ipv6_hdr(skb); | ||
1173 | offset = sizeof(struct ipv6hdr); | ||
1174 | ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); | ||
1175 | if (ic == NULL) | ||
1176 | return NF_DROP; | ||
1177 | |||
1178 | IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n", | ||
1179 | ic->icmp6_type, ntohs(icmpv6_id(ic)), | ||
1180 | NIP6(iph->saddr), NIP6(iph->daddr)); | ||
1181 | |||
1182 | /* | ||
1183 | * Work through seeing if this is for us. | ||
1184 | * These checks are supposed to be in an order that means easy | ||
1185 | * things are checked first to speed up processing.... however | ||
1186 | * this means that some packets will manage to get a long way | ||
1187 | * down this stack and then be rejected, but that's life. | ||
1188 | */ | ||
1189 | if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) && | ||
1190 | (ic->icmp6_type != ICMPV6_PKT_TOOBIG) && | ||
1191 | (ic->icmp6_type != ICMPV6_TIME_EXCEED)) { | ||
1192 | *related = 0; | ||
1193 | return NF_ACCEPT; | ||
1194 | } | ||
1195 | |||
1196 | /* Now find the contained IP header */ | ||
1197 | offset += sizeof(_icmph); | ||
1198 | cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); | ||
1199 | if (cih == NULL) | ||
1200 | return NF_ACCEPT; /* The packet looks wrong, ignore */ | ||
1201 | |||
1202 | pp = ip_vs_proto_get(cih->nexthdr); | ||
1203 | if (!pp) | ||
1204 | return NF_ACCEPT; | ||
1205 | |||
1206 | /* Is the embedded protocol header present? */ | ||
1207 | /* TODO: we don't support fragmentation at the moment anyways */ | ||
1208 | if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) | ||
1209 | return NF_ACCEPT; | ||
1210 | |||
1211 | IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMPv6 for"); | ||
1212 | |||
1213 | offset += sizeof(struct ipv6hdr); | ||
1214 | |||
1215 | ip_vs_fill_iphdr(AF_INET6, cih, &ciph); | ||
1216 | /* The embedded headers contain source and dest in reverse order */ | ||
1217 | cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1); | ||
1218 | if (!cp) { | ||
1219 | /* The packet could also belong to a local client */ | ||
1220 | cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); | ||
1221 | if (cp) { | ||
1222 | ipv6_addr_copy(&snet.in6, &iph->saddr); | ||
1223 | return handle_response_icmp(AF_INET6, skb, &snet, | ||
1224 | cih->nexthdr, | ||
1225 | cp, pp, offset, | ||
1226 | sizeof(struct ipv6hdr)); | ||
1227 | } | ||
1228 | return NF_ACCEPT; | ||
1229 | } | ||
1230 | |||
1231 | verdict = NF_DROP; | ||
1232 | |||
1233 | /* do the statistics and put it back */ | ||
1234 | ip_vs_in_stats(cp, skb); | ||
1235 | if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr) | ||
1236 | offset += 2 * sizeof(__u16); | ||
1237 | verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset); | ||
1238 | /* do not touch skb anymore */ | ||
1239 | |||
1240 | __ip_vs_conn_put(cp); | ||
1241 | |||
1242 | return verdict; | ||
1243 | } | ||
1244 | #endif | ||
1245 | |||
1246 | |||
891 | /* | 1247 | /* |
892 | * Check if it's for virtual services, look it up, | 1248 | * Check if it's for virtual services, look it up, |
893 | * and send it on its way... | 1249 | * and send it on its way... |
@@ -897,50 +1253,54 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, | |||
897 | const struct net_device *in, const struct net_device *out, | 1253 | const struct net_device *in, const struct net_device *out, |
898 | int (*okfn)(struct sk_buff *)) | 1254 | int (*okfn)(struct sk_buff *)) |
899 | { | 1255 | { |
900 | struct iphdr *iph; | 1256 | struct ip_vs_iphdr iph; |
901 | struct ip_vs_protocol *pp; | 1257 | struct ip_vs_protocol *pp; |
902 | struct ip_vs_conn *cp; | 1258 | struct ip_vs_conn *cp; |
903 | int ret, restart; | 1259 | int ret, restart, af; |
904 | int ihl; | 1260 | |
1261 | af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; | ||
1262 | |||
1263 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | ||
905 | 1264 | ||
906 | /* | 1265 | /* |
907 | * Big tappo: only PACKET_HOST (neither loopback nor mcasts) | 1266 | * Big tappo: only PACKET_HOST, including loopback for local client |
908 | * ... don't know why 1st test DOES NOT include 2nd (?) | 1267 | * Don't handle local packets on IPv6 for now |
909 | */ | 1268 | */ |
910 | if (unlikely(skb->pkt_type != PACKET_HOST | 1269 | if (unlikely(skb->pkt_type != PACKET_HOST)) { |
911 | || skb->dev->flags & IFF_LOOPBACK || skb->sk)) { | 1270 | IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n", |
912 | IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n", | 1271 | skb->pkt_type, |
913 | skb->pkt_type, | 1272 | iph.protocol, |
914 | ip_hdr(skb)->protocol, | 1273 | IP_VS_DBG_ADDR(af, &iph.daddr)); |
915 | NIPQUAD(ip_hdr(skb)->daddr)); | ||
916 | return NF_ACCEPT; | 1274 | return NF_ACCEPT; |
917 | } | 1275 | } |
918 | 1276 | ||
919 | iph = ip_hdr(skb); | 1277 | if (unlikely(iph.protocol == IPPROTO_ICMP)) { |
920 | if (unlikely(iph->protocol == IPPROTO_ICMP)) { | ||
921 | int related, verdict = ip_vs_in_icmp(skb, &related, hooknum); | 1278 | int related, verdict = ip_vs_in_icmp(skb, &related, hooknum); |
922 | 1279 | ||
923 | if (related) | 1280 | if (related) |
924 | return verdict; | 1281 | return verdict; |
925 | iph = ip_hdr(skb); | 1282 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); |
926 | } | 1283 | } |
927 | 1284 | ||
928 | /* Protocol supported? */ | 1285 | /* Protocol supported? */ |
929 | pp = ip_vs_proto_get(iph->protocol); | 1286 | pp = ip_vs_proto_get(iph.protocol); |
930 | if (unlikely(!pp)) | 1287 | if (unlikely(!pp)) |
931 | return NF_ACCEPT; | 1288 | return NF_ACCEPT; |
932 | 1289 | ||
933 | ihl = iph->ihl << 2; | ||
934 | |||
935 | /* | 1290 | /* |
936 | * Check if the packet belongs to an existing connection entry | 1291 | * Check if the packet belongs to an existing connection entry |
937 | */ | 1292 | */ |
938 | cp = pp->conn_in_get(skb, pp, iph, ihl, 0); | 1293 | cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0); |
939 | 1294 | ||
940 | if (unlikely(!cp)) { | 1295 | if (unlikely(!cp)) { |
941 | int v; | 1296 | int v; |
942 | 1297 | ||
943 | if (!pp->conn_schedule(skb, pp, &v, &cp)) | 1298 | /* For local client packets, it could be a response */ |
1299 | cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); | ||
1300 | if (cp) | ||
1301 | return handle_response(af, skb, pp, cp, iph.len); | ||
1302 | |||
1303 | if (!pp->conn_schedule(af, skb, pp, &v, &cp)) | ||
944 | return v; | 1304 | return v; |
945 | } | 1305 | } |
946 | 1306 | ||
@@ -984,7 +1344,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, | |||
984 | * encorage the standby servers to update the connections timeout | 1344 | * encorage the standby servers to update the connections timeout |
985 | */ | 1345 | */ |
986 | atomic_inc(&cp->in_pkts); | 1346 | atomic_inc(&cp->in_pkts); |
987 | if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && | 1347 | if (af == AF_INET && |
1348 | (ip_vs_sync_state & IP_VS_STATE_MASTER) && | ||
988 | (((cp->protocol != IPPROTO_TCP || | 1349 | (((cp->protocol != IPPROTO_TCP || |
989 | cp->state == IP_VS_TCP_S_ESTABLISHED) && | 1350 | cp->state == IP_VS_TCP_S_ESTABLISHED) && |
990 | (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1] | 1351 | (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1] |
@@ -1023,6 +1384,21 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, | |||
1023 | return ip_vs_in_icmp(skb, &r, hooknum); | 1384 | return ip_vs_in_icmp(skb, &r, hooknum); |
1024 | } | 1385 | } |
1025 | 1386 | ||
1387 | #ifdef CONFIG_IP_VS_IPV6 | ||
1388 | static unsigned int | ||
1389 | ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, | ||
1390 | const struct net_device *in, const struct net_device *out, | ||
1391 | int (*okfn)(struct sk_buff *)) | ||
1392 | { | ||
1393 | int r; | ||
1394 | |||
1395 | if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) | ||
1396 | return NF_ACCEPT; | ||
1397 | |||
1398 | return ip_vs_in_icmp_v6(skb, &r, hooknum); | ||
1399 | } | ||
1400 | #endif | ||
1401 | |||
1026 | 1402 | ||
1027 | static struct nf_hook_ops ip_vs_ops[] __read_mostly = { | 1403 | static struct nf_hook_ops ip_vs_ops[] __read_mostly = { |
1028 | /* After packet filtering, forward packet through VS/DR, VS/TUN, | 1404 | /* After packet filtering, forward packet through VS/DR, VS/TUN, |
@@ -1060,6 +1436,43 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { | |||
1060 | .hooknum = NF_INET_POST_ROUTING, | 1436 | .hooknum = NF_INET_POST_ROUTING, |
1061 | .priority = NF_IP_PRI_NAT_SRC-1, | 1437 | .priority = NF_IP_PRI_NAT_SRC-1, |
1062 | }, | 1438 | }, |
1439 | #ifdef CONFIG_IP_VS_IPV6 | ||
1440 | /* After packet filtering, forward packet through VS/DR, VS/TUN, | ||
1441 | * or VS/NAT(change destination), so that filtering rules can be | ||
1442 | * applied to IPVS. */ | ||
1443 | { | ||
1444 | .hook = ip_vs_in, | ||
1445 | .owner = THIS_MODULE, | ||
1446 | .pf = PF_INET6, | ||
1447 | .hooknum = NF_INET_LOCAL_IN, | ||
1448 | .priority = 100, | ||
1449 | }, | ||
1450 | /* After packet filtering, change source only for VS/NAT */ | ||
1451 | { | ||
1452 | .hook = ip_vs_out, | ||
1453 | .owner = THIS_MODULE, | ||
1454 | .pf = PF_INET6, | ||
1455 | .hooknum = NF_INET_FORWARD, | ||
1456 | .priority = 100, | ||
1457 | }, | ||
1458 | /* After packet filtering (but before ip_vs_out_icmp), catch icmp | ||
1459 | * destined for 0.0.0.0/0, which is for incoming IPVS connections */ | ||
1460 | { | ||
1461 | .hook = ip_vs_forward_icmp_v6, | ||
1462 | .owner = THIS_MODULE, | ||
1463 | .pf = PF_INET6, | ||
1464 | .hooknum = NF_INET_FORWARD, | ||
1465 | .priority = 99, | ||
1466 | }, | ||
1467 | /* Before the netfilter connection tracking, exit from POST_ROUTING */ | ||
1468 | { | ||
1469 | .hook = ip_vs_post_routing, | ||
1470 | .owner = THIS_MODULE, | ||
1471 | .pf = PF_INET6, | ||
1472 | .hooknum = NF_INET_POST_ROUTING, | ||
1473 | .priority = NF_IP6_PRI_NAT_SRC-1, | ||
1474 | }, | ||
1475 | #endif | ||
1063 | }; | 1476 | }; |
1064 | 1477 | ||
1065 | 1478 | ||
@@ -1070,10 +1483,12 @@ static int __init ip_vs_init(void) | |||
1070 | { | 1483 | { |
1071 | int ret; | 1484 | int ret; |
1072 | 1485 | ||
1486 | ip_vs_estimator_init(); | ||
1487 | |||
1073 | ret = ip_vs_control_init(); | 1488 | ret = ip_vs_control_init(); |
1074 | if (ret < 0) { | 1489 | if (ret < 0) { |
1075 | IP_VS_ERR("can't setup control.\n"); | 1490 | IP_VS_ERR("can't setup control.\n"); |
1076 | goto cleanup_nothing; | 1491 | goto cleanup_estimator; |
1077 | } | 1492 | } |
1078 | 1493 | ||
1079 | ip_vs_protocol_init(); | 1494 | ip_vs_protocol_init(); |
@@ -1106,7 +1521,8 @@ static int __init ip_vs_init(void) | |||
1106 | cleanup_protocol: | 1521 | cleanup_protocol: |
1107 | ip_vs_protocol_cleanup(); | 1522 | ip_vs_protocol_cleanup(); |
1108 | ip_vs_control_cleanup(); | 1523 | ip_vs_control_cleanup(); |
1109 | cleanup_nothing: | 1524 | cleanup_estimator: |
1525 | ip_vs_estimator_cleanup(); | ||
1110 | return ret; | 1526 | return ret; |
1111 | } | 1527 | } |
1112 | 1528 | ||
@@ -1117,6 +1533,7 @@ static void __exit ip_vs_cleanup(void) | |||
1117 | ip_vs_app_cleanup(); | 1533 | ip_vs_app_cleanup(); |
1118 | ip_vs_protocol_cleanup(); | 1534 | ip_vs_protocol_cleanup(); |
1119 | ip_vs_control_cleanup(); | 1535 | ip_vs_control_cleanup(); |
1536 | ip_vs_estimator_cleanup(); | ||
1120 | IP_VS_INFO("ipvs unloaded.\n"); | 1537 | IP_VS_INFO("ipvs unloaded.\n"); |
1121 | } | 1538 | } |
1122 | 1539 | ||
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 6379705a8dcb..771551d8fba9 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c | |||
@@ -35,8 +35,13 @@ | |||
35 | 35 | ||
36 | #include <net/net_namespace.h> | 36 | #include <net/net_namespace.h> |
37 | #include <net/ip.h> | 37 | #include <net/ip.h> |
38 | #ifdef CONFIG_IP_VS_IPV6 | ||
39 | #include <net/ipv6.h> | ||
40 | #include <net/ip6_route.h> | ||
41 | #endif | ||
38 | #include <net/route.h> | 42 | #include <net/route.h> |
39 | #include <net/sock.h> | 43 | #include <net/sock.h> |
44 | #include <net/genetlink.h> | ||
40 | 45 | ||
41 | #include <asm/uaccess.h> | 46 | #include <asm/uaccess.h> |
42 | 47 | ||
@@ -90,6 +95,26 @@ int ip_vs_get_debug_level(void) | |||
90 | } | 95 | } |
91 | #endif | 96 | #endif |
92 | 97 | ||
98 | #ifdef CONFIG_IP_VS_IPV6 | ||
99 | /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ | ||
100 | static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) | ||
101 | { | ||
102 | struct rt6_info *rt; | ||
103 | struct flowi fl = { | ||
104 | .oif = 0, | ||
105 | .nl_u = { | ||
106 | .ip6_u = { | ||
107 | .daddr = *addr, | ||
108 | .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } }, | ||
109 | }; | ||
110 | |||
111 | rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); | ||
112 | if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK)) | ||
113 | return 1; | ||
114 | |||
115 | return 0; | ||
116 | } | ||
117 | #endif | ||
93 | /* | 118 | /* |
94 | * update_defense_level is called from keventd and from sysctl, | 119 | * update_defense_level is called from keventd and from sysctl, |
95 | * so it needs to protect itself from softirqs | 120 | * so it needs to protect itself from softirqs |
@@ -281,11 +306,19 @@ static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0); | |||
281 | * Returns hash value for virtual service | 306 | * Returns hash value for virtual service |
282 | */ | 307 | */ |
283 | static __inline__ unsigned | 308 | static __inline__ unsigned |
284 | ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port) | 309 | ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, |
310 | __be16 port) | ||
285 | { | 311 | { |
286 | register unsigned porth = ntohs(port); | 312 | register unsigned porth = ntohs(port); |
313 | __be32 addr_fold = addr->ip; | ||
314 | |||
315 | #ifdef CONFIG_IP_VS_IPV6 | ||
316 | if (af == AF_INET6) | ||
317 | addr_fold = addr->ip6[0]^addr->ip6[1]^ | ||
318 | addr->ip6[2]^addr->ip6[3]; | ||
319 | #endif | ||
287 | 320 | ||
288 | return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth) | 321 | return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) |
289 | & IP_VS_SVC_TAB_MASK; | 322 | & IP_VS_SVC_TAB_MASK; |
290 | } | 323 | } |
291 | 324 | ||
@@ -316,7 +349,8 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) | |||
316 | /* | 349 | /* |
317 | * Hash it by <protocol,addr,port> in ip_vs_svc_table | 350 | * Hash it by <protocol,addr,port> in ip_vs_svc_table |
318 | */ | 351 | */ |
319 | hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port); | 352 | hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr, |
353 | svc->port); | ||
320 | list_add(&svc->s_list, &ip_vs_svc_table[hash]); | 354 | list_add(&svc->s_list, &ip_vs_svc_table[hash]); |
321 | } else { | 355 | } else { |
322 | /* | 356 | /* |
@@ -362,17 +396,19 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) | |||
362 | /* | 396 | /* |
363 | * Get service by {proto,addr,port} in the service table. | 397 | * Get service by {proto,addr,port} in the service table. |
364 | */ | 398 | */ |
365 | static __inline__ struct ip_vs_service * | 399 | static inline struct ip_vs_service * |
366 | __ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport) | 400 | __ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr, |
401 | __be16 vport) | ||
367 | { | 402 | { |
368 | unsigned hash; | 403 | unsigned hash; |
369 | struct ip_vs_service *svc; | 404 | struct ip_vs_service *svc; |
370 | 405 | ||
371 | /* Check for "full" addressed entries */ | 406 | /* Check for "full" addressed entries */ |
372 | hash = ip_vs_svc_hashkey(protocol, vaddr, vport); | 407 | hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport); |
373 | 408 | ||
374 | list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ | 409 | list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ |
375 | if ((svc->addr == vaddr) | 410 | if ((svc->af == af) |
411 | && ip_vs_addr_equal(af, &svc->addr, vaddr) | ||
376 | && (svc->port == vport) | 412 | && (svc->port == vport) |
377 | && (svc->protocol == protocol)) { | 413 | && (svc->protocol == protocol)) { |
378 | /* HIT */ | 414 | /* HIT */ |
@@ -388,7 +424,8 @@ __ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport) | |||
388 | /* | 424 | /* |
389 | * Get service by {fwmark} in the service table. | 425 | * Get service by {fwmark} in the service table. |
390 | */ | 426 | */ |
391 | static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark) | 427 | static inline struct ip_vs_service * |
428 | __ip_vs_svc_fwm_get(int af, __u32 fwmark) | ||
392 | { | 429 | { |
393 | unsigned hash; | 430 | unsigned hash; |
394 | struct ip_vs_service *svc; | 431 | struct ip_vs_service *svc; |
@@ -397,7 +434,7 @@ static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark) | |||
397 | hash = ip_vs_svc_fwm_hashkey(fwmark); | 434 | hash = ip_vs_svc_fwm_hashkey(fwmark); |
398 | 435 | ||
399 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { | 436 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { |
400 | if (svc->fwmark == fwmark) { | 437 | if (svc->fwmark == fwmark && svc->af == af) { |
401 | /* HIT */ | 438 | /* HIT */ |
402 | atomic_inc(&svc->usecnt); | 439 | atomic_inc(&svc->usecnt); |
403 | return svc; | 440 | return svc; |
@@ -408,7 +445,8 @@ static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark) | |||
408 | } | 445 | } |
409 | 446 | ||
410 | struct ip_vs_service * | 447 | struct ip_vs_service * |
411 | ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) | 448 | ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, |
449 | const union nf_inet_addr *vaddr, __be16 vport) | ||
412 | { | 450 | { |
413 | struct ip_vs_service *svc; | 451 | struct ip_vs_service *svc; |
414 | 452 | ||
@@ -417,14 +455,14 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) | |||
417 | /* | 455 | /* |
418 | * Check the table hashed by fwmark first | 456 | * Check the table hashed by fwmark first |
419 | */ | 457 | */ |
420 | if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark))) | 458 | if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark))) |
421 | goto out; | 459 | goto out; |
422 | 460 | ||
423 | /* | 461 | /* |
424 | * Check the table hashed by <protocol,addr,port> | 462 | * Check the table hashed by <protocol,addr,port> |
425 | * for "full" addressed entries | 463 | * for "full" addressed entries |
426 | */ | 464 | */ |
427 | svc = __ip_vs_service_get(protocol, vaddr, vport); | 465 | svc = __ip_vs_service_get(af, protocol, vaddr, vport); |
428 | 466 | ||
429 | if (svc == NULL | 467 | if (svc == NULL |
430 | && protocol == IPPROTO_TCP | 468 | && protocol == IPPROTO_TCP |
@@ -434,7 +472,7 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) | |||
434 | * Check if ftp service entry exists, the packet | 472 | * Check if ftp service entry exists, the packet |
435 | * might belong to FTP data connections. | 473 | * might belong to FTP data connections. |
436 | */ | 474 | */ |
437 | svc = __ip_vs_service_get(protocol, vaddr, FTPPORT); | 475 | svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT); |
438 | } | 476 | } |
439 | 477 | ||
440 | if (svc == NULL | 478 | if (svc == NULL |
@@ -442,16 +480,16 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) | |||
442 | /* | 480 | /* |
443 | * Check if the catch-all port (port zero) exists | 481 | * Check if the catch-all port (port zero) exists |
444 | */ | 482 | */ |
445 | svc = __ip_vs_service_get(protocol, vaddr, 0); | 483 | svc = __ip_vs_service_get(af, protocol, vaddr, 0); |
446 | } | 484 | } |
447 | 485 | ||
448 | out: | 486 | out: |
449 | read_unlock(&__ip_vs_svc_lock); | 487 | read_unlock(&__ip_vs_svc_lock); |
450 | 488 | ||
451 | IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n", | 489 | IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", |
452 | fwmark, ip_vs_proto_name(protocol), | 490 | fwmark, ip_vs_proto_name(protocol), |
453 | NIPQUAD(vaddr), ntohs(vport), | 491 | IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), |
454 | svc?"hit":"not hit"); | 492 | svc ? "hit" : "not hit"); |
455 | 493 | ||
456 | return svc; | 494 | return svc; |
457 | } | 495 | } |
@@ -478,11 +516,20 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest) | |||
478 | /* | 516 | /* |
479 | * Returns hash value for real service | 517 | * Returns hash value for real service |
480 | */ | 518 | */ |
481 | static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port) | 519 | static inline unsigned ip_vs_rs_hashkey(int af, |
520 | const union nf_inet_addr *addr, | ||
521 | __be16 port) | ||
482 | { | 522 | { |
483 | register unsigned porth = ntohs(port); | 523 | register unsigned porth = ntohs(port); |
524 | __be32 addr_fold = addr->ip; | ||
525 | |||
526 | #ifdef CONFIG_IP_VS_IPV6 | ||
527 | if (af == AF_INET6) | ||
528 | addr_fold = addr->ip6[0]^addr->ip6[1]^ | ||
529 | addr->ip6[2]^addr->ip6[3]; | ||
530 | #endif | ||
484 | 531 | ||
485 | return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth) | 532 | return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth) |
486 | & IP_VS_RTAB_MASK; | 533 | & IP_VS_RTAB_MASK; |
487 | } | 534 | } |
488 | 535 | ||
@@ -502,7 +549,8 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest) | |||
502 | * Hash by proto,addr,port, | 549 | * Hash by proto,addr,port, |
503 | * which are the parameters of the real service. | 550 | * which are the parameters of the real service. |
504 | */ | 551 | */ |
505 | hash = ip_vs_rs_hashkey(dest->addr, dest->port); | 552 | hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); |
553 | |||
506 | list_add(&dest->d_list, &ip_vs_rtable[hash]); | 554 | list_add(&dest->d_list, &ip_vs_rtable[hash]); |
507 | 555 | ||
508 | return 1; | 556 | return 1; |
@@ -529,7 +577,9 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest) | |||
529 | * Lookup real service by <proto,addr,port> in the real service table. | 577 | * Lookup real service by <proto,addr,port> in the real service table. |
530 | */ | 578 | */ |
531 | struct ip_vs_dest * | 579 | struct ip_vs_dest * |
532 | ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport) | 580 | ip_vs_lookup_real_service(int af, __u16 protocol, |
581 | const union nf_inet_addr *daddr, | ||
582 | __be16 dport) | ||
533 | { | 583 | { |
534 | unsigned hash; | 584 | unsigned hash; |
535 | struct ip_vs_dest *dest; | 585 | struct ip_vs_dest *dest; |
@@ -538,11 +588,12 @@ ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport) | |||
538 | * Check for "full" addressed entries | 588 | * Check for "full" addressed entries |
539 | * Return the first found entry | 589 | * Return the first found entry |
540 | */ | 590 | */ |
541 | hash = ip_vs_rs_hashkey(daddr, dport); | 591 | hash = ip_vs_rs_hashkey(af, daddr, dport); |
542 | 592 | ||
543 | read_lock(&__ip_vs_rs_lock); | 593 | read_lock(&__ip_vs_rs_lock); |
544 | list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) { | 594 | list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) { |
545 | if ((dest->addr == daddr) | 595 | if ((dest->af == af) |
596 | && ip_vs_addr_equal(af, &dest->addr, daddr) | ||
546 | && (dest->port == dport) | 597 | && (dest->port == dport) |
547 | && ((dest->protocol == protocol) || | 598 | && ((dest->protocol == protocol) || |
548 | dest->vfwmark)) { | 599 | dest->vfwmark)) { |
@@ -560,7 +611,8 @@ ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport) | |||
560 | * Lookup destination by {addr,port} in the given service | 611 | * Lookup destination by {addr,port} in the given service |
561 | */ | 612 | */ |
562 | static struct ip_vs_dest * | 613 | static struct ip_vs_dest * |
563 | ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) | 614 | ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, |
615 | __be16 dport) | ||
564 | { | 616 | { |
565 | struct ip_vs_dest *dest; | 617 | struct ip_vs_dest *dest; |
566 | 618 | ||
@@ -568,7 +620,9 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) | |||
568 | * Find the destination for the given service | 620 | * Find the destination for the given service |
569 | */ | 621 | */ |
570 | list_for_each_entry(dest, &svc->destinations, n_list) { | 622 | list_for_each_entry(dest, &svc->destinations, n_list) { |
571 | if ((dest->addr == daddr) && (dest->port == dport)) { | 623 | if ((dest->af == svc->af) |
624 | && ip_vs_addr_equal(svc->af, &dest->addr, daddr) | ||
625 | && (dest->port == dport)) { | ||
572 | /* HIT */ | 626 | /* HIT */ |
573 | return dest; | 627 | return dest; |
574 | } | 628 | } |
@@ -587,13 +641,15 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) | |||
587 | * ip_vs_lookup_real_service() looked promissing, but | 641 | * ip_vs_lookup_real_service() looked promissing, but |
588 | * seems not working as expected. | 642 | * seems not working as expected. |
589 | */ | 643 | */ |
590 | struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport, | 644 | struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr, |
591 | __be32 vaddr, __be16 vport, __u16 protocol) | 645 | __be16 dport, |
646 | const union nf_inet_addr *vaddr, | ||
647 | __be16 vport, __u16 protocol) | ||
592 | { | 648 | { |
593 | struct ip_vs_dest *dest; | 649 | struct ip_vs_dest *dest; |
594 | struct ip_vs_service *svc; | 650 | struct ip_vs_service *svc; |
595 | 651 | ||
596 | svc = ip_vs_service_get(0, protocol, vaddr, vport); | 652 | svc = ip_vs_service_get(af, 0, protocol, vaddr, vport); |
597 | if (!svc) | 653 | if (!svc) |
598 | return NULL; | 654 | return NULL; |
599 | dest = ip_vs_lookup_dest(svc, daddr, dport); | 655 | dest = ip_vs_lookup_dest(svc, daddr, dport); |
@@ -614,7 +670,8 @@ struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport, | |||
614 | * scheduling. | 670 | * scheduling. |
615 | */ | 671 | */ |
616 | static struct ip_vs_dest * | 672 | static struct ip_vs_dest * |
617 | ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) | 673 | ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, |
674 | __be16 dport) | ||
618 | { | 675 | { |
619 | struct ip_vs_dest *dest, *nxt; | 676 | struct ip_vs_dest *dest, *nxt; |
620 | 677 | ||
@@ -622,17 +679,19 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) | |||
622 | * Find the destination in trash | 679 | * Find the destination in trash |
623 | */ | 680 | */ |
624 | list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { | 681 | list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { |
625 | IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, " | 682 | IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " |
626 | "dest->refcnt=%d\n", | 683 | "dest->refcnt=%d\n", |
627 | dest->vfwmark, | 684 | dest->vfwmark, |
628 | NIPQUAD(dest->addr), ntohs(dest->port), | 685 | IP_VS_DBG_ADDR(svc->af, &dest->addr), |
629 | atomic_read(&dest->refcnt)); | 686 | ntohs(dest->port), |
630 | if (dest->addr == daddr && | 687 | atomic_read(&dest->refcnt)); |
688 | if (dest->af == svc->af && | ||
689 | ip_vs_addr_equal(svc->af, &dest->addr, daddr) && | ||
631 | dest->port == dport && | 690 | dest->port == dport && |
632 | dest->vfwmark == svc->fwmark && | 691 | dest->vfwmark == svc->fwmark && |
633 | dest->protocol == svc->protocol && | 692 | dest->protocol == svc->protocol && |
634 | (svc->fwmark || | 693 | (svc->fwmark || |
635 | (dest->vaddr == svc->addr && | 694 | (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) && |
636 | dest->vport == svc->port))) { | 695 | dest->vport == svc->port))) { |
637 | /* HIT */ | 696 | /* HIT */ |
638 | return dest; | 697 | return dest; |
@@ -642,10 +701,11 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) | |||
642 | * Try to purge the destination from trash if not referenced | 701 | * Try to purge the destination from trash if not referenced |
643 | */ | 702 | */ |
644 | if (atomic_read(&dest->refcnt) == 1) { | 703 | if (atomic_read(&dest->refcnt) == 1) { |
645 | IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u " | 704 | IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u " |
646 | "from trash\n", | 705 | "from trash\n", |
647 | dest->vfwmark, | 706 | dest->vfwmark, |
648 | NIPQUAD(dest->addr), ntohs(dest->port)); | 707 | IP_VS_DBG_ADDR(svc->af, &dest->addr), |
708 | ntohs(dest->port)); | ||
649 | list_del(&dest->n_list); | 709 | list_del(&dest->n_list); |
650 | ip_vs_dst_reset(dest); | 710 | ip_vs_dst_reset(dest); |
651 | __ip_vs_unbind_svc(dest); | 711 | __ip_vs_unbind_svc(dest); |
@@ -684,18 +744,7 @@ ip_vs_zero_stats(struct ip_vs_stats *stats) | |||
684 | { | 744 | { |
685 | spin_lock_bh(&stats->lock); | 745 | spin_lock_bh(&stats->lock); |
686 | 746 | ||
687 | stats->conns = 0; | 747 | memset(&stats->ustats, 0, sizeof(stats->ustats)); |
688 | stats->inpkts = 0; | ||
689 | stats->outpkts = 0; | ||
690 | stats->inbytes = 0; | ||
691 | stats->outbytes = 0; | ||
692 | |||
693 | stats->cps = 0; | ||
694 | stats->inpps = 0; | ||
695 | stats->outpps = 0; | ||
696 | stats->inbps = 0; | ||
697 | stats->outbps = 0; | ||
698 | |||
699 | ip_vs_zero_estimator(stats); | 748 | ip_vs_zero_estimator(stats); |
700 | 749 | ||
701 | spin_unlock_bh(&stats->lock); | 750 | spin_unlock_bh(&stats->lock); |
@@ -706,7 +755,7 @@ ip_vs_zero_stats(struct ip_vs_stats *stats) | |||
706 | */ | 755 | */ |
707 | static void | 756 | static void |
708 | __ip_vs_update_dest(struct ip_vs_service *svc, | 757 | __ip_vs_update_dest(struct ip_vs_service *svc, |
709 | struct ip_vs_dest *dest, struct ip_vs_dest_user *udest) | 758 | struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest) |
710 | { | 759 | { |
711 | int conn_flags; | 760 | int conn_flags; |
712 | 761 | ||
@@ -715,10 +764,18 @@ __ip_vs_update_dest(struct ip_vs_service *svc, | |||
715 | conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE; | 764 | conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE; |
716 | 765 | ||
717 | /* check if local node and update the flags */ | 766 | /* check if local node and update the flags */ |
718 | if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) { | 767 | #ifdef CONFIG_IP_VS_IPV6 |
719 | conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) | 768 | if (svc->af == AF_INET6) { |
720 | | IP_VS_CONN_F_LOCALNODE; | 769 | if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) { |
721 | } | 770 | conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) |
771 | | IP_VS_CONN_F_LOCALNODE; | ||
772 | } | ||
773 | } else | ||
774 | #endif | ||
775 | if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) { | ||
776 | conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) | ||
777 | | IP_VS_CONN_F_LOCALNODE; | ||
778 | } | ||
722 | 779 | ||
723 | /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ | 780 | /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ |
724 | if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) { | 781 | if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) { |
@@ -759,7 +816,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, | |||
759 | * Create a destination for the given service | 816 | * Create a destination for the given service |
760 | */ | 817 | */ |
761 | static int | 818 | static int |
762 | ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, | 819 | ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, |
763 | struct ip_vs_dest **dest_p) | 820 | struct ip_vs_dest **dest_p) |
764 | { | 821 | { |
765 | struct ip_vs_dest *dest; | 822 | struct ip_vs_dest *dest; |
@@ -767,9 +824,20 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, | |||
767 | 824 | ||
768 | EnterFunction(2); | 825 | EnterFunction(2); |
769 | 826 | ||
770 | atype = inet_addr_type(&init_net, udest->addr); | 827 | #ifdef CONFIG_IP_VS_IPV6 |
771 | if (atype != RTN_LOCAL && atype != RTN_UNICAST) | 828 | if (svc->af == AF_INET6) { |
772 | return -EINVAL; | 829 | atype = ipv6_addr_type(&udest->addr.in6); |
830 | if ((!(atype & IPV6_ADDR_UNICAST) || | ||
831 | atype & IPV6_ADDR_LINKLOCAL) && | ||
832 | !__ip_vs_addr_is_local_v6(&udest->addr.in6)) | ||
833 | return -EINVAL; | ||
834 | } else | ||
835 | #endif | ||
836 | { | ||
837 | atype = inet_addr_type(&init_net, udest->addr.ip); | ||
838 | if (atype != RTN_LOCAL && atype != RTN_UNICAST) | ||
839 | return -EINVAL; | ||
840 | } | ||
773 | 841 | ||
774 | dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC); | 842 | dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC); |
775 | if (dest == NULL) { | 843 | if (dest == NULL) { |
@@ -777,11 +845,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, | |||
777 | return -ENOMEM; | 845 | return -ENOMEM; |
778 | } | 846 | } |
779 | 847 | ||
848 | dest->af = svc->af; | ||
780 | dest->protocol = svc->protocol; | 849 | dest->protocol = svc->protocol; |
781 | dest->vaddr = svc->addr; | 850 | dest->vaddr = svc->addr; |
782 | dest->vport = svc->port; | 851 | dest->vport = svc->port; |
783 | dest->vfwmark = svc->fwmark; | 852 | dest->vfwmark = svc->fwmark; |
784 | dest->addr = udest->addr; | 853 | ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr); |
785 | dest->port = udest->port; | 854 | dest->port = udest->port; |
786 | 855 | ||
787 | atomic_set(&dest->activeconns, 0); | 856 | atomic_set(&dest->activeconns, 0); |
@@ -806,10 +875,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, | |||
806 | * Add a destination into an existing service | 875 | * Add a destination into an existing service |
807 | */ | 876 | */ |
808 | static int | 877 | static int |
809 | ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) | 878 | ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) |
810 | { | 879 | { |
811 | struct ip_vs_dest *dest; | 880 | struct ip_vs_dest *dest; |
812 | __be32 daddr = udest->addr; | 881 | union nf_inet_addr daddr; |
813 | __be16 dport = udest->port; | 882 | __be16 dport = udest->port; |
814 | int ret; | 883 | int ret; |
815 | 884 | ||
@@ -826,10 +895,13 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) | |||
826 | return -ERANGE; | 895 | return -ERANGE; |
827 | } | 896 | } |
828 | 897 | ||
898 | ip_vs_addr_copy(svc->af, &daddr, &udest->addr); | ||
899 | |||
829 | /* | 900 | /* |
830 | * Check if the dest already exists in the list | 901 | * Check if the dest already exists in the list |
831 | */ | 902 | */ |
832 | dest = ip_vs_lookup_dest(svc, daddr, dport); | 903 | dest = ip_vs_lookup_dest(svc, &daddr, dport); |
904 | |||
833 | if (dest != NULL) { | 905 | if (dest != NULL) { |
834 | IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n"); | 906 | IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n"); |
835 | return -EEXIST; | 907 | return -EEXIST; |
@@ -839,15 +911,17 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) | |||
839 | * Check if the dest already exists in the trash and | 911 | * Check if the dest already exists in the trash and |
840 | * is from the same service | 912 | * is from the same service |
841 | */ | 913 | */ |
842 | dest = ip_vs_trash_get_dest(svc, daddr, dport); | 914 | dest = ip_vs_trash_get_dest(svc, &daddr, dport); |
915 | |||
843 | if (dest != NULL) { | 916 | if (dest != NULL) { |
844 | IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, " | 917 | IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, " |
845 | "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n", | 918 | "dest->refcnt=%d, service %u/%s:%u\n", |
846 | NIPQUAD(daddr), ntohs(dport), | 919 | IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport), |
847 | atomic_read(&dest->refcnt), | 920 | atomic_read(&dest->refcnt), |
848 | dest->vfwmark, | 921 | dest->vfwmark, |
849 | NIPQUAD(dest->vaddr), | 922 | IP_VS_DBG_ADDR(svc->af, &dest->vaddr), |
850 | ntohs(dest->vport)); | 923 | ntohs(dest->vport)); |
924 | |||
851 | __ip_vs_update_dest(svc, dest, udest); | 925 | __ip_vs_update_dest(svc, dest, udest); |
852 | 926 | ||
853 | /* | 927 | /* |
@@ -868,7 +942,8 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) | |||
868 | svc->num_dests++; | 942 | svc->num_dests++; |
869 | 943 | ||
870 | /* call the update_service function of its scheduler */ | 944 | /* call the update_service function of its scheduler */ |
871 | svc->scheduler->update_service(svc); | 945 | if (svc->scheduler->update_service) |
946 | svc->scheduler->update_service(svc); | ||
872 | 947 | ||
873 | write_unlock_bh(&__ip_vs_svc_lock); | 948 | write_unlock_bh(&__ip_vs_svc_lock); |
874 | return 0; | 949 | return 0; |
@@ -898,7 +973,8 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) | |||
898 | svc->num_dests++; | 973 | svc->num_dests++; |
899 | 974 | ||
900 | /* call the update_service function of its scheduler */ | 975 | /* call the update_service function of its scheduler */ |
901 | svc->scheduler->update_service(svc); | 976 | if (svc->scheduler->update_service) |
977 | svc->scheduler->update_service(svc); | ||
902 | 978 | ||
903 | write_unlock_bh(&__ip_vs_svc_lock); | 979 | write_unlock_bh(&__ip_vs_svc_lock); |
904 | 980 | ||
@@ -912,10 +988,10 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) | |||
912 | * Edit a destination in the given service | 988 | * Edit a destination in the given service |
913 | */ | 989 | */ |
914 | static int | 990 | static int |
915 | ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) | 991 | ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) |
916 | { | 992 | { |
917 | struct ip_vs_dest *dest; | 993 | struct ip_vs_dest *dest; |
918 | __be32 daddr = udest->addr; | 994 | union nf_inet_addr daddr; |
919 | __be16 dport = udest->port; | 995 | __be16 dport = udest->port; |
920 | 996 | ||
921 | EnterFunction(2); | 997 | EnterFunction(2); |
@@ -931,10 +1007,13 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) | |||
931 | return -ERANGE; | 1007 | return -ERANGE; |
932 | } | 1008 | } |
933 | 1009 | ||
1010 | ip_vs_addr_copy(svc->af, &daddr, &udest->addr); | ||
1011 | |||
934 | /* | 1012 | /* |
935 | * Lookup the destination list | 1013 | * Lookup the destination list |
936 | */ | 1014 | */ |
937 | dest = ip_vs_lookup_dest(svc, daddr, dport); | 1015 | dest = ip_vs_lookup_dest(svc, &daddr, dport); |
1016 | |||
938 | if (dest == NULL) { | 1017 | if (dest == NULL) { |
939 | IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n"); | 1018 | IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n"); |
940 | return -ENOENT; | 1019 | return -ENOENT; |
@@ -948,7 +1027,8 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) | |||
948 | IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); | 1027 | IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); |
949 | 1028 | ||
950 | /* call the update_service, because server weight may be changed */ | 1029 | /* call the update_service, because server weight may be changed */ |
951 | svc->scheduler->update_service(svc); | 1030 | if (svc->scheduler->update_service) |
1031 | svc->scheduler->update_service(svc); | ||
952 | 1032 | ||
953 | write_unlock_bh(&__ip_vs_svc_lock); | 1033 | write_unlock_bh(&__ip_vs_svc_lock); |
954 | 1034 | ||
@@ -987,10 +1067,11 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest) | |||
987 | atomic_dec(&dest->svc->refcnt); | 1067 | atomic_dec(&dest->svc->refcnt); |
988 | kfree(dest); | 1068 | kfree(dest); |
989 | } else { | 1069 | } else { |
990 | IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, " | 1070 | IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " |
991 | "dest->refcnt=%d\n", | 1071 | "dest->refcnt=%d\n", |
992 | NIPQUAD(dest->addr), ntohs(dest->port), | 1072 | IP_VS_DBG_ADDR(dest->af, &dest->addr), |
993 | atomic_read(&dest->refcnt)); | 1073 | ntohs(dest->port), |
1074 | atomic_read(&dest->refcnt)); | ||
994 | list_add(&dest->n_list, &ip_vs_dest_trash); | 1075 | list_add(&dest->n_list, &ip_vs_dest_trash); |
995 | atomic_inc(&dest->refcnt); | 1076 | atomic_inc(&dest->refcnt); |
996 | } | 1077 | } |
@@ -1011,12 +1092,12 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, | |||
1011 | */ | 1092 | */ |
1012 | list_del(&dest->n_list); | 1093 | list_del(&dest->n_list); |
1013 | svc->num_dests--; | 1094 | svc->num_dests--; |
1014 | if (svcupd) { | 1095 | |
1015 | /* | 1096 | /* |
1016 | * Call the update_service function of its scheduler | 1097 | * Call the update_service function of its scheduler |
1017 | */ | 1098 | */ |
1018 | svc->scheduler->update_service(svc); | 1099 | if (svcupd && svc->scheduler->update_service) |
1019 | } | 1100 | svc->scheduler->update_service(svc); |
1020 | } | 1101 | } |
1021 | 1102 | ||
1022 | 1103 | ||
@@ -1024,15 +1105,15 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, | |||
1024 | * Delete a destination server in the given service | 1105 | * Delete a destination server in the given service |
1025 | */ | 1106 | */ |
1026 | static int | 1107 | static int |
1027 | ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest) | 1108 | ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) |
1028 | { | 1109 | { |
1029 | struct ip_vs_dest *dest; | 1110 | struct ip_vs_dest *dest; |
1030 | __be32 daddr = udest->addr; | ||
1031 | __be16 dport = udest->port; | 1111 | __be16 dport = udest->port; |
1032 | 1112 | ||
1033 | EnterFunction(2); | 1113 | EnterFunction(2); |
1034 | 1114 | ||
1035 | dest = ip_vs_lookup_dest(svc, daddr, dport); | 1115 | dest = ip_vs_lookup_dest(svc, &udest->addr, dport); |
1116 | |||
1036 | if (dest == NULL) { | 1117 | if (dest == NULL) { |
1037 | IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n"); | 1118 | IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n"); |
1038 | return -ENOENT; | 1119 | return -ENOENT; |
@@ -1067,7 +1148,8 @@ ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest) | |||
1067 | * Add a service into the service hash table | 1148 | * Add a service into the service hash table |
1068 | */ | 1149 | */ |
1069 | static int | 1150 | static int |
1070 | ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) | 1151 | ip_vs_add_service(struct ip_vs_service_user_kern *u, |
1152 | struct ip_vs_service **svc_p) | ||
1071 | { | 1153 | { |
1072 | int ret = 0; | 1154 | int ret = 0; |
1073 | struct ip_vs_scheduler *sched = NULL; | 1155 | struct ip_vs_scheduler *sched = NULL; |
@@ -1085,6 +1167,19 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) | |||
1085 | goto out_mod_dec; | 1167 | goto out_mod_dec; |
1086 | } | 1168 | } |
1087 | 1169 | ||
1170 | #ifdef CONFIG_IP_VS_IPV6 | ||
1171 | if (u->af == AF_INET6) { | ||
1172 | if (!sched->supports_ipv6) { | ||
1173 | ret = -EAFNOSUPPORT; | ||
1174 | goto out_err; | ||
1175 | } | ||
1176 | if ((u->netmask < 1) || (u->netmask > 128)) { | ||
1177 | ret = -EINVAL; | ||
1178 | goto out_err; | ||
1179 | } | ||
1180 | } | ||
1181 | #endif | ||
1182 | |||
1088 | svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC); | 1183 | svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC); |
1089 | if (svc == NULL) { | 1184 | if (svc == NULL) { |
1090 | IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n"); | 1185 | IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n"); |
@@ -1096,8 +1191,9 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) | |||
1096 | atomic_set(&svc->usecnt, 1); | 1191 | atomic_set(&svc->usecnt, 1); |
1097 | atomic_set(&svc->refcnt, 0); | 1192 | atomic_set(&svc->refcnt, 0); |
1098 | 1193 | ||
1194 | svc->af = u->af; | ||
1099 | svc->protocol = u->protocol; | 1195 | svc->protocol = u->protocol; |
1100 | svc->addr = u->addr; | 1196 | ip_vs_addr_copy(svc->af, &svc->addr, &u->addr); |
1101 | svc->port = u->port; | 1197 | svc->port = u->port; |
1102 | svc->fwmark = u->fwmark; | 1198 | svc->fwmark = u->fwmark; |
1103 | svc->flags = u->flags; | 1199 | svc->flags = u->flags; |
@@ -1121,7 +1217,10 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) | |||
1121 | atomic_inc(&ip_vs_nullsvc_counter); | 1217 | atomic_inc(&ip_vs_nullsvc_counter); |
1122 | 1218 | ||
1123 | ip_vs_new_estimator(&svc->stats); | 1219 | ip_vs_new_estimator(&svc->stats); |
1124 | ip_vs_num_services++; | 1220 | |
1221 | /* Count only IPv4 services for old get/setsockopt interface */ | ||
1222 | if (svc->af == AF_INET) | ||
1223 | ip_vs_num_services++; | ||
1125 | 1224 | ||
1126 | /* Hash the service into the service table */ | 1225 | /* Hash the service into the service table */ |
1127 | write_lock_bh(&__ip_vs_svc_lock); | 1226 | write_lock_bh(&__ip_vs_svc_lock); |
@@ -1156,7 +1255,7 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) | |||
1156 | * Edit a service and bind it with a new scheduler | 1255 | * Edit a service and bind it with a new scheduler |
1157 | */ | 1256 | */ |
1158 | static int | 1257 | static int |
1159 | ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u) | 1258 | ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) |
1160 | { | 1259 | { |
1161 | struct ip_vs_scheduler *sched, *old_sched; | 1260 | struct ip_vs_scheduler *sched, *old_sched; |
1162 | int ret = 0; | 1261 | int ret = 0; |
@@ -1172,6 +1271,19 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u) | |||
1172 | } | 1271 | } |
1173 | old_sched = sched; | 1272 | old_sched = sched; |
1174 | 1273 | ||
1274 | #ifdef CONFIG_IP_VS_IPV6 | ||
1275 | if (u->af == AF_INET6) { | ||
1276 | if (!sched->supports_ipv6) { | ||
1277 | ret = -EAFNOSUPPORT; | ||
1278 | goto out; | ||
1279 | } | ||
1280 | if ((u->netmask < 1) || (u->netmask > 128)) { | ||
1281 | ret = -EINVAL; | ||
1282 | goto out; | ||
1283 | } | ||
1284 | } | ||
1285 | #endif | ||
1286 | |||
1175 | write_lock_bh(&__ip_vs_svc_lock); | 1287 | write_lock_bh(&__ip_vs_svc_lock); |
1176 | 1288 | ||
1177 | /* | 1289 | /* |
@@ -1193,7 +1305,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u) | |||
1193 | */ | 1305 | */ |
1194 | if ((ret = ip_vs_unbind_scheduler(svc))) { | 1306 | if ((ret = ip_vs_unbind_scheduler(svc))) { |
1195 | old_sched = sched; | 1307 | old_sched = sched; |
1196 | goto out; | 1308 | goto out_unlock; |
1197 | } | 1309 | } |
1198 | 1310 | ||
1199 | /* | 1311 | /* |
@@ -1212,12 +1324,13 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u) | |||
1212 | */ | 1324 | */ |
1213 | ip_vs_bind_scheduler(svc, old_sched); | 1325 | ip_vs_bind_scheduler(svc, old_sched); |
1214 | old_sched = sched; | 1326 | old_sched = sched; |
1215 | goto out; | 1327 | goto out_unlock; |
1216 | } | 1328 | } |
1217 | } | 1329 | } |
1218 | 1330 | ||
1219 | out: | 1331 | out_unlock: |
1220 | write_unlock_bh(&__ip_vs_svc_lock); | 1332 | write_unlock_bh(&__ip_vs_svc_lock); |
1333 | out: | ||
1221 | 1334 | ||
1222 | if (old_sched) | 1335 | if (old_sched) |
1223 | ip_vs_scheduler_put(old_sched); | 1336 | ip_vs_scheduler_put(old_sched); |
@@ -1236,7 +1349,10 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) | |||
1236 | struct ip_vs_dest *dest, *nxt; | 1349 | struct ip_vs_dest *dest, *nxt; |
1237 | struct ip_vs_scheduler *old_sched; | 1350 | struct ip_vs_scheduler *old_sched; |
1238 | 1351 | ||
1239 | ip_vs_num_services--; | 1352 | /* Count only IPv4 services for old get/setsockopt interface */ |
1353 | if (svc->af == AF_INET) | ||
1354 | ip_vs_num_services--; | ||
1355 | |||
1240 | ip_vs_kill_estimator(&svc->stats); | 1356 | ip_vs_kill_estimator(&svc->stats); |
1241 | 1357 | ||
1242 | /* Unbind scheduler */ | 1358 | /* Unbind scheduler */ |
@@ -1671,6 +1787,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) | |||
1671 | } | 1787 | } |
1672 | 1788 | ||
1673 | static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) | 1789 | static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) |
1790 | __acquires(__ip_vs_svc_lock) | ||
1674 | { | 1791 | { |
1675 | 1792 | ||
1676 | read_lock_bh(&__ip_vs_svc_lock); | 1793 | read_lock_bh(&__ip_vs_svc_lock); |
@@ -1724,6 +1841,7 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
1724 | } | 1841 | } |
1725 | 1842 | ||
1726 | static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) | 1843 | static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) |
1844 | __releases(__ip_vs_svc_lock) | ||
1727 | { | 1845 | { |
1728 | read_unlock_bh(&__ip_vs_svc_lock); | 1846 | read_unlock_bh(&__ip_vs_svc_lock); |
1729 | } | 1847 | } |
@@ -1744,15 +1862,25 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) | |||
1744 | const struct ip_vs_iter *iter = seq->private; | 1862 | const struct ip_vs_iter *iter = seq->private; |
1745 | const struct ip_vs_dest *dest; | 1863 | const struct ip_vs_dest *dest; |
1746 | 1864 | ||
1747 | if (iter->table == ip_vs_svc_table) | 1865 | if (iter->table == ip_vs_svc_table) { |
1748 | seq_printf(seq, "%s %08X:%04X %s ", | 1866 | #ifdef CONFIG_IP_VS_IPV6 |
1749 | ip_vs_proto_name(svc->protocol), | 1867 | if (svc->af == AF_INET6) |
1750 | ntohl(svc->addr), | 1868 | seq_printf(seq, "%s [" NIP6_FMT "]:%04X %s ", |
1751 | ntohs(svc->port), | 1869 | ip_vs_proto_name(svc->protocol), |
1752 | svc->scheduler->name); | 1870 | NIP6(svc->addr.in6), |
1753 | else | 1871 | ntohs(svc->port), |
1872 | svc->scheduler->name); | ||
1873 | else | ||
1874 | #endif | ||
1875 | seq_printf(seq, "%s %08X:%04X %s ", | ||
1876 | ip_vs_proto_name(svc->protocol), | ||
1877 | ntohl(svc->addr.ip), | ||
1878 | ntohs(svc->port), | ||
1879 | svc->scheduler->name); | ||
1880 | } else { | ||
1754 | seq_printf(seq, "FWM %08X %s ", | 1881 | seq_printf(seq, "FWM %08X %s ", |
1755 | svc->fwmark, svc->scheduler->name); | 1882 | svc->fwmark, svc->scheduler->name); |
1883 | } | ||
1756 | 1884 | ||
1757 | if (svc->flags & IP_VS_SVC_F_PERSISTENT) | 1885 | if (svc->flags & IP_VS_SVC_F_PERSISTENT) |
1758 | seq_printf(seq, "persistent %d %08X\n", | 1886 | seq_printf(seq, "persistent %d %08X\n", |
@@ -1762,13 +1890,29 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) | |||
1762 | seq_putc(seq, '\n'); | 1890 | seq_putc(seq, '\n'); |
1763 | 1891 | ||
1764 | list_for_each_entry(dest, &svc->destinations, n_list) { | 1892 | list_for_each_entry(dest, &svc->destinations, n_list) { |
1765 | seq_printf(seq, | 1893 | #ifdef CONFIG_IP_VS_IPV6 |
1766 | " -> %08X:%04X %-7s %-6d %-10d %-10d\n", | 1894 | if (dest->af == AF_INET6) |
1767 | ntohl(dest->addr), ntohs(dest->port), | 1895 | seq_printf(seq, |
1768 | ip_vs_fwd_name(atomic_read(&dest->conn_flags)), | 1896 | " -> [" NIP6_FMT "]:%04X" |
1769 | atomic_read(&dest->weight), | 1897 | " %-7s %-6d %-10d %-10d\n", |
1770 | atomic_read(&dest->activeconns), | 1898 | NIP6(dest->addr.in6), |
1771 | atomic_read(&dest->inactconns)); | 1899 | ntohs(dest->port), |
1900 | ip_vs_fwd_name(atomic_read(&dest->conn_flags)), | ||
1901 | atomic_read(&dest->weight), | ||
1902 | atomic_read(&dest->activeconns), | ||
1903 | atomic_read(&dest->inactconns)); | ||
1904 | else | ||
1905 | #endif | ||
1906 | seq_printf(seq, | ||
1907 | " -> %08X:%04X " | ||
1908 | "%-7s %-6d %-10d %-10d\n", | ||
1909 | ntohl(dest->addr.ip), | ||
1910 | ntohs(dest->port), | ||
1911 | ip_vs_fwd_name(atomic_read(&dest->conn_flags)), | ||
1912 | atomic_read(&dest->weight), | ||
1913 | atomic_read(&dest->activeconns), | ||
1914 | atomic_read(&dest->inactconns)); | ||
1915 | |||
1772 | } | 1916 | } |
1773 | } | 1917 | } |
1774 | return 0; | 1918 | return 0; |
@@ -1812,20 +1956,20 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) | |||
1812 | " Conns Packets Packets Bytes Bytes\n"); | 1956 | " Conns Packets Packets Bytes Bytes\n"); |
1813 | 1957 | ||
1814 | spin_lock_bh(&ip_vs_stats.lock); | 1958 | spin_lock_bh(&ip_vs_stats.lock); |
1815 | seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns, | 1959 | seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns, |
1816 | ip_vs_stats.inpkts, ip_vs_stats.outpkts, | 1960 | ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts, |
1817 | (unsigned long long) ip_vs_stats.inbytes, | 1961 | (unsigned long long) ip_vs_stats.ustats.inbytes, |
1818 | (unsigned long long) ip_vs_stats.outbytes); | 1962 | (unsigned long long) ip_vs_stats.ustats.outbytes); |
1819 | 1963 | ||
1820 | /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ | 1964 | /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ |
1821 | seq_puts(seq, | 1965 | seq_puts(seq, |
1822 | " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); | 1966 | " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); |
1823 | seq_printf(seq,"%8X %8X %8X %16X %16X\n", | 1967 | seq_printf(seq,"%8X %8X %8X %16X %16X\n", |
1824 | ip_vs_stats.cps, | 1968 | ip_vs_stats.ustats.cps, |
1825 | ip_vs_stats.inpps, | 1969 | ip_vs_stats.ustats.inpps, |
1826 | ip_vs_stats.outpps, | 1970 | ip_vs_stats.ustats.outpps, |
1827 | ip_vs_stats.inbps, | 1971 | ip_vs_stats.ustats.inbps, |
1828 | ip_vs_stats.outbps); | 1972 | ip_vs_stats.ustats.outbps); |
1829 | spin_unlock_bh(&ip_vs_stats.lock); | 1973 | spin_unlock_bh(&ip_vs_stats.lock); |
1830 | 1974 | ||
1831 | return 0; | 1975 | return 0; |
@@ -1900,14 +2044,44 @@ static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = { | |||
1900 | [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN, | 2044 | [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN, |
1901 | }; | 2045 | }; |
1902 | 2046 | ||
2047 | static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc, | ||
2048 | struct ip_vs_service_user *usvc_compat) | ||
2049 | { | ||
2050 | usvc->af = AF_INET; | ||
2051 | usvc->protocol = usvc_compat->protocol; | ||
2052 | usvc->addr.ip = usvc_compat->addr; | ||
2053 | usvc->port = usvc_compat->port; | ||
2054 | usvc->fwmark = usvc_compat->fwmark; | ||
2055 | |||
2056 | /* Deep copy of sched_name is not needed here */ | ||
2057 | usvc->sched_name = usvc_compat->sched_name; | ||
2058 | |||
2059 | usvc->flags = usvc_compat->flags; | ||
2060 | usvc->timeout = usvc_compat->timeout; | ||
2061 | usvc->netmask = usvc_compat->netmask; | ||
2062 | } | ||
2063 | |||
2064 | static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, | ||
2065 | struct ip_vs_dest_user *udest_compat) | ||
2066 | { | ||
2067 | udest->addr.ip = udest_compat->addr; | ||
2068 | udest->port = udest_compat->port; | ||
2069 | udest->conn_flags = udest_compat->conn_flags; | ||
2070 | udest->weight = udest_compat->weight; | ||
2071 | udest->u_threshold = udest_compat->u_threshold; | ||
2072 | udest->l_threshold = udest_compat->l_threshold; | ||
2073 | } | ||
2074 | |||
1903 | static int | 2075 | static int |
1904 | do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) | 2076 | do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) |
1905 | { | 2077 | { |
1906 | int ret; | 2078 | int ret; |
1907 | unsigned char arg[MAX_ARG_LEN]; | 2079 | unsigned char arg[MAX_ARG_LEN]; |
1908 | struct ip_vs_service_user *usvc; | 2080 | struct ip_vs_service_user *usvc_compat; |
2081 | struct ip_vs_service_user_kern usvc; | ||
1909 | struct ip_vs_service *svc; | 2082 | struct ip_vs_service *svc; |
1910 | struct ip_vs_dest_user *udest; | 2083 | struct ip_vs_dest_user *udest_compat; |
2084 | struct ip_vs_dest_user_kern udest; | ||
1911 | 2085 | ||
1912 | if (!capable(CAP_NET_ADMIN)) | 2086 | if (!capable(CAP_NET_ADMIN)) |
1913 | return -EPERM; | 2087 | return -EPERM; |
@@ -1947,35 +2121,40 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) | |||
1947 | goto out_unlock; | 2121 | goto out_unlock; |
1948 | } | 2122 | } |
1949 | 2123 | ||
1950 | usvc = (struct ip_vs_service_user *)arg; | 2124 | usvc_compat = (struct ip_vs_service_user *)arg; |
1951 | udest = (struct ip_vs_dest_user *)(usvc + 1); | 2125 | udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1); |
2126 | |||
2127 | /* We only use the new structs internally, so copy userspace compat | ||
2128 | * structs to extended internal versions */ | ||
2129 | ip_vs_copy_usvc_compat(&usvc, usvc_compat); | ||
2130 | ip_vs_copy_udest_compat(&udest, udest_compat); | ||
1952 | 2131 | ||
1953 | if (cmd == IP_VS_SO_SET_ZERO) { | 2132 | if (cmd == IP_VS_SO_SET_ZERO) { |
1954 | /* if no service address is set, zero counters in all */ | 2133 | /* if no service address is set, zero counters in all */ |
1955 | if (!usvc->fwmark && !usvc->addr && !usvc->port) { | 2134 | if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { |
1956 | ret = ip_vs_zero_all(); | 2135 | ret = ip_vs_zero_all(); |
1957 | goto out_unlock; | 2136 | goto out_unlock; |
1958 | } | 2137 | } |
1959 | } | 2138 | } |
1960 | 2139 | ||
1961 | /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */ | 2140 | /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */ |
1962 | if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) { | 2141 | if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) { |
1963 | IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n", | 2142 | IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n", |
1964 | usvc->protocol, NIPQUAD(usvc->addr), | 2143 | usvc.protocol, NIPQUAD(usvc.addr.ip), |
1965 | ntohs(usvc->port), usvc->sched_name); | 2144 | ntohs(usvc.port), usvc.sched_name); |
1966 | ret = -EFAULT; | 2145 | ret = -EFAULT; |
1967 | goto out_unlock; | 2146 | goto out_unlock; |
1968 | } | 2147 | } |
1969 | 2148 | ||
1970 | /* Lookup the exact service by <protocol, addr, port> or fwmark */ | 2149 | /* Lookup the exact service by <protocol, addr, port> or fwmark */ |
1971 | if (usvc->fwmark == 0) | 2150 | if (usvc.fwmark == 0) |
1972 | svc = __ip_vs_service_get(usvc->protocol, | 2151 | svc = __ip_vs_service_get(usvc.af, usvc.protocol, |
1973 | usvc->addr, usvc->port); | 2152 | &usvc.addr, usvc.port); |
1974 | else | 2153 | else |
1975 | svc = __ip_vs_svc_fwm_get(usvc->fwmark); | 2154 | svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark); |
1976 | 2155 | ||
1977 | if (cmd != IP_VS_SO_SET_ADD | 2156 | if (cmd != IP_VS_SO_SET_ADD |
1978 | && (svc == NULL || svc->protocol != usvc->protocol)) { | 2157 | && (svc == NULL || svc->protocol != usvc.protocol)) { |
1979 | ret = -ESRCH; | 2158 | ret = -ESRCH; |
1980 | goto out_unlock; | 2159 | goto out_unlock; |
1981 | } | 2160 | } |
@@ -1985,10 +2164,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) | |||
1985 | if (svc != NULL) | 2164 | if (svc != NULL) |
1986 | ret = -EEXIST; | 2165 | ret = -EEXIST; |
1987 | else | 2166 | else |
1988 | ret = ip_vs_add_service(usvc, &svc); | 2167 | ret = ip_vs_add_service(&usvc, &svc); |
1989 | break; | 2168 | break; |
1990 | case IP_VS_SO_SET_EDIT: | 2169 | case IP_VS_SO_SET_EDIT: |
1991 | ret = ip_vs_edit_service(svc, usvc); | 2170 | ret = ip_vs_edit_service(svc, &usvc); |
1992 | break; | 2171 | break; |
1993 | case IP_VS_SO_SET_DEL: | 2172 | case IP_VS_SO_SET_DEL: |
1994 | ret = ip_vs_del_service(svc); | 2173 | ret = ip_vs_del_service(svc); |
@@ -1999,13 +2178,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) | |||
1999 | ret = ip_vs_zero_service(svc); | 2178 | ret = ip_vs_zero_service(svc); |
2000 | break; | 2179 | break; |
2001 | case IP_VS_SO_SET_ADDDEST: | 2180 | case IP_VS_SO_SET_ADDDEST: |
2002 | ret = ip_vs_add_dest(svc, udest); | 2181 | ret = ip_vs_add_dest(svc, &udest); |
2003 | break; | 2182 | break; |
2004 | case IP_VS_SO_SET_EDITDEST: | 2183 | case IP_VS_SO_SET_EDITDEST: |
2005 | ret = ip_vs_edit_dest(svc, udest); | 2184 | ret = ip_vs_edit_dest(svc, &udest); |
2006 | break; | 2185 | break; |
2007 | case IP_VS_SO_SET_DELDEST: | 2186 | case IP_VS_SO_SET_DELDEST: |
2008 | ret = ip_vs_del_dest(svc, udest); | 2187 | ret = ip_vs_del_dest(svc, &udest); |
2009 | break; | 2188 | break; |
2010 | default: | 2189 | default: |
2011 | ret = -EINVAL; | 2190 | ret = -EINVAL; |
@@ -2028,7 +2207,7 @@ static void | |||
2028 | ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) | 2207 | ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) |
2029 | { | 2208 | { |
2030 | spin_lock_bh(&src->lock); | 2209 | spin_lock_bh(&src->lock); |
2031 | memcpy(dst, src, (char*)&src->lock - (char*)src); | 2210 | memcpy(dst, &src->ustats, sizeof(*dst)); |
2032 | spin_unlock_bh(&src->lock); | 2211 | spin_unlock_bh(&src->lock); |
2033 | } | 2212 | } |
2034 | 2213 | ||
@@ -2036,7 +2215,7 @@ static void | |||
2036 | ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) | 2215 | ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) |
2037 | { | 2216 | { |
2038 | dst->protocol = src->protocol; | 2217 | dst->protocol = src->protocol; |
2039 | dst->addr = src->addr; | 2218 | dst->addr = src->addr.ip; |
2040 | dst->port = src->port; | 2219 | dst->port = src->port; |
2041 | dst->fwmark = src->fwmark; | 2220 | dst->fwmark = src->fwmark; |
2042 | strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name)); | 2221 | strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name)); |
@@ -2058,6 +2237,10 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, | |||
2058 | 2237 | ||
2059 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | 2238 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { |
2060 | list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { | 2239 | list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { |
2240 | /* Only expose IPv4 entries to old interface */ | ||
2241 | if (svc->af != AF_INET) | ||
2242 | continue; | ||
2243 | |||
2061 | if (count >= get->num_services) | 2244 | if (count >= get->num_services) |
2062 | goto out; | 2245 | goto out; |
2063 | memset(&entry, 0, sizeof(entry)); | 2246 | memset(&entry, 0, sizeof(entry)); |
@@ -2073,6 +2256,10 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, | |||
2073 | 2256 | ||
2074 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | 2257 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { |
2075 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { | 2258 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { |
2259 | /* Only expose IPv4 entries to old interface */ | ||
2260 | if (svc->af != AF_INET) | ||
2261 | continue; | ||
2262 | |||
2076 | if (count >= get->num_services) | 2263 | if (count >= get->num_services) |
2077 | goto out; | 2264 | goto out; |
2078 | memset(&entry, 0, sizeof(entry)); | 2265 | memset(&entry, 0, sizeof(entry)); |
@@ -2094,13 +2281,15 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, | |||
2094 | struct ip_vs_get_dests __user *uptr) | 2281 | struct ip_vs_get_dests __user *uptr) |
2095 | { | 2282 | { |
2096 | struct ip_vs_service *svc; | 2283 | struct ip_vs_service *svc; |
2284 | union nf_inet_addr addr = { .ip = get->addr }; | ||
2097 | int ret = 0; | 2285 | int ret = 0; |
2098 | 2286 | ||
2099 | if (get->fwmark) | 2287 | if (get->fwmark) |
2100 | svc = __ip_vs_svc_fwm_get(get->fwmark); | 2288 | svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark); |
2101 | else | 2289 | else |
2102 | svc = __ip_vs_service_get(get->protocol, | 2290 | svc = __ip_vs_service_get(AF_INET, get->protocol, &addr, |
2103 | get->addr, get->port); | 2291 | get->port); |
2292 | |||
2104 | if (svc) { | 2293 | if (svc) { |
2105 | int count = 0; | 2294 | int count = 0; |
2106 | struct ip_vs_dest *dest; | 2295 | struct ip_vs_dest *dest; |
@@ -2110,7 +2299,7 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, | |||
2110 | if (count >= get->num_dests) | 2299 | if (count >= get->num_dests) |
2111 | break; | 2300 | break; |
2112 | 2301 | ||
2113 | entry.addr = dest->addr; | 2302 | entry.addr = dest->addr.ip; |
2114 | entry.port = dest->port; | 2303 | entry.port = dest->port; |
2115 | entry.conn_flags = atomic_read(&dest->conn_flags); | 2304 | entry.conn_flags = atomic_read(&dest->conn_flags); |
2116 | entry.weight = atomic_read(&dest->weight); | 2305 | entry.weight = atomic_read(&dest->weight); |
@@ -2235,13 +2424,15 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2235 | { | 2424 | { |
2236 | struct ip_vs_service_entry *entry; | 2425 | struct ip_vs_service_entry *entry; |
2237 | struct ip_vs_service *svc; | 2426 | struct ip_vs_service *svc; |
2427 | union nf_inet_addr addr; | ||
2238 | 2428 | ||
2239 | entry = (struct ip_vs_service_entry *)arg; | 2429 | entry = (struct ip_vs_service_entry *)arg; |
2430 | addr.ip = entry->addr; | ||
2240 | if (entry->fwmark) | 2431 | if (entry->fwmark) |
2241 | svc = __ip_vs_svc_fwm_get(entry->fwmark); | 2432 | svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark); |
2242 | else | 2433 | else |
2243 | svc = __ip_vs_service_get(entry->protocol, | 2434 | svc = __ip_vs_service_get(AF_INET, entry->protocol, |
2244 | entry->addr, entry->port); | 2435 | &addr, entry->port); |
2245 | if (svc) { | 2436 | if (svc) { |
2246 | ip_vs_copy_service(entry, svc); | 2437 | ip_vs_copy_service(entry, svc); |
2247 | if (copy_to_user(user, entry, sizeof(*entry)) != 0) | 2438 | if (copy_to_user(user, entry, sizeof(*entry)) != 0) |
@@ -2320,6 +2511,875 @@ static struct nf_sockopt_ops ip_vs_sockopts = { | |||
2320 | .owner = THIS_MODULE, | 2511 | .owner = THIS_MODULE, |
2321 | }; | 2512 | }; |
2322 | 2513 | ||
2514 | /* | ||
2515 | * Generic Netlink interface | ||
2516 | */ | ||
2517 | |||
2518 | /* IPVS genetlink family */ | ||
2519 | static struct genl_family ip_vs_genl_family = { | ||
2520 | .id = GENL_ID_GENERATE, | ||
2521 | .hdrsize = 0, | ||
2522 | .name = IPVS_GENL_NAME, | ||
2523 | .version = IPVS_GENL_VERSION, | ||
2524 | .maxattr = IPVS_CMD_MAX, | ||
2525 | }; | ||
2526 | |||
2527 | /* Policy used for first-level command attributes */ | ||
2528 | static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = { | ||
2529 | [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED }, | ||
2530 | [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED }, | ||
2531 | [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED }, | ||
2532 | [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 }, | ||
2533 | [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 }, | ||
2534 | [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 }, | ||
2535 | }; | ||
2536 | |||
2537 | /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */ | ||
2538 | static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = { | ||
2539 | [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 }, | ||
2540 | [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING, | ||
2541 | .len = IP_VS_IFNAME_MAXLEN }, | ||
2542 | [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 }, | ||
2543 | }; | ||
2544 | |||
2545 | /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */ | ||
2546 | static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = { | ||
2547 | [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 }, | ||
2548 | [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 }, | ||
2549 | [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY, | ||
2550 | .len = sizeof(union nf_inet_addr) }, | ||
2551 | [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 }, | ||
2552 | [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 }, | ||
2553 | [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING, | ||
2554 | .len = IP_VS_SCHEDNAME_MAXLEN }, | ||
2555 | [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY, | ||
2556 | .len = sizeof(struct ip_vs_flags) }, | ||
2557 | [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 }, | ||
2558 | [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 }, | ||
2559 | [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED }, | ||
2560 | }; | ||
2561 | |||
2562 | /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */ | ||
2563 | static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { | ||
2564 | [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY, | ||
2565 | .len = sizeof(union nf_inet_addr) }, | ||
2566 | [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 }, | ||
2567 | [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 }, | ||
2568 | [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 }, | ||
2569 | [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 }, | ||
2570 | [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 }, | ||
2571 | [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 }, | ||
2572 | [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 }, | ||
2573 | [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 }, | ||
2574 | [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED }, | ||
2575 | }; | ||
2576 | |||
2577 | static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, | ||
2578 | struct ip_vs_stats *stats) | ||
2579 | { | ||
2580 | struct nlattr *nl_stats = nla_nest_start(skb, container_type); | ||
2581 | if (!nl_stats) | ||
2582 | return -EMSGSIZE; | ||
2583 | |||
2584 | spin_lock_bh(&stats->lock); | ||
2585 | |||
2586 | NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns); | ||
2587 | NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts); | ||
2588 | NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts); | ||
2589 | NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes); | ||
2590 | NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes); | ||
2591 | NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps); | ||
2592 | NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps); | ||
2593 | NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps); | ||
2594 | NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps); | ||
2595 | NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps); | ||
2596 | |||
2597 | spin_unlock_bh(&stats->lock); | ||
2598 | |||
2599 | nla_nest_end(skb, nl_stats); | ||
2600 | |||
2601 | return 0; | ||
2602 | |||
2603 | nla_put_failure: | ||
2604 | spin_unlock_bh(&stats->lock); | ||
2605 | nla_nest_cancel(skb, nl_stats); | ||
2606 | return -EMSGSIZE; | ||
2607 | } | ||
2608 | |||
2609 | static int ip_vs_genl_fill_service(struct sk_buff *skb, | ||
2610 | struct ip_vs_service *svc) | ||
2611 | { | ||
2612 | struct nlattr *nl_service; | ||
2613 | struct ip_vs_flags flags = { .flags = svc->flags, | ||
2614 | .mask = ~0 }; | ||
2615 | |||
2616 | nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); | ||
2617 | if (!nl_service) | ||
2618 | return -EMSGSIZE; | ||
2619 | |||
2620 | NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af); | ||
2621 | |||
2622 | if (svc->fwmark) { | ||
2623 | NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark); | ||
2624 | } else { | ||
2625 | NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol); | ||
2626 | NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr); | ||
2627 | NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port); | ||
2628 | } | ||
2629 | |||
2630 | NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name); | ||
2631 | NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags); | ||
2632 | NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ); | ||
2633 | NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask); | ||
2634 | |||
2635 | if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats)) | ||
2636 | goto nla_put_failure; | ||
2637 | |||
2638 | nla_nest_end(skb, nl_service); | ||
2639 | |||
2640 | return 0; | ||
2641 | |||
2642 | nla_put_failure: | ||
2643 | nla_nest_cancel(skb, nl_service); | ||
2644 | return -EMSGSIZE; | ||
2645 | } | ||
2646 | |||
2647 | static int ip_vs_genl_dump_service(struct sk_buff *skb, | ||
2648 | struct ip_vs_service *svc, | ||
2649 | struct netlink_callback *cb) | ||
2650 | { | ||
2651 | void *hdr; | ||
2652 | |||
2653 | hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, | ||
2654 | &ip_vs_genl_family, NLM_F_MULTI, | ||
2655 | IPVS_CMD_NEW_SERVICE); | ||
2656 | if (!hdr) | ||
2657 | return -EMSGSIZE; | ||
2658 | |||
2659 | if (ip_vs_genl_fill_service(skb, svc) < 0) | ||
2660 | goto nla_put_failure; | ||
2661 | |||
2662 | return genlmsg_end(skb, hdr); | ||
2663 | |||
2664 | nla_put_failure: | ||
2665 | genlmsg_cancel(skb, hdr); | ||
2666 | return -EMSGSIZE; | ||
2667 | } | ||
2668 | |||
2669 | static int ip_vs_genl_dump_services(struct sk_buff *skb, | ||
2670 | struct netlink_callback *cb) | ||
2671 | { | ||
2672 | int idx = 0, i; | ||
2673 | int start = cb->args[0]; | ||
2674 | struct ip_vs_service *svc; | ||
2675 | |||
2676 | mutex_lock(&__ip_vs_mutex); | ||
2677 | for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { | ||
2678 | list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { | ||
2679 | if (++idx <= start) | ||
2680 | continue; | ||
2681 | if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { | ||
2682 | idx--; | ||
2683 | goto nla_put_failure; | ||
2684 | } | ||
2685 | } | ||
2686 | } | ||
2687 | |||
2688 | for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { | ||
2689 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { | ||
2690 | if (++idx <= start) | ||
2691 | continue; | ||
2692 | if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { | ||
2693 | idx--; | ||
2694 | goto nla_put_failure; | ||
2695 | } | ||
2696 | } | ||
2697 | } | ||
2698 | |||
2699 | nla_put_failure: | ||
2700 | mutex_unlock(&__ip_vs_mutex); | ||
2701 | cb->args[0] = idx; | ||
2702 | |||
2703 | return skb->len; | ||
2704 | } | ||
2705 | |||
2706 | static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, | ||
2707 | struct nlattr *nla, int full_entry) | ||
2708 | { | ||
2709 | struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1]; | ||
2710 | struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr; | ||
2711 | |||
2712 | /* Parse mandatory identifying service fields first */ | ||
2713 | if (nla == NULL || | ||
2714 | nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy)) | ||
2715 | return -EINVAL; | ||
2716 | |||
2717 | nla_af = attrs[IPVS_SVC_ATTR_AF]; | ||
2718 | nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL]; | ||
2719 | nla_addr = attrs[IPVS_SVC_ATTR_ADDR]; | ||
2720 | nla_port = attrs[IPVS_SVC_ATTR_PORT]; | ||
2721 | nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK]; | ||
2722 | |||
2723 | if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr)))) | ||
2724 | return -EINVAL; | ||
2725 | |||
2726 | usvc->af = nla_get_u16(nla_af); | ||
2727 | #ifdef CONFIG_IP_VS_IPV6 | ||
2728 | if (usvc->af != AF_INET && usvc->af != AF_INET6) | ||
2729 | #else | ||
2730 | if (usvc->af != AF_INET) | ||
2731 | #endif | ||
2732 | return -EAFNOSUPPORT; | ||
2733 | |||
2734 | if (nla_fwmark) { | ||
2735 | usvc->protocol = IPPROTO_TCP; | ||
2736 | usvc->fwmark = nla_get_u32(nla_fwmark); | ||
2737 | } else { | ||
2738 | usvc->protocol = nla_get_u16(nla_protocol); | ||
2739 | nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr)); | ||
2740 | usvc->port = nla_get_u16(nla_port); | ||
2741 | usvc->fwmark = 0; | ||
2742 | } | ||
2743 | |||
2744 | /* If a full entry was requested, check for the additional fields */ | ||
2745 | if (full_entry) { | ||
2746 | struct nlattr *nla_sched, *nla_flags, *nla_timeout, | ||
2747 | *nla_netmask; | ||
2748 | struct ip_vs_flags flags; | ||
2749 | struct ip_vs_service *svc; | ||
2750 | |||
2751 | nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME]; | ||
2752 | nla_flags = attrs[IPVS_SVC_ATTR_FLAGS]; | ||
2753 | nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT]; | ||
2754 | nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK]; | ||
2755 | |||
2756 | if (!(nla_sched && nla_flags && nla_timeout && nla_netmask)) | ||
2757 | return -EINVAL; | ||
2758 | |||
2759 | nla_memcpy(&flags, nla_flags, sizeof(flags)); | ||
2760 | |||
2761 | /* prefill flags from service if it already exists */ | ||
2762 | if (usvc->fwmark) | ||
2763 | svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark); | ||
2764 | else | ||
2765 | svc = __ip_vs_service_get(usvc->af, usvc->protocol, | ||
2766 | &usvc->addr, usvc->port); | ||
2767 | if (svc) { | ||
2768 | usvc->flags = svc->flags; | ||
2769 | ip_vs_service_put(svc); | ||
2770 | } else | ||
2771 | usvc->flags = 0; | ||
2772 | |||
2773 | /* set new flags from userland */ | ||
2774 | usvc->flags = (usvc->flags & ~flags.mask) | | ||
2775 | (flags.flags & flags.mask); | ||
2776 | usvc->sched_name = nla_data(nla_sched); | ||
2777 | usvc->timeout = nla_get_u32(nla_timeout); | ||
2778 | usvc->netmask = nla_get_u32(nla_netmask); | ||
2779 | } | ||
2780 | |||
2781 | return 0; | ||
2782 | } | ||
2783 | |||
2784 | static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla) | ||
2785 | { | ||
2786 | struct ip_vs_service_user_kern usvc; | ||
2787 | int ret; | ||
2788 | |||
2789 | ret = ip_vs_genl_parse_service(&usvc, nla, 0); | ||
2790 | if (ret) | ||
2791 | return ERR_PTR(ret); | ||
2792 | |||
2793 | if (usvc.fwmark) | ||
2794 | return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark); | ||
2795 | else | ||
2796 | return __ip_vs_service_get(usvc.af, usvc.protocol, | ||
2797 | &usvc.addr, usvc.port); | ||
2798 | } | ||
2799 | |||
2800 | static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) | ||
2801 | { | ||
2802 | struct nlattr *nl_dest; | ||
2803 | |||
2804 | nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST); | ||
2805 | if (!nl_dest) | ||
2806 | return -EMSGSIZE; | ||
2807 | |||
2808 | NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr); | ||
2809 | NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port); | ||
2810 | |||
2811 | NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD, | ||
2812 | atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK); | ||
2813 | NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight)); | ||
2814 | NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold); | ||
2815 | NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold); | ||
2816 | NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS, | ||
2817 | atomic_read(&dest->activeconns)); | ||
2818 | NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS, | ||
2819 | atomic_read(&dest->inactconns)); | ||
2820 | NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS, | ||
2821 | atomic_read(&dest->persistconns)); | ||
2822 | |||
2823 | if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats)) | ||
2824 | goto nla_put_failure; | ||
2825 | |||
2826 | nla_nest_end(skb, nl_dest); | ||
2827 | |||
2828 | return 0; | ||
2829 | |||
2830 | nla_put_failure: | ||
2831 | nla_nest_cancel(skb, nl_dest); | ||
2832 | return -EMSGSIZE; | ||
2833 | } | ||
2834 | |||
2835 | static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, | ||
2836 | struct netlink_callback *cb) | ||
2837 | { | ||
2838 | void *hdr; | ||
2839 | |||
2840 | hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, | ||
2841 | &ip_vs_genl_family, NLM_F_MULTI, | ||
2842 | IPVS_CMD_NEW_DEST); | ||
2843 | if (!hdr) | ||
2844 | return -EMSGSIZE; | ||
2845 | |||
2846 | if (ip_vs_genl_fill_dest(skb, dest) < 0) | ||
2847 | goto nla_put_failure; | ||
2848 | |||
2849 | return genlmsg_end(skb, hdr); | ||
2850 | |||
2851 | nla_put_failure: | ||
2852 | genlmsg_cancel(skb, hdr); | ||
2853 | return -EMSGSIZE; | ||
2854 | } | ||
2855 | |||
2856 | static int ip_vs_genl_dump_dests(struct sk_buff *skb, | ||
2857 | struct netlink_callback *cb) | ||
2858 | { | ||
2859 | int idx = 0; | ||
2860 | int start = cb->args[0]; | ||
2861 | struct ip_vs_service *svc; | ||
2862 | struct ip_vs_dest *dest; | ||
2863 | struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; | ||
2864 | |||
2865 | mutex_lock(&__ip_vs_mutex); | ||
2866 | |||
2867 | /* Try to find the service for which to dump destinations */ | ||
2868 | if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, | ||
2869 | IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) | ||
2870 | goto out_err; | ||
2871 | |||
2872 | svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]); | ||
2873 | if (IS_ERR(svc) || svc == NULL) | ||
2874 | goto out_err; | ||
2875 | |||
2876 | /* Dump the destinations */ | ||
2877 | list_for_each_entry(dest, &svc->destinations, n_list) { | ||
2878 | if (++idx <= start) | ||
2879 | continue; | ||
2880 | if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) { | ||
2881 | idx--; | ||
2882 | goto nla_put_failure; | ||
2883 | } | ||
2884 | } | ||
2885 | |||
2886 | nla_put_failure: | ||
2887 | cb->args[0] = idx; | ||
2888 | ip_vs_service_put(svc); | ||
2889 | |||
2890 | out_err: | ||
2891 | mutex_unlock(&__ip_vs_mutex); | ||
2892 | |||
2893 | return skb->len; | ||
2894 | } | ||
2895 | |||
2896 | static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, | ||
2897 | struct nlattr *nla, int full_entry) | ||
2898 | { | ||
2899 | struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1]; | ||
2900 | struct nlattr *nla_addr, *nla_port; | ||
2901 | |||
2902 | /* Parse mandatory identifying destination fields first */ | ||
2903 | if (nla == NULL || | ||
2904 | nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy)) | ||
2905 | return -EINVAL; | ||
2906 | |||
2907 | nla_addr = attrs[IPVS_DEST_ATTR_ADDR]; | ||
2908 | nla_port = attrs[IPVS_DEST_ATTR_PORT]; | ||
2909 | |||
2910 | if (!(nla_addr && nla_port)) | ||
2911 | return -EINVAL; | ||
2912 | |||
2913 | nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr)); | ||
2914 | udest->port = nla_get_u16(nla_port); | ||
2915 | |||
2916 | /* If a full entry was requested, check for the additional fields */ | ||
2917 | if (full_entry) { | ||
2918 | struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh, | ||
2919 | *nla_l_thresh; | ||
2920 | |||
2921 | nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD]; | ||
2922 | nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT]; | ||
2923 | nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH]; | ||
2924 | nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH]; | ||
2925 | |||
2926 | if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh)) | ||
2927 | return -EINVAL; | ||
2928 | |||
2929 | udest->conn_flags = nla_get_u32(nla_fwd) | ||
2930 | & IP_VS_CONN_F_FWD_MASK; | ||
2931 | udest->weight = nla_get_u32(nla_weight); | ||
2932 | udest->u_threshold = nla_get_u32(nla_u_thresh); | ||
2933 | udest->l_threshold = nla_get_u32(nla_l_thresh); | ||
2934 | } | ||
2935 | |||
2936 | return 0; | ||
2937 | } | ||
2938 | |||
2939 | static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state, | ||
2940 | const char *mcast_ifn, __be32 syncid) | ||
2941 | { | ||
2942 | struct nlattr *nl_daemon; | ||
2943 | |||
2944 | nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON); | ||
2945 | if (!nl_daemon) | ||
2946 | return -EMSGSIZE; | ||
2947 | |||
2948 | NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state); | ||
2949 | NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn); | ||
2950 | NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid); | ||
2951 | |||
2952 | nla_nest_end(skb, nl_daemon); | ||
2953 | |||
2954 | return 0; | ||
2955 | |||
2956 | nla_put_failure: | ||
2957 | nla_nest_cancel(skb, nl_daemon); | ||
2958 | return -EMSGSIZE; | ||
2959 | } | ||
2960 | |||
2961 | static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state, | ||
2962 | const char *mcast_ifn, __be32 syncid, | ||
2963 | struct netlink_callback *cb) | ||
2964 | { | ||
2965 | void *hdr; | ||
2966 | hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, | ||
2967 | &ip_vs_genl_family, NLM_F_MULTI, | ||
2968 | IPVS_CMD_NEW_DAEMON); | ||
2969 | if (!hdr) | ||
2970 | return -EMSGSIZE; | ||
2971 | |||
2972 | if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid)) | ||
2973 | goto nla_put_failure; | ||
2974 | |||
2975 | return genlmsg_end(skb, hdr); | ||
2976 | |||
2977 | nla_put_failure: | ||
2978 | genlmsg_cancel(skb, hdr); | ||
2979 | return -EMSGSIZE; | ||
2980 | } | ||
2981 | |||
2982 | static int ip_vs_genl_dump_daemons(struct sk_buff *skb, | ||
2983 | struct netlink_callback *cb) | ||
2984 | { | ||
2985 | mutex_lock(&__ip_vs_mutex); | ||
2986 | if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { | ||
2987 | if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, | ||
2988 | ip_vs_master_mcast_ifn, | ||
2989 | ip_vs_master_syncid, cb) < 0) | ||
2990 | goto nla_put_failure; | ||
2991 | |||
2992 | cb->args[0] = 1; | ||
2993 | } | ||
2994 | |||
2995 | if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { | ||
2996 | if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, | ||
2997 | ip_vs_backup_mcast_ifn, | ||
2998 | ip_vs_backup_syncid, cb) < 0) | ||
2999 | goto nla_put_failure; | ||
3000 | |||
3001 | cb->args[1] = 1; | ||
3002 | } | ||
3003 | |||
3004 | nla_put_failure: | ||
3005 | mutex_unlock(&__ip_vs_mutex); | ||
3006 | |||
3007 | return skb->len; | ||
3008 | } | ||
3009 | |||
3010 | static int ip_vs_genl_new_daemon(struct nlattr **attrs) | ||
3011 | { | ||
3012 | if (!(attrs[IPVS_DAEMON_ATTR_STATE] && | ||
3013 | attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && | ||
3014 | attrs[IPVS_DAEMON_ATTR_SYNC_ID])) | ||
3015 | return -EINVAL; | ||
3016 | |||
3017 | return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), | ||
3018 | nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), | ||
3019 | nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); | ||
3020 | } | ||
3021 | |||
3022 | static int ip_vs_genl_del_daemon(struct nlattr **attrs) | ||
3023 | { | ||
3024 | if (!attrs[IPVS_DAEMON_ATTR_STATE]) | ||
3025 | return -EINVAL; | ||
3026 | |||
3027 | return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); | ||
3028 | } | ||
3029 | |||
3030 | static int ip_vs_genl_set_config(struct nlattr **attrs) | ||
3031 | { | ||
3032 | struct ip_vs_timeout_user t; | ||
3033 | |||
3034 | __ip_vs_get_timeouts(&t); | ||
3035 | |||
3036 | if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) | ||
3037 | t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); | ||
3038 | |||
3039 | if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]) | ||
3040 | t.tcp_fin_timeout = | ||
3041 | nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]); | ||
3042 | |||
3043 | if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) | ||
3044 | t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); | ||
3045 | |||
3046 | return ip_vs_set_timeout(&t); | ||
3047 | } | ||
3048 | |||
3049 | static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) | ||
3050 | { | ||
3051 | struct ip_vs_service *svc = NULL; | ||
3052 | struct ip_vs_service_user_kern usvc; | ||
3053 | struct ip_vs_dest_user_kern udest; | ||
3054 | int ret = 0, cmd; | ||
3055 | int need_full_svc = 0, need_full_dest = 0; | ||
3056 | |||
3057 | cmd = info->genlhdr->cmd; | ||
3058 | |||
3059 | mutex_lock(&__ip_vs_mutex); | ||
3060 | |||
3061 | if (cmd == IPVS_CMD_FLUSH) { | ||
3062 | ret = ip_vs_flush(); | ||
3063 | goto out; | ||
3064 | } else if (cmd == IPVS_CMD_SET_CONFIG) { | ||
3065 | ret = ip_vs_genl_set_config(info->attrs); | ||
3066 | goto out; | ||
3067 | } else if (cmd == IPVS_CMD_NEW_DAEMON || | ||
3068 | cmd == IPVS_CMD_DEL_DAEMON) { | ||
3069 | |||
3070 | struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; | ||
3071 | |||
3072 | if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || | ||
3073 | nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, | ||
3074 | info->attrs[IPVS_CMD_ATTR_DAEMON], | ||
3075 | ip_vs_daemon_policy)) { | ||
3076 | ret = -EINVAL; | ||
3077 | goto out; | ||
3078 | } | ||
3079 | |||
3080 | if (cmd == IPVS_CMD_NEW_DAEMON) | ||
3081 | ret = ip_vs_genl_new_daemon(daemon_attrs); | ||
3082 | else | ||
3083 | ret = ip_vs_genl_del_daemon(daemon_attrs); | ||
3084 | goto out; | ||
3085 | } else if (cmd == IPVS_CMD_ZERO && | ||
3086 | !info->attrs[IPVS_CMD_ATTR_SERVICE]) { | ||
3087 | ret = ip_vs_zero_all(); | ||
3088 | goto out; | ||
3089 | } | ||
3090 | |||
3091 | /* All following commands require a service argument, so check if we | ||
3092 | * received a valid one. We need a full service specification when | ||
3093 | * adding / editing a service. Only identifying members otherwise. */ | ||
3094 | if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) | ||
3095 | need_full_svc = 1; | ||
3096 | |||
3097 | ret = ip_vs_genl_parse_service(&usvc, | ||
3098 | info->attrs[IPVS_CMD_ATTR_SERVICE], | ||
3099 | need_full_svc); | ||
3100 | if (ret) | ||
3101 | goto out; | ||
3102 | |||
3103 | /* Lookup the exact service by <protocol, addr, port> or fwmark */ | ||
3104 | if (usvc.fwmark == 0) | ||
3105 | svc = __ip_vs_service_get(usvc.af, usvc.protocol, | ||
3106 | &usvc.addr, usvc.port); | ||
3107 | else | ||
3108 | svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark); | ||
3109 | |||
3110 | /* Unless we're adding a new service, the service must already exist */ | ||
3111 | if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) { | ||
3112 | ret = -ESRCH; | ||
3113 | goto out; | ||
3114 | } | ||
3115 | |||
3116 | /* Destination commands require a valid destination argument. For | ||
3117 | * adding / editing a destination, we need a full destination | ||
3118 | * specification. */ | ||
3119 | if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST || | ||
3120 | cmd == IPVS_CMD_DEL_DEST) { | ||
3121 | if (cmd != IPVS_CMD_DEL_DEST) | ||
3122 | need_full_dest = 1; | ||
3123 | |||
3124 | ret = ip_vs_genl_parse_dest(&udest, | ||
3125 | info->attrs[IPVS_CMD_ATTR_DEST], | ||
3126 | need_full_dest); | ||
3127 | if (ret) | ||
3128 | goto out; | ||
3129 | } | ||
3130 | |||
3131 | switch (cmd) { | ||
3132 | case IPVS_CMD_NEW_SERVICE: | ||
3133 | if (svc == NULL) | ||
3134 | ret = ip_vs_add_service(&usvc, &svc); | ||
3135 | else | ||
3136 | ret = -EEXIST; | ||
3137 | break; | ||
3138 | case IPVS_CMD_SET_SERVICE: | ||
3139 | ret = ip_vs_edit_service(svc, &usvc); | ||
3140 | break; | ||
3141 | case IPVS_CMD_DEL_SERVICE: | ||
3142 | ret = ip_vs_del_service(svc); | ||
3143 | break; | ||
3144 | case IPVS_CMD_NEW_DEST: | ||
3145 | ret = ip_vs_add_dest(svc, &udest); | ||
3146 | break; | ||
3147 | case IPVS_CMD_SET_DEST: | ||
3148 | ret = ip_vs_edit_dest(svc, &udest); | ||
3149 | break; | ||
3150 | case IPVS_CMD_DEL_DEST: | ||
3151 | ret = ip_vs_del_dest(svc, &udest); | ||
3152 | break; | ||
3153 | case IPVS_CMD_ZERO: | ||
3154 | ret = ip_vs_zero_service(svc); | ||
3155 | break; | ||
3156 | default: | ||
3157 | ret = -EINVAL; | ||
3158 | } | ||
3159 | |||
3160 | out: | ||
3161 | if (svc) | ||
3162 | ip_vs_service_put(svc); | ||
3163 | mutex_unlock(&__ip_vs_mutex); | ||
3164 | |||
3165 | return ret; | ||
3166 | } | ||
3167 | |||
3168 | static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) | ||
3169 | { | ||
3170 | struct sk_buff *msg; | ||
3171 | void *reply; | ||
3172 | int ret, cmd, reply_cmd; | ||
3173 | |||
3174 | cmd = info->genlhdr->cmd; | ||
3175 | |||
3176 | if (cmd == IPVS_CMD_GET_SERVICE) | ||
3177 | reply_cmd = IPVS_CMD_NEW_SERVICE; | ||
3178 | else if (cmd == IPVS_CMD_GET_INFO) | ||
3179 | reply_cmd = IPVS_CMD_SET_INFO; | ||
3180 | else if (cmd == IPVS_CMD_GET_CONFIG) | ||
3181 | reply_cmd = IPVS_CMD_SET_CONFIG; | ||
3182 | else { | ||
3183 | IP_VS_ERR("unknown Generic Netlink command\n"); | ||
3184 | return -EINVAL; | ||
3185 | } | ||
3186 | |||
3187 | msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); | ||
3188 | if (!msg) | ||
3189 | return -ENOMEM; | ||
3190 | |||
3191 | mutex_lock(&__ip_vs_mutex); | ||
3192 | |||
3193 | reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd); | ||
3194 | if (reply == NULL) | ||
3195 | goto nla_put_failure; | ||
3196 | |||
3197 | switch (cmd) { | ||
3198 | case IPVS_CMD_GET_SERVICE: | ||
3199 | { | ||
3200 | struct ip_vs_service *svc; | ||
3201 | |||
3202 | svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]); | ||
3203 | if (IS_ERR(svc)) { | ||
3204 | ret = PTR_ERR(svc); | ||
3205 | goto out_err; | ||
3206 | } else if (svc) { | ||
3207 | ret = ip_vs_genl_fill_service(msg, svc); | ||
3208 | ip_vs_service_put(svc); | ||
3209 | if (ret) | ||
3210 | goto nla_put_failure; | ||
3211 | } else { | ||
3212 | ret = -ESRCH; | ||
3213 | goto out_err; | ||
3214 | } | ||
3215 | |||
3216 | break; | ||
3217 | } | ||
3218 | |||
3219 | case IPVS_CMD_GET_CONFIG: | ||
3220 | { | ||
3221 | struct ip_vs_timeout_user t; | ||
3222 | |||
3223 | __ip_vs_get_timeouts(&t); | ||
3224 | #ifdef CONFIG_IP_VS_PROTO_TCP | ||
3225 | NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout); | ||
3226 | NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, | ||
3227 | t.tcp_fin_timeout); | ||
3228 | #endif | ||
3229 | #ifdef CONFIG_IP_VS_PROTO_UDP | ||
3230 | NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout); | ||
3231 | #endif | ||
3232 | |||
3233 | break; | ||
3234 | } | ||
3235 | |||
3236 | case IPVS_CMD_GET_INFO: | ||
3237 | NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE); | ||
3238 | NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE, | ||
3239 | IP_VS_CONN_TAB_SIZE); | ||
3240 | break; | ||
3241 | } | ||
3242 | |||
3243 | genlmsg_end(msg, reply); | ||
3244 | ret = genlmsg_unicast(msg, info->snd_pid); | ||
3245 | goto out; | ||
3246 | |||
3247 | nla_put_failure: | ||
3248 | IP_VS_ERR("not enough space in Netlink message\n"); | ||
3249 | ret = -EMSGSIZE; | ||
3250 | |||
3251 | out_err: | ||
3252 | nlmsg_free(msg); | ||
3253 | out: | ||
3254 | mutex_unlock(&__ip_vs_mutex); | ||
3255 | |||
3256 | return ret; | ||
3257 | } | ||
3258 | |||
3259 | |||
3260 | static struct genl_ops ip_vs_genl_ops[] __read_mostly = { | ||
3261 | { | ||
3262 | .cmd = IPVS_CMD_NEW_SERVICE, | ||
3263 | .flags = GENL_ADMIN_PERM, | ||
3264 | .policy = ip_vs_cmd_policy, | ||
3265 | .doit = ip_vs_genl_set_cmd, | ||
3266 | }, | ||
3267 | { | ||
3268 | .cmd = IPVS_CMD_SET_SERVICE, | ||
3269 | .flags = GENL_ADMIN_PERM, | ||
3270 | .policy = ip_vs_cmd_policy, | ||
3271 | .doit = ip_vs_genl_set_cmd, | ||
3272 | }, | ||
3273 | { | ||
3274 | .cmd = IPVS_CMD_DEL_SERVICE, | ||
3275 | .flags = GENL_ADMIN_PERM, | ||
3276 | .policy = ip_vs_cmd_policy, | ||
3277 | .doit = ip_vs_genl_set_cmd, | ||
3278 | }, | ||
3279 | { | ||
3280 | .cmd = IPVS_CMD_GET_SERVICE, | ||
3281 | .flags = GENL_ADMIN_PERM, | ||
3282 | .doit = ip_vs_genl_get_cmd, | ||
3283 | .dumpit = ip_vs_genl_dump_services, | ||
3284 | .policy = ip_vs_cmd_policy, | ||
3285 | }, | ||
3286 | { | ||
3287 | .cmd = IPVS_CMD_NEW_DEST, | ||
3288 | .flags = GENL_ADMIN_PERM, | ||
3289 | .policy = ip_vs_cmd_policy, | ||
3290 | .doit = ip_vs_genl_set_cmd, | ||
3291 | }, | ||
3292 | { | ||
3293 | .cmd = IPVS_CMD_SET_DEST, | ||
3294 | .flags = GENL_ADMIN_PERM, | ||
3295 | .policy = ip_vs_cmd_policy, | ||
3296 | .doit = ip_vs_genl_set_cmd, | ||
3297 | }, | ||
3298 | { | ||
3299 | .cmd = IPVS_CMD_DEL_DEST, | ||
3300 | .flags = GENL_ADMIN_PERM, | ||
3301 | .policy = ip_vs_cmd_policy, | ||
3302 | .doit = ip_vs_genl_set_cmd, | ||
3303 | }, | ||
3304 | { | ||
3305 | .cmd = IPVS_CMD_GET_DEST, | ||
3306 | .flags = GENL_ADMIN_PERM, | ||
3307 | .policy = ip_vs_cmd_policy, | ||
3308 | .dumpit = ip_vs_genl_dump_dests, | ||
3309 | }, | ||
3310 | { | ||
3311 | .cmd = IPVS_CMD_NEW_DAEMON, | ||
3312 | .flags = GENL_ADMIN_PERM, | ||
3313 | .policy = ip_vs_cmd_policy, | ||
3314 | .doit = ip_vs_genl_set_cmd, | ||
3315 | }, | ||
3316 | { | ||
3317 | .cmd = IPVS_CMD_DEL_DAEMON, | ||
3318 | .flags = GENL_ADMIN_PERM, | ||
3319 | .policy = ip_vs_cmd_policy, | ||
3320 | .doit = ip_vs_genl_set_cmd, | ||
3321 | }, | ||
3322 | { | ||
3323 | .cmd = IPVS_CMD_GET_DAEMON, | ||
3324 | .flags = GENL_ADMIN_PERM, | ||
3325 | .dumpit = ip_vs_genl_dump_daemons, | ||
3326 | }, | ||
3327 | { | ||
3328 | .cmd = IPVS_CMD_SET_CONFIG, | ||
3329 | .flags = GENL_ADMIN_PERM, | ||
3330 | .policy = ip_vs_cmd_policy, | ||
3331 | .doit = ip_vs_genl_set_cmd, | ||
3332 | }, | ||
3333 | { | ||
3334 | .cmd = IPVS_CMD_GET_CONFIG, | ||
3335 | .flags = GENL_ADMIN_PERM, | ||
3336 | .doit = ip_vs_genl_get_cmd, | ||
3337 | }, | ||
3338 | { | ||
3339 | .cmd = IPVS_CMD_GET_INFO, | ||
3340 | .flags = GENL_ADMIN_PERM, | ||
3341 | .doit = ip_vs_genl_get_cmd, | ||
3342 | }, | ||
3343 | { | ||
3344 | .cmd = IPVS_CMD_ZERO, | ||
3345 | .flags = GENL_ADMIN_PERM, | ||
3346 | .policy = ip_vs_cmd_policy, | ||
3347 | .doit = ip_vs_genl_set_cmd, | ||
3348 | }, | ||
3349 | { | ||
3350 | .cmd = IPVS_CMD_FLUSH, | ||
3351 | .flags = GENL_ADMIN_PERM, | ||
3352 | .doit = ip_vs_genl_set_cmd, | ||
3353 | }, | ||
3354 | }; | ||
3355 | |||
3356 | static int __init ip_vs_genl_register(void) | ||
3357 | { | ||
3358 | int ret, i; | ||
3359 | |||
3360 | ret = genl_register_family(&ip_vs_genl_family); | ||
3361 | if (ret) | ||
3362 | return ret; | ||
3363 | |||
3364 | for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) { | ||
3365 | ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]); | ||
3366 | if (ret) | ||
3367 | goto err_out; | ||
3368 | } | ||
3369 | return 0; | ||
3370 | |||
3371 | err_out: | ||
3372 | genl_unregister_family(&ip_vs_genl_family); | ||
3373 | return ret; | ||
3374 | } | ||
3375 | |||
3376 | static void ip_vs_genl_unregister(void) | ||
3377 | { | ||
3378 | genl_unregister_family(&ip_vs_genl_family); | ||
3379 | } | ||
3380 | |||
3381 | /* End of Generic Netlink interface definitions */ | ||
3382 | |||
2323 | 3383 | ||
2324 | int __init ip_vs_control_init(void) | 3384 | int __init ip_vs_control_init(void) |
2325 | { | 3385 | { |
@@ -2334,6 +3394,13 @@ int __init ip_vs_control_init(void) | |||
2334 | return ret; | 3394 | return ret; |
2335 | } | 3395 | } |
2336 | 3396 | ||
3397 | ret = ip_vs_genl_register(); | ||
3398 | if (ret) { | ||
3399 | IP_VS_ERR("cannot register Generic Netlink interface.\n"); | ||
3400 | nf_unregister_sockopt(&ip_vs_sockopts); | ||
3401 | return ret; | ||
3402 | } | ||
3403 | |||
2337 | proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); | 3404 | proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); |
2338 | proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); | 3405 | proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); |
2339 | 3406 | ||
@@ -2368,6 +3435,7 @@ void ip_vs_control_cleanup(void) | |||
2368 | unregister_sysctl_table(sysctl_header); | 3435 | unregister_sysctl_table(sysctl_header); |
2369 | proc_net_remove(&init_net, "ip_vs_stats"); | 3436 | proc_net_remove(&init_net, "ip_vs_stats"); |
2370 | proc_net_remove(&init_net, "ip_vs"); | 3437 | proc_net_remove(&init_net, "ip_vs"); |
3438 | ip_vs_genl_unregister(); | ||
2371 | nf_unregister_sockopt(&ip_vs_sockopts); | 3439 | nf_unregister_sockopt(&ip_vs_sockopts); |
2372 | LeaveFunction(2); | 3440 | LeaveFunction(2); |
2373 | } | 3441 | } |
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c index fa66824d264f..a16943fd72f1 100644 --- a/net/ipv4/ipvs/ip_vs_dh.c +++ b/net/ipv4/ipvs/ip_vs_dh.c | |||
@@ -218,7 +218,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | |||
218 | IP_VS_DBG(6, "DH: destination IP address %u.%u.%u.%u " | 218 | IP_VS_DBG(6, "DH: destination IP address %u.%u.%u.%u " |
219 | "--> server %u.%u.%u.%u:%d\n", | 219 | "--> server %u.%u.%u.%u:%d\n", |
220 | NIPQUAD(iph->daddr), | 220 | NIPQUAD(iph->daddr), |
221 | NIPQUAD(dest->addr), | 221 | NIPQUAD(dest->addr.ip), |
222 | ntohs(dest->port)); | 222 | ntohs(dest->port)); |
223 | 223 | ||
224 | return dest; | 224 | return dest; |
@@ -234,6 +234,9 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler = | |||
234 | .refcnt = ATOMIC_INIT(0), | 234 | .refcnt = ATOMIC_INIT(0), |
235 | .module = THIS_MODULE, | 235 | .module = THIS_MODULE, |
236 | .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list), | 236 | .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list), |
237 | #ifdef CONFIG_IP_VS_IPV6 | ||
238 | .supports_ipv6 = 0, | ||
239 | #endif | ||
237 | .init_service = ip_vs_dh_init_svc, | 240 | .init_service = ip_vs_dh_init_svc, |
238 | .done_service = ip_vs_dh_done_svc, | 241 | .done_service = ip_vs_dh_done_svc, |
239 | .update_service = ip_vs_dh_update_svc, | 242 | .update_service = ip_vs_dh_update_svc, |
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c index 5a20f93bd7f9..2eb2860dabb5 100644 --- a/net/ipv4/ipvs/ip_vs_est.c +++ b/net/ipv4/ipvs/ip_vs_est.c | |||
@@ -65,37 +65,37 @@ static void estimation_timer(unsigned long arg) | |||
65 | s = container_of(e, struct ip_vs_stats, est); | 65 | s = container_of(e, struct ip_vs_stats, est); |
66 | 66 | ||
67 | spin_lock(&s->lock); | 67 | spin_lock(&s->lock); |
68 | n_conns = s->conns; | 68 | n_conns = s->ustats.conns; |
69 | n_inpkts = s->inpkts; | 69 | n_inpkts = s->ustats.inpkts; |
70 | n_outpkts = s->outpkts; | 70 | n_outpkts = s->ustats.outpkts; |
71 | n_inbytes = s->inbytes; | 71 | n_inbytes = s->ustats.inbytes; |
72 | n_outbytes = s->outbytes; | 72 | n_outbytes = s->ustats.outbytes; |
73 | 73 | ||
74 | /* scaled by 2^10, but divided 2 seconds */ | 74 | /* scaled by 2^10, but divided 2 seconds */ |
75 | rate = (n_conns - e->last_conns)<<9; | 75 | rate = (n_conns - e->last_conns)<<9; |
76 | e->last_conns = n_conns; | 76 | e->last_conns = n_conns; |
77 | e->cps += ((long)rate - (long)e->cps)>>2; | 77 | e->cps += ((long)rate - (long)e->cps)>>2; |
78 | s->cps = (e->cps+0x1FF)>>10; | 78 | s->ustats.cps = (e->cps+0x1FF)>>10; |
79 | 79 | ||
80 | rate = (n_inpkts - e->last_inpkts)<<9; | 80 | rate = (n_inpkts - e->last_inpkts)<<9; |
81 | e->last_inpkts = n_inpkts; | 81 | e->last_inpkts = n_inpkts; |
82 | e->inpps += ((long)rate - (long)e->inpps)>>2; | 82 | e->inpps += ((long)rate - (long)e->inpps)>>2; |
83 | s->inpps = (e->inpps+0x1FF)>>10; | 83 | s->ustats.inpps = (e->inpps+0x1FF)>>10; |
84 | 84 | ||
85 | rate = (n_outpkts - e->last_outpkts)<<9; | 85 | rate = (n_outpkts - e->last_outpkts)<<9; |
86 | e->last_outpkts = n_outpkts; | 86 | e->last_outpkts = n_outpkts; |
87 | e->outpps += ((long)rate - (long)e->outpps)>>2; | 87 | e->outpps += ((long)rate - (long)e->outpps)>>2; |
88 | s->outpps = (e->outpps+0x1FF)>>10; | 88 | s->ustats.outpps = (e->outpps+0x1FF)>>10; |
89 | 89 | ||
90 | rate = (n_inbytes - e->last_inbytes)<<4; | 90 | rate = (n_inbytes - e->last_inbytes)<<4; |
91 | e->last_inbytes = n_inbytes; | 91 | e->last_inbytes = n_inbytes; |
92 | e->inbps += ((long)rate - (long)e->inbps)>>2; | 92 | e->inbps += ((long)rate - (long)e->inbps)>>2; |
93 | s->inbps = (e->inbps+0xF)>>5; | 93 | s->ustats.inbps = (e->inbps+0xF)>>5; |
94 | 94 | ||
95 | rate = (n_outbytes - e->last_outbytes)<<4; | 95 | rate = (n_outbytes - e->last_outbytes)<<4; |
96 | e->last_outbytes = n_outbytes; | 96 | e->last_outbytes = n_outbytes; |
97 | e->outbps += ((long)rate - (long)e->outbps)>>2; | 97 | e->outbps += ((long)rate - (long)e->outbps)>>2; |
98 | s->outbps = (e->outbps+0xF)>>5; | 98 | s->ustats.outbps = (e->outbps+0xF)>>5; |
99 | spin_unlock(&s->lock); | 99 | spin_unlock(&s->lock); |
100 | } | 100 | } |
101 | spin_unlock(&est_lock); | 101 | spin_unlock(&est_lock); |
@@ -108,24 +108,22 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats) | |||
108 | 108 | ||
109 | INIT_LIST_HEAD(&est->list); | 109 | INIT_LIST_HEAD(&est->list); |
110 | 110 | ||
111 | est->last_conns = stats->conns; | 111 | est->last_conns = stats->ustats.conns; |
112 | est->cps = stats->cps<<10; | 112 | est->cps = stats->ustats.cps<<10; |
113 | 113 | ||
114 | est->last_inpkts = stats->inpkts; | 114 | est->last_inpkts = stats->ustats.inpkts; |
115 | est->inpps = stats->inpps<<10; | 115 | est->inpps = stats->ustats.inpps<<10; |
116 | 116 | ||
117 | est->last_outpkts = stats->outpkts; | 117 | est->last_outpkts = stats->ustats.outpkts; |
118 | est->outpps = stats->outpps<<10; | 118 | est->outpps = stats->ustats.outpps<<10; |
119 | 119 | ||
120 | est->last_inbytes = stats->inbytes; | 120 | est->last_inbytes = stats->ustats.inbytes; |
121 | est->inbps = stats->inbps<<5; | 121 | est->inbps = stats->ustats.inbps<<5; |
122 | 122 | ||
123 | est->last_outbytes = stats->outbytes; | 123 | est->last_outbytes = stats->ustats.outbytes; |
124 | est->outbps = stats->outbps<<5; | 124 | est->outbps = stats->ustats.outbps<<5; |
125 | 125 | ||
126 | spin_lock_bh(&est_lock); | 126 | spin_lock_bh(&est_lock); |
127 | if (list_empty(&est_list)) | ||
128 | mod_timer(&est_timer, jiffies + 2 * HZ); | ||
129 | list_add(&est->list, &est_list); | 127 | list_add(&est->list, &est_list); |
130 | spin_unlock_bh(&est_lock); | 128 | spin_unlock_bh(&est_lock); |
131 | } | 129 | } |
@@ -136,11 +134,6 @@ void ip_vs_kill_estimator(struct ip_vs_stats *stats) | |||
136 | 134 | ||
137 | spin_lock_bh(&est_lock); | 135 | spin_lock_bh(&est_lock); |
138 | list_del(&est->list); | 136 | list_del(&est->list); |
139 | while (list_empty(&est_list) && try_to_del_timer_sync(&est_timer) < 0) { | ||
140 | spin_unlock_bh(&est_lock); | ||
141 | cpu_relax(); | ||
142 | spin_lock_bh(&est_lock); | ||
143 | } | ||
144 | spin_unlock_bh(&est_lock); | 137 | spin_unlock_bh(&est_lock); |
145 | } | 138 | } |
146 | 139 | ||
@@ -160,3 +153,14 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats) | |||
160 | est->inbps = 0; | 153 | est->inbps = 0; |
161 | est->outbps = 0; | 154 | est->outbps = 0; |
162 | } | 155 | } |
156 | |||
157 | int __init ip_vs_estimator_init(void) | ||
158 | { | ||
159 | mod_timer(&est_timer, jiffies + 2 * HZ); | ||
160 | return 0; | ||
161 | } | ||
162 | |||
163 | void ip_vs_estimator_cleanup(void) | ||
164 | { | ||
165 | del_timer_sync(&est_timer); | ||
166 | } | ||
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c index c1c758e4f733..2e7dbd8b73a4 100644 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ b/net/ipv4/ipvs/ip_vs_ftp.c | |||
@@ -140,13 +140,21 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
140 | struct tcphdr *th; | 140 | struct tcphdr *th; |
141 | char *data, *data_limit; | 141 | char *data, *data_limit; |
142 | char *start, *end; | 142 | char *start, *end; |
143 | __be32 from; | 143 | union nf_inet_addr from; |
144 | __be16 port; | 144 | __be16 port; |
145 | struct ip_vs_conn *n_cp; | 145 | struct ip_vs_conn *n_cp; |
146 | char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */ | 146 | char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */ |
147 | unsigned buf_len; | 147 | unsigned buf_len; |
148 | int ret; | 148 | int ret; |
149 | 149 | ||
150 | #ifdef CONFIG_IP_VS_IPV6 | ||
151 | /* This application helper doesn't work with IPv6 yet, | ||
152 | * so turn this into a no-op for IPv6 packets | ||
153 | */ | ||
154 | if (cp->af == AF_INET6) | ||
155 | return 1; | ||
156 | #endif | ||
157 | |||
150 | *diff = 0; | 158 | *diff = 0; |
151 | 159 | ||
152 | /* Only useful for established sessions */ | 160 | /* Only useful for established sessions */ |
@@ -166,24 +174,25 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
166 | if (ip_vs_ftp_get_addrport(data, data_limit, | 174 | if (ip_vs_ftp_get_addrport(data, data_limit, |
167 | SERVER_STRING, | 175 | SERVER_STRING, |
168 | sizeof(SERVER_STRING)-1, ')', | 176 | sizeof(SERVER_STRING)-1, ')', |
169 | &from, &port, | 177 | &from.ip, &port, |
170 | &start, &end) != 1) | 178 | &start, &end) != 1) |
171 | return 1; | 179 | return 1; |
172 | 180 | ||
173 | IP_VS_DBG(7, "PASV response (%u.%u.%u.%u:%d) -> " | 181 | IP_VS_DBG(7, "PASV response (%u.%u.%u.%u:%d) -> " |
174 | "%u.%u.%u.%u:%d detected\n", | 182 | "%u.%u.%u.%u:%d detected\n", |
175 | NIPQUAD(from), ntohs(port), NIPQUAD(cp->caddr), 0); | 183 | NIPQUAD(from.ip), ntohs(port), |
184 | NIPQUAD(cp->caddr.ip), 0); | ||
176 | 185 | ||
177 | /* | 186 | /* |
178 | * Now update or create an connection entry for it | 187 | * Now update or create an connection entry for it |
179 | */ | 188 | */ |
180 | n_cp = ip_vs_conn_out_get(iph->protocol, from, port, | 189 | n_cp = ip_vs_conn_out_get(AF_INET, iph->protocol, &from, port, |
181 | cp->caddr, 0); | 190 | &cp->caddr, 0); |
182 | if (!n_cp) { | 191 | if (!n_cp) { |
183 | n_cp = ip_vs_conn_new(IPPROTO_TCP, | 192 | n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP, |
184 | cp->caddr, 0, | 193 | &cp->caddr, 0, |
185 | cp->vaddr, port, | 194 | &cp->vaddr, port, |
186 | from, port, | 195 | &from, port, |
187 | IP_VS_CONN_F_NO_CPORT, | 196 | IP_VS_CONN_F_NO_CPORT, |
188 | cp->dest); | 197 | cp->dest); |
189 | if (!n_cp) | 198 | if (!n_cp) |
@@ -196,9 +205,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
196 | /* | 205 | /* |
197 | * Replace the old passive address with the new one | 206 | * Replace the old passive address with the new one |
198 | */ | 207 | */ |
199 | from = n_cp->vaddr; | 208 | from.ip = n_cp->vaddr.ip; |
200 | port = n_cp->vport; | 209 | port = n_cp->vport; |
201 | sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from), | 210 | sprintf(buf, "%d,%d,%d,%d,%d,%d", NIPQUAD(from.ip), |
202 | (ntohs(port)>>8)&255, ntohs(port)&255); | 211 | (ntohs(port)>>8)&255, ntohs(port)&255); |
203 | buf_len = strlen(buf); | 212 | buf_len = strlen(buf); |
204 | 213 | ||
@@ -243,10 +252,18 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
243 | struct tcphdr *th; | 252 | struct tcphdr *th; |
244 | char *data, *data_start, *data_limit; | 253 | char *data, *data_start, *data_limit; |
245 | char *start, *end; | 254 | char *start, *end; |
246 | __be32 to; | 255 | union nf_inet_addr to; |
247 | __be16 port; | 256 | __be16 port; |
248 | struct ip_vs_conn *n_cp; | 257 | struct ip_vs_conn *n_cp; |
249 | 258 | ||
259 | #ifdef CONFIG_IP_VS_IPV6 | ||
260 | /* This application helper doesn't work with IPv6 yet, | ||
261 | * so turn this into a no-op for IPv6 packets | ||
262 | */ | ||
263 | if (cp->af == AF_INET6) | ||
264 | return 1; | ||
265 | #endif | ||
266 | |||
250 | /* no diff required for incoming packets */ | 267 | /* no diff required for incoming packets */ |
251 | *diff = 0; | 268 | *diff = 0; |
252 | 269 | ||
@@ -291,12 +308,12 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
291 | */ | 308 | */ |
292 | if (ip_vs_ftp_get_addrport(data_start, data_limit, | 309 | if (ip_vs_ftp_get_addrport(data_start, data_limit, |
293 | CLIENT_STRING, sizeof(CLIENT_STRING)-1, | 310 | CLIENT_STRING, sizeof(CLIENT_STRING)-1, |
294 | '\r', &to, &port, | 311 | '\r', &to.ip, &port, |
295 | &start, &end) != 1) | 312 | &start, &end) != 1) |
296 | return 1; | 313 | return 1; |
297 | 314 | ||
298 | IP_VS_DBG(7, "PORT %u.%u.%u.%u:%d detected\n", | 315 | IP_VS_DBG(7, "PORT %u.%u.%u.%u:%d detected\n", |
299 | NIPQUAD(to), ntohs(port)); | 316 | NIPQUAD(to.ip), ntohs(port)); |
300 | 317 | ||
301 | /* Passive mode off */ | 318 | /* Passive mode off */ |
302 | cp->app_data = NULL; | 319 | cp->app_data = NULL; |
@@ -306,16 +323,16 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
306 | */ | 323 | */ |
307 | IP_VS_DBG(7, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n", | 324 | IP_VS_DBG(7, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n", |
308 | ip_vs_proto_name(iph->protocol), | 325 | ip_vs_proto_name(iph->protocol), |
309 | NIPQUAD(to), ntohs(port), NIPQUAD(cp->vaddr), 0); | 326 | NIPQUAD(to.ip), ntohs(port), NIPQUAD(cp->vaddr.ip), 0); |
310 | 327 | ||
311 | n_cp = ip_vs_conn_in_get(iph->protocol, | 328 | n_cp = ip_vs_conn_in_get(AF_INET, iph->protocol, |
312 | to, port, | 329 | &to, port, |
313 | cp->vaddr, htons(ntohs(cp->vport)-1)); | 330 | &cp->vaddr, htons(ntohs(cp->vport)-1)); |
314 | if (!n_cp) { | 331 | if (!n_cp) { |
315 | n_cp = ip_vs_conn_new(IPPROTO_TCP, | 332 | n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP, |
316 | to, port, | 333 | &to, port, |
317 | cp->vaddr, htons(ntohs(cp->vport)-1), | 334 | &cp->vaddr, htons(ntohs(cp->vport)-1), |
318 | cp->daddr, htons(ntohs(cp->dport)-1), | 335 | &cp->daddr, htons(ntohs(cp->dport)-1), |
319 | 0, | 336 | 0, |
320 | cp->dest); | 337 | cp->dest); |
321 | if (!n_cp) | 338 | if (!n_cp) |
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index 7a6a319f544a..6ecef3518cac 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c | |||
@@ -96,7 +96,6 @@ struct ip_vs_lblc_entry { | |||
96 | * IPVS lblc hash table | 96 | * IPVS lblc hash table |
97 | */ | 97 | */ |
98 | struct ip_vs_lblc_table { | 98 | struct ip_vs_lblc_table { |
99 | rwlock_t lock; /* lock for this table */ | ||
100 | struct list_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */ | 99 | struct list_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */ |
101 | atomic_t entries; /* number of entries */ | 100 | atomic_t entries; /* number of entries */ |
102 | int max_size; /* maximum size of entries */ | 101 | int max_size; /* maximum size of entries */ |
@@ -123,31 +122,6 @@ static ctl_table vs_vars_table[] = { | |||
123 | 122 | ||
124 | static struct ctl_table_header * sysctl_header; | 123 | static struct ctl_table_header * sysctl_header; |
125 | 124 | ||
126 | /* | ||
127 | * new/free a ip_vs_lblc_entry, which is a mapping of a destionation | ||
128 | * IP address to a server. | ||
129 | */ | ||
130 | static inline struct ip_vs_lblc_entry * | ||
131 | ip_vs_lblc_new(__be32 daddr, struct ip_vs_dest *dest) | ||
132 | { | ||
133 | struct ip_vs_lblc_entry *en; | ||
134 | |||
135 | en = kmalloc(sizeof(struct ip_vs_lblc_entry), GFP_ATOMIC); | ||
136 | if (en == NULL) { | ||
137 | IP_VS_ERR("ip_vs_lblc_new(): no memory\n"); | ||
138 | return NULL; | ||
139 | } | ||
140 | |||
141 | INIT_LIST_HEAD(&en->list); | ||
142 | en->addr = daddr; | ||
143 | |||
144 | atomic_inc(&dest->refcnt); | ||
145 | en->dest = dest; | ||
146 | |||
147 | return en; | ||
148 | } | ||
149 | |||
150 | |||
151 | static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) | 125 | static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) |
152 | { | 126 | { |
153 | list_del(&en->list); | 127 | list_del(&en->list); |
@@ -173,55 +147,66 @@ static inline unsigned ip_vs_lblc_hashkey(__be32 addr) | |||
173 | * Hash an entry in the ip_vs_lblc_table. | 147 | * Hash an entry in the ip_vs_lblc_table. |
174 | * returns bool success. | 148 | * returns bool success. |
175 | */ | 149 | */ |
176 | static int | 150 | static void |
177 | ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en) | 151 | ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en) |
178 | { | 152 | { |
179 | unsigned hash; | 153 | unsigned hash = ip_vs_lblc_hashkey(en->addr); |
180 | |||
181 | if (!list_empty(&en->list)) { | ||
182 | IP_VS_ERR("ip_vs_lblc_hash(): request for already hashed, " | ||
183 | "called from %p\n", __builtin_return_address(0)); | ||
184 | return 0; | ||
185 | } | ||
186 | |||
187 | /* | ||
188 | * Hash by destination IP address | ||
189 | */ | ||
190 | hash = ip_vs_lblc_hashkey(en->addr); | ||
191 | 154 | ||
192 | write_lock(&tbl->lock); | ||
193 | list_add(&en->list, &tbl->bucket[hash]); | 155 | list_add(&en->list, &tbl->bucket[hash]); |
194 | atomic_inc(&tbl->entries); | 156 | atomic_inc(&tbl->entries); |
195 | write_unlock(&tbl->lock); | ||
196 | |||
197 | return 1; | ||
198 | } | 157 | } |
199 | 158 | ||
200 | 159 | ||
201 | /* | 160 | /* |
202 | * Get ip_vs_lblc_entry associated with supplied parameters. | 161 | * Get ip_vs_lblc_entry associated with supplied parameters. Called under read |
162 | * lock | ||
203 | */ | 163 | */ |
204 | static inline struct ip_vs_lblc_entry * | 164 | static inline struct ip_vs_lblc_entry * |
205 | ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr) | 165 | ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr) |
206 | { | 166 | { |
207 | unsigned hash; | 167 | unsigned hash = ip_vs_lblc_hashkey(addr); |
208 | struct ip_vs_lblc_entry *en; | 168 | struct ip_vs_lblc_entry *en; |
209 | 169 | ||
210 | hash = ip_vs_lblc_hashkey(addr); | 170 | list_for_each_entry(en, &tbl->bucket[hash], list) |
171 | if (en->addr == addr) | ||
172 | return en; | ||
211 | 173 | ||
212 | read_lock(&tbl->lock); | 174 | return NULL; |
175 | } | ||
213 | 176 | ||
214 | list_for_each_entry(en, &tbl->bucket[hash], list) { | 177 | |
215 | if (en->addr == addr) { | 178 | /* |
216 | /* HIT */ | 179 | * Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP |
217 | read_unlock(&tbl->lock); | 180 | * address to a server. Called under write lock. |
218 | return en; | 181 | */ |
182 | static inline struct ip_vs_lblc_entry * | ||
183 | ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, __be32 daddr, | ||
184 | struct ip_vs_dest *dest) | ||
185 | { | ||
186 | struct ip_vs_lblc_entry *en; | ||
187 | |||
188 | en = ip_vs_lblc_get(tbl, daddr); | ||
189 | if (!en) { | ||
190 | en = kmalloc(sizeof(*en), GFP_ATOMIC); | ||
191 | if (!en) { | ||
192 | IP_VS_ERR("ip_vs_lblc_new(): no memory\n"); | ||
193 | return NULL; | ||
219 | } | 194 | } |
220 | } | ||
221 | 195 | ||
222 | read_unlock(&tbl->lock); | 196 | en->addr = daddr; |
197 | en->lastuse = jiffies; | ||
223 | 198 | ||
224 | return NULL; | 199 | atomic_inc(&dest->refcnt); |
200 | en->dest = dest; | ||
201 | |||
202 | ip_vs_lblc_hash(tbl, en); | ||
203 | } else if (en->dest != dest) { | ||
204 | atomic_dec(&en->dest->refcnt); | ||
205 | atomic_inc(&dest->refcnt); | ||
206 | en->dest = dest; | ||
207 | } | ||
208 | |||
209 | return en; | ||
225 | } | 210 | } |
226 | 211 | ||
227 | 212 | ||
@@ -230,30 +215,29 @@ ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr) | |||
230 | */ | 215 | */ |
231 | static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl) | 216 | static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl) |
232 | { | 217 | { |
233 | int i; | ||
234 | struct ip_vs_lblc_entry *en, *nxt; | 218 | struct ip_vs_lblc_entry *en, *nxt; |
219 | int i; | ||
235 | 220 | ||
236 | for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { | 221 | for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { |
237 | write_lock(&tbl->lock); | ||
238 | list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { | 222 | list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { |
239 | ip_vs_lblc_free(en); | 223 | ip_vs_lblc_free(en); |
240 | atomic_dec(&tbl->entries); | 224 | atomic_dec(&tbl->entries); |
241 | } | 225 | } |
242 | write_unlock(&tbl->lock); | ||
243 | } | 226 | } |
244 | } | 227 | } |
245 | 228 | ||
246 | 229 | ||
247 | static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl) | 230 | static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) |
248 | { | 231 | { |
232 | struct ip_vs_lblc_table *tbl = svc->sched_data; | ||
233 | struct ip_vs_lblc_entry *en, *nxt; | ||
249 | unsigned long now = jiffies; | 234 | unsigned long now = jiffies; |
250 | int i, j; | 235 | int i, j; |
251 | struct ip_vs_lblc_entry *en, *nxt; | ||
252 | 236 | ||
253 | for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { | 237 | for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { |
254 | j = (j + 1) & IP_VS_LBLC_TAB_MASK; | 238 | j = (j + 1) & IP_VS_LBLC_TAB_MASK; |
255 | 239 | ||
256 | write_lock(&tbl->lock); | 240 | write_lock(&svc->sched_lock); |
257 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { | 241 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { |
258 | if (time_before(now, | 242 | if (time_before(now, |
259 | en->lastuse + sysctl_ip_vs_lblc_expiration)) | 243 | en->lastuse + sysctl_ip_vs_lblc_expiration)) |
@@ -262,7 +246,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl) | |||
262 | ip_vs_lblc_free(en); | 246 | ip_vs_lblc_free(en); |
263 | atomic_dec(&tbl->entries); | 247 | atomic_dec(&tbl->entries); |
264 | } | 248 | } |
265 | write_unlock(&tbl->lock); | 249 | write_unlock(&svc->sched_lock); |
266 | } | 250 | } |
267 | tbl->rover = j; | 251 | tbl->rover = j; |
268 | } | 252 | } |
@@ -281,17 +265,16 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl) | |||
281 | */ | 265 | */ |
282 | static void ip_vs_lblc_check_expire(unsigned long data) | 266 | static void ip_vs_lblc_check_expire(unsigned long data) |
283 | { | 267 | { |
284 | struct ip_vs_lblc_table *tbl; | 268 | struct ip_vs_service *svc = (struct ip_vs_service *) data; |
269 | struct ip_vs_lblc_table *tbl = svc->sched_data; | ||
285 | unsigned long now = jiffies; | 270 | unsigned long now = jiffies; |
286 | int goal; | 271 | int goal; |
287 | int i, j; | 272 | int i, j; |
288 | struct ip_vs_lblc_entry *en, *nxt; | 273 | struct ip_vs_lblc_entry *en, *nxt; |
289 | 274 | ||
290 | tbl = (struct ip_vs_lblc_table *)data; | ||
291 | |||
292 | if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { | 275 | if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { |
293 | /* do full expiration check */ | 276 | /* do full expiration check */ |
294 | ip_vs_lblc_full_check(tbl); | 277 | ip_vs_lblc_full_check(svc); |
295 | tbl->counter = 1; | 278 | tbl->counter = 1; |
296 | goto out; | 279 | goto out; |
297 | } | 280 | } |
@@ -308,7 +291,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) | |||
308 | for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { | 291 | for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { |
309 | j = (j + 1) & IP_VS_LBLC_TAB_MASK; | 292 | j = (j + 1) & IP_VS_LBLC_TAB_MASK; |
310 | 293 | ||
311 | write_lock(&tbl->lock); | 294 | write_lock(&svc->sched_lock); |
312 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { | 295 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { |
313 | if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) | 296 | if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) |
314 | continue; | 297 | continue; |
@@ -317,7 +300,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) | |||
317 | atomic_dec(&tbl->entries); | 300 | atomic_dec(&tbl->entries); |
318 | goal--; | 301 | goal--; |
319 | } | 302 | } |
320 | write_unlock(&tbl->lock); | 303 | write_unlock(&svc->sched_lock); |
321 | if (goal <= 0) | 304 | if (goal <= 0) |
322 | break; | 305 | break; |
323 | } | 306 | } |
@@ -336,15 +319,14 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) | |||
336 | /* | 319 | /* |
337 | * Allocate the ip_vs_lblc_table for this service | 320 | * Allocate the ip_vs_lblc_table for this service |
338 | */ | 321 | */ |
339 | tbl = kmalloc(sizeof(struct ip_vs_lblc_table), GFP_ATOMIC); | 322 | tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC); |
340 | if (tbl == NULL) { | 323 | if (tbl == NULL) { |
341 | IP_VS_ERR("ip_vs_lblc_init_svc(): no memory\n"); | 324 | IP_VS_ERR("ip_vs_lblc_init_svc(): no memory\n"); |
342 | return -ENOMEM; | 325 | return -ENOMEM; |
343 | } | 326 | } |
344 | svc->sched_data = tbl; | 327 | svc->sched_data = tbl; |
345 | IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) allocated for " | 328 | IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) allocated for " |
346 | "current service\n", | 329 | "current service\n", sizeof(*tbl)); |
347 | sizeof(struct ip_vs_lblc_table)); | ||
348 | 330 | ||
349 | /* | 331 | /* |
350 | * Initialize the hash buckets | 332 | * Initialize the hash buckets |
@@ -352,7 +334,6 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) | |||
352 | for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { | 334 | for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { |
353 | INIT_LIST_HEAD(&tbl->bucket[i]); | 335 | INIT_LIST_HEAD(&tbl->bucket[i]); |
354 | } | 336 | } |
355 | rwlock_init(&tbl->lock); | ||
356 | tbl->max_size = IP_VS_LBLC_TAB_SIZE*16; | 337 | tbl->max_size = IP_VS_LBLC_TAB_SIZE*16; |
357 | tbl->rover = 0; | 338 | tbl->rover = 0; |
358 | tbl->counter = 1; | 339 | tbl->counter = 1; |
@@ -361,9 +342,8 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) | |||
361 | * Hook periodic timer for garbage collection | 342 | * Hook periodic timer for garbage collection |
362 | */ | 343 | */ |
363 | setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire, | 344 | setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire, |
364 | (unsigned long)tbl); | 345 | (unsigned long)svc); |
365 | tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL; | 346 | mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL); |
366 | add_timer(&tbl->periodic_timer); | ||
367 | 347 | ||
368 | return 0; | 348 | return 0; |
369 | } | 349 | } |
@@ -380,22 +360,16 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc) | |||
380 | ip_vs_lblc_flush(tbl); | 360 | ip_vs_lblc_flush(tbl); |
381 | 361 | ||
382 | /* release the table itself */ | 362 | /* release the table itself */ |
383 | kfree(svc->sched_data); | 363 | kfree(tbl); |
384 | IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n", | 364 | IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n", |
385 | sizeof(struct ip_vs_lblc_table)); | 365 | sizeof(*tbl)); |
386 | 366 | ||
387 | return 0; | 367 | return 0; |
388 | } | 368 | } |
389 | 369 | ||
390 | 370 | ||
391 | static int ip_vs_lblc_update_svc(struct ip_vs_service *svc) | ||
392 | { | ||
393 | return 0; | ||
394 | } | ||
395 | |||
396 | |||
397 | static inline struct ip_vs_dest * | 371 | static inline struct ip_vs_dest * |
398 | __ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) | 372 | __ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph) |
399 | { | 373 | { |
400 | struct ip_vs_dest *dest, *least; | 374 | struct ip_vs_dest *dest, *least; |
401 | int loh, doh; | 375 | int loh, doh; |
@@ -448,7 +422,7 @@ __ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) | |||
448 | 422 | ||
449 | IP_VS_DBG(6, "LBLC: server %d.%d.%d.%d:%d " | 423 | IP_VS_DBG(6, "LBLC: server %d.%d.%d.%d:%d " |
450 | "activeconns %d refcnt %d weight %d overhead %d\n", | 424 | "activeconns %d refcnt %d weight %d overhead %d\n", |
451 | NIPQUAD(least->addr), ntohs(least->port), | 425 | NIPQUAD(least->addr.ip), ntohs(least->port), |
452 | atomic_read(&least->activeconns), | 426 | atomic_read(&least->activeconns), |
453 | atomic_read(&least->refcnt), | 427 | atomic_read(&least->refcnt), |
454 | atomic_read(&least->weight), loh); | 428 | atomic_read(&least->weight), loh); |
@@ -484,47 +458,55 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) | |||
484 | static struct ip_vs_dest * | 458 | static struct ip_vs_dest * |
485 | ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | 459 | ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) |
486 | { | 460 | { |
487 | struct ip_vs_dest *dest; | 461 | struct ip_vs_lblc_table *tbl = svc->sched_data; |
488 | struct ip_vs_lblc_table *tbl; | ||
489 | struct ip_vs_lblc_entry *en; | ||
490 | struct iphdr *iph = ip_hdr(skb); | 462 | struct iphdr *iph = ip_hdr(skb); |
463 | struct ip_vs_dest *dest = NULL; | ||
464 | struct ip_vs_lblc_entry *en; | ||
491 | 465 | ||
492 | IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n"); | 466 | IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n"); |
493 | 467 | ||
494 | tbl = (struct ip_vs_lblc_table *)svc->sched_data; | 468 | /* First look in our cache */ |
469 | read_lock(&svc->sched_lock); | ||
495 | en = ip_vs_lblc_get(tbl, iph->daddr); | 470 | en = ip_vs_lblc_get(tbl, iph->daddr); |
496 | if (en == NULL) { | 471 | if (en) { |
497 | dest = __ip_vs_wlc_schedule(svc, iph); | 472 | /* We only hold a read lock, but this is atomic */ |
498 | if (dest == NULL) { | 473 | en->lastuse = jiffies; |
499 | IP_VS_DBG(1, "no destination available\n"); | 474 | |
500 | return NULL; | 475 | /* |
501 | } | 476 | * If the destination is not available, i.e. it's in the trash, |
502 | en = ip_vs_lblc_new(iph->daddr, dest); | 477 | * we must ignore it, as it may be removed from under our feet, |
503 | if (en == NULL) { | 478 | * if someone drops our reference count. Our caller only makes |
504 | return NULL; | 479 | * sure that destinations, that are not in the trash, are not |
505 | } | 480 | * moved to the trash, while we are scheduling. But anyone can |
506 | ip_vs_lblc_hash(tbl, en); | 481 | * free up entries from the trash at any time. |
507 | } else { | 482 | */ |
508 | dest = en->dest; | 483 | |
509 | if (!(dest->flags & IP_VS_DEST_F_AVAILABLE) | 484 | if (en->dest->flags & IP_VS_DEST_F_AVAILABLE) |
510 | || atomic_read(&dest->weight) <= 0 | 485 | dest = en->dest; |
511 | || is_overloaded(dest, svc)) { | 486 | } |
512 | dest = __ip_vs_wlc_schedule(svc, iph); | 487 | read_unlock(&svc->sched_lock); |
513 | if (dest == NULL) { | 488 | |
514 | IP_VS_DBG(1, "no destination available\n"); | 489 | /* If the destination has a weight and is not overloaded, use it */ |
515 | return NULL; | 490 | if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) |
516 | } | 491 | goto out; |
517 | atomic_dec(&en->dest->refcnt); | 492 | |
518 | atomic_inc(&dest->refcnt); | 493 | /* No cache entry or it is invalid, time to schedule */ |
519 | en->dest = dest; | 494 | dest = __ip_vs_lblc_schedule(svc, iph); |
520 | } | 495 | if (!dest) { |
496 | IP_VS_DBG(1, "no destination available\n"); | ||
497 | return NULL; | ||
521 | } | 498 | } |
522 | en->lastuse = jiffies; | ||
523 | 499 | ||
500 | /* If we fail to create a cache entry, we'll just use the valid dest */ | ||
501 | write_lock(&svc->sched_lock); | ||
502 | ip_vs_lblc_new(tbl, iph->daddr, dest); | ||
503 | write_unlock(&svc->sched_lock); | ||
504 | |||
505 | out: | ||
524 | IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u " | 506 | IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u " |
525 | "--> server %u.%u.%u.%u:%d\n", | 507 | "--> server %u.%u.%u.%u:%d\n", |
526 | NIPQUAD(en->addr), | 508 | NIPQUAD(iph->daddr), |
527 | NIPQUAD(dest->addr), | 509 | NIPQUAD(dest->addr.ip), |
528 | ntohs(dest->port)); | 510 | ntohs(dest->port)); |
529 | 511 | ||
530 | return dest; | 512 | return dest; |
@@ -540,9 +522,11 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = | |||
540 | .refcnt = ATOMIC_INIT(0), | 522 | .refcnt = ATOMIC_INIT(0), |
541 | .module = THIS_MODULE, | 523 | .module = THIS_MODULE, |
542 | .n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list), | 524 | .n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list), |
525 | #ifdef CONFIG_IP_VS_IPV6 | ||
526 | .supports_ipv6 = 0, | ||
527 | #endif | ||
543 | .init_service = ip_vs_lblc_init_svc, | 528 | .init_service = ip_vs_lblc_init_svc, |
544 | .done_service = ip_vs_lblc_done_svc, | 529 | .done_service = ip_vs_lblc_done_svc, |
545 | .update_service = ip_vs_lblc_update_svc, | ||
546 | .schedule = ip_vs_lblc_schedule, | 530 | .schedule = ip_vs_lblc_schedule, |
547 | }; | 531 | }; |
548 | 532 | ||
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index c234e73968a6..1f75ea83bcf8 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c | |||
@@ -106,7 +106,7 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) | |||
106 | return NULL; | 106 | return NULL; |
107 | } | 107 | } |
108 | 108 | ||
109 | e = kmalloc(sizeof(struct ip_vs_dest_list), GFP_ATOMIC); | 109 | e = kmalloc(sizeof(*e), GFP_ATOMIC); |
110 | if (e == NULL) { | 110 | if (e == NULL) { |
111 | IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n"); | 111 | IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n"); |
112 | return NULL; | 112 | return NULL; |
@@ -116,11 +116,9 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) | |||
116 | e->dest = dest; | 116 | e->dest = dest; |
117 | 117 | ||
118 | /* link it to the list */ | 118 | /* link it to the list */ |
119 | write_lock(&set->lock); | ||
120 | e->next = set->list; | 119 | e->next = set->list; |
121 | set->list = e; | 120 | set->list = e; |
122 | atomic_inc(&set->size); | 121 | atomic_inc(&set->size); |
123 | write_unlock(&set->lock); | ||
124 | 122 | ||
125 | set->lastmod = jiffies; | 123 | set->lastmod = jiffies; |
126 | return e; | 124 | return e; |
@@ -131,7 +129,6 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) | |||
131 | { | 129 | { |
132 | struct ip_vs_dest_list *e, **ep; | 130 | struct ip_vs_dest_list *e, **ep; |
133 | 131 | ||
134 | write_lock(&set->lock); | ||
135 | for (ep=&set->list, e=*ep; e!=NULL; e=*ep) { | 132 | for (ep=&set->list, e=*ep; e!=NULL; e=*ep) { |
136 | if (e->dest == dest) { | 133 | if (e->dest == dest) { |
137 | /* HIT */ | 134 | /* HIT */ |
@@ -144,7 +141,6 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) | |||
144 | } | 141 | } |
145 | ep = &e->next; | 142 | ep = &e->next; |
146 | } | 143 | } |
147 | write_unlock(&set->lock); | ||
148 | } | 144 | } |
149 | 145 | ||
150 | static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) | 146 | static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) |
@@ -174,7 +170,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) | |||
174 | if (set == NULL) | 170 | if (set == NULL) |
175 | return NULL; | 171 | return NULL; |
176 | 172 | ||
177 | read_lock(&set->lock); | ||
178 | /* select the first destination server, whose weight > 0 */ | 173 | /* select the first destination server, whose weight > 0 */ |
179 | for (e=set->list; e!=NULL; e=e->next) { | 174 | for (e=set->list; e!=NULL; e=e->next) { |
180 | least = e->dest; | 175 | least = e->dest; |
@@ -188,7 +183,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) | |||
188 | goto nextstage; | 183 | goto nextstage; |
189 | } | 184 | } |
190 | } | 185 | } |
191 | read_unlock(&set->lock); | ||
192 | return NULL; | 186 | return NULL; |
193 | 187 | ||
194 | /* find the destination with the weighted least load */ | 188 | /* find the destination with the weighted least load */ |
@@ -207,11 +201,10 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) | |||
207 | loh = doh; | 201 | loh = doh; |
208 | } | 202 | } |
209 | } | 203 | } |
210 | read_unlock(&set->lock); | ||
211 | 204 | ||
212 | IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d " | 205 | IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d " |
213 | "activeconns %d refcnt %d weight %d overhead %d\n", | 206 | "activeconns %d refcnt %d weight %d overhead %d\n", |
214 | NIPQUAD(least->addr), ntohs(least->port), | 207 | NIPQUAD(least->addr.ip), ntohs(least->port), |
215 | atomic_read(&least->activeconns), | 208 | atomic_read(&least->activeconns), |
216 | atomic_read(&least->refcnt), | 209 | atomic_read(&least->refcnt), |
217 | atomic_read(&least->weight), loh); | 210 | atomic_read(&least->weight), loh); |
@@ -229,7 +222,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) | |||
229 | if (set == NULL) | 222 | if (set == NULL) |
230 | return NULL; | 223 | return NULL; |
231 | 224 | ||
232 | read_lock(&set->lock); | ||
233 | /* select the first destination server, whose weight > 0 */ | 225 | /* select the first destination server, whose weight > 0 */ |
234 | for (e=set->list; e!=NULL; e=e->next) { | 226 | for (e=set->list; e!=NULL; e=e->next) { |
235 | most = e->dest; | 227 | most = e->dest; |
@@ -239,7 +231,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) | |||
239 | goto nextstage; | 231 | goto nextstage; |
240 | } | 232 | } |
241 | } | 233 | } |
242 | read_unlock(&set->lock); | ||
243 | return NULL; | 234 | return NULL; |
244 | 235 | ||
245 | /* find the destination with the weighted most load */ | 236 | /* find the destination with the weighted most load */ |
@@ -256,11 +247,10 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) | |||
256 | moh = doh; | 247 | moh = doh; |
257 | } | 248 | } |
258 | } | 249 | } |
259 | read_unlock(&set->lock); | ||
260 | 250 | ||
261 | IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d " | 251 | IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d " |
262 | "activeconns %d refcnt %d weight %d overhead %d\n", | 252 | "activeconns %d refcnt %d weight %d overhead %d\n", |
263 | NIPQUAD(most->addr), ntohs(most->port), | 253 | NIPQUAD(most->addr.ip), ntohs(most->port), |
264 | atomic_read(&most->activeconns), | 254 | atomic_read(&most->activeconns), |
265 | atomic_read(&most->refcnt), | 255 | atomic_read(&most->refcnt), |
266 | atomic_read(&most->weight), moh); | 256 | atomic_read(&most->weight), moh); |
@@ -284,7 +274,6 @@ struct ip_vs_lblcr_entry { | |||
284 | * IPVS lblcr hash table | 274 | * IPVS lblcr hash table |
285 | */ | 275 | */ |
286 | struct ip_vs_lblcr_table { | 276 | struct ip_vs_lblcr_table { |
287 | rwlock_t lock; /* lock for this table */ | ||
288 | struct list_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */ | 277 | struct list_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */ |
289 | atomic_t entries; /* number of entries */ | 278 | atomic_t entries; /* number of entries */ |
290 | int max_size; /* maximum size of entries */ | 279 | int max_size; /* maximum size of entries */ |
@@ -311,32 +300,6 @@ static ctl_table vs_vars_table[] = { | |||
311 | 300 | ||
312 | static struct ctl_table_header * sysctl_header; | 301 | static struct ctl_table_header * sysctl_header; |
313 | 302 | ||
314 | /* | ||
315 | * new/free a ip_vs_lblcr_entry, which is a mapping of a destination | ||
316 | * IP address to a server. | ||
317 | */ | ||
318 | static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__be32 daddr) | ||
319 | { | ||
320 | struct ip_vs_lblcr_entry *en; | ||
321 | |||
322 | en = kmalloc(sizeof(struct ip_vs_lblcr_entry), GFP_ATOMIC); | ||
323 | if (en == NULL) { | ||
324 | IP_VS_ERR("ip_vs_lblcr_new(): no memory\n"); | ||
325 | return NULL; | ||
326 | } | ||
327 | |||
328 | INIT_LIST_HEAD(&en->list); | ||
329 | en->addr = daddr; | ||
330 | |||
331 | /* initilize its dest set */ | ||
332 | atomic_set(&(en->set.size), 0); | ||
333 | en->set.list = NULL; | ||
334 | rwlock_init(&en->set.lock); | ||
335 | |||
336 | return en; | ||
337 | } | ||
338 | |||
339 | |||
340 | static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) | 303 | static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) |
341 | { | 304 | { |
342 | list_del(&en->list); | 305 | list_del(&en->list); |
@@ -358,55 +321,68 @@ static inline unsigned ip_vs_lblcr_hashkey(__be32 addr) | |||
358 | * Hash an entry in the ip_vs_lblcr_table. | 321 | * Hash an entry in the ip_vs_lblcr_table. |
359 | * returns bool success. | 322 | * returns bool success. |
360 | */ | 323 | */ |
361 | static int | 324 | static void |
362 | ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en) | 325 | ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en) |
363 | { | 326 | { |
364 | unsigned hash; | 327 | unsigned hash = ip_vs_lblcr_hashkey(en->addr); |
365 | 328 | ||
366 | if (!list_empty(&en->list)) { | ||
367 | IP_VS_ERR("ip_vs_lblcr_hash(): request for already hashed, " | ||
368 | "called from %p\n", __builtin_return_address(0)); | ||
369 | return 0; | ||
370 | } | ||
371 | |||
372 | /* | ||
373 | * Hash by destination IP address | ||
374 | */ | ||
375 | hash = ip_vs_lblcr_hashkey(en->addr); | ||
376 | |||
377 | write_lock(&tbl->lock); | ||
378 | list_add(&en->list, &tbl->bucket[hash]); | 329 | list_add(&en->list, &tbl->bucket[hash]); |
379 | atomic_inc(&tbl->entries); | 330 | atomic_inc(&tbl->entries); |
380 | write_unlock(&tbl->lock); | ||
381 | |||
382 | return 1; | ||
383 | } | 331 | } |
384 | 332 | ||
385 | 333 | ||
386 | /* | 334 | /* |
387 | * Get ip_vs_lblcr_entry associated with supplied parameters. | 335 | * Get ip_vs_lblcr_entry associated with supplied parameters. Called under |
336 | * read lock. | ||
388 | */ | 337 | */ |
389 | static inline struct ip_vs_lblcr_entry * | 338 | static inline struct ip_vs_lblcr_entry * |
390 | ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr) | 339 | ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr) |
391 | { | 340 | { |
392 | unsigned hash; | 341 | unsigned hash = ip_vs_lblcr_hashkey(addr); |
393 | struct ip_vs_lblcr_entry *en; | 342 | struct ip_vs_lblcr_entry *en; |
394 | 343 | ||
395 | hash = ip_vs_lblcr_hashkey(addr); | 344 | list_for_each_entry(en, &tbl->bucket[hash], list) |
345 | if (en->addr == addr) | ||
346 | return en; | ||
396 | 347 | ||
397 | read_lock(&tbl->lock); | 348 | return NULL; |
349 | } | ||
398 | 350 | ||
399 | list_for_each_entry(en, &tbl->bucket[hash], list) { | 351 | |
400 | if (en->addr == addr) { | 352 | /* |
401 | /* HIT */ | 353 | * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination |
402 | read_unlock(&tbl->lock); | 354 | * IP address to a server. Called under write lock. |
403 | return en; | 355 | */ |
356 | static inline struct ip_vs_lblcr_entry * | ||
357 | ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, __be32 daddr, | ||
358 | struct ip_vs_dest *dest) | ||
359 | { | ||
360 | struct ip_vs_lblcr_entry *en; | ||
361 | |||
362 | en = ip_vs_lblcr_get(tbl, daddr); | ||
363 | if (!en) { | ||
364 | en = kmalloc(sizeof(*en), GFP_ATOMIC); | ||
365 | if (!en) { | ||
366 | IP_VS_ERR("ip_vs_lblcr_new(): no memory\n"); | ||
367 | return NULL; | ||
404 | } | 368 | } |
369 | |||
370 | en->addr = daddr; | ||
371 | en->lastuse = jiffies; | ||
372 | |||
373 | /* initilize its dest set */ | ||
374 | atomic_set(&(en->set.size), 0); | ||
375 | en->set.list = NULL; | ||
376 | rwlock_init(&en->set.lock); | ||
377 | |||
378 | ip_vs_lblcr_hash(tbl, en); | ||
405 | } | 379 | } |
406 | 380 | ||
407 | read_unlock(&tbl->lock); | 381 | write_lock(&en->set.lock); |
382 | ip_vs_dest_set_insert(&en->set, dest); | ||
383 | write_unlock(&en->set.lock); | ||
408 | 384 | ||
409 | return NULL; | 385 | return en; |
410 | } | 386 | } |
411 | 387 | ||
412 | 388 | ||
@@ -418,19 +394,18 @@ static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl) | |||
418 | int i; | 394 | int i; |
419 | struct ip_vs_lblcr_entry *en, *nxt; | 395 | struct ip_vs_lblcr_entry *en, *nxt; |
420 | 396 | ||
397 | /* No locking required, only called during cleanup. */ | ||
421 | for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { | 398 | for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { |
422 | write_lock(&tbl->lock); | ||
423 | list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { | 399 | list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { |
424 | ip_vs_lblcr_free(en); | 400 | ip_vs_lblcr_free(en); |
425 | atomic_dec(&tbl->entries); | ||
426 | } | 401 | } |
427 | write_unlock(&tbl->lock); | ||
428 | } | 402 | } |
429 | } | 403 | } |
430 | 404 | ||
431 | 405 | ||
432 | static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl) | 406 | static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) |
433 | { | 407 | { |
408 | struct ip_vs_lblcr_table *tbl = svc->sched_data; | ||
434 | unsigned long now = jiffies; | 409 | unsigned long now = jiffies; |
435 | int i, j; | 410 | int i, j; |
436 | struct ip_vs_lblcr_entry *en, *nxt; | 411 | struct ip_vs_lblcr_entry *en, *nxt; |
@@ -438,7 +413,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl) | |||
438 | for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { | 413 | for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { |
439 | j = (j + 1) & IP_VS_LBLCR_TAB_MASK; | 414 | j = (j + 1) & IP_VS_LBLCR_TAB_MASK; |
440 | 415 | ||
441 | write_lock(&tbl->lock); | 416 | write_lock(&svc->sched_lock); |
442 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { | 417 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { |
443 | if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration, | 418 | if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration, |
444 | now)) | 419 | now)) |
@@ -447,7 +422,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl) | |||
447 | ip_vs_lblcr_free(en); | 422 | ip_vs_lblcr_free(en); |
448 | atomic_dec(&tbl->entries); | 423 | atomic_dec(&tbl->entries); |
449 | } | 424 | } |
450 | write_unlock(&tbl->lock); | 425 | write_unlock(&svc->sched_lock); |
451 | } | 426 | } |
452 | tbl->rover = j; | 427 | tbl->rover = j; |
453 | } | 428 | } |
@@ -466,17 +441,16 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl) | |||
466 | */ | 441 | */ |
467 | static void ip_vs_lblcr_check_expire(unsigned long data) | 442 | static void ip_vs_lblcr_check_expire(unsigned long data) |
468 | { | 443 | { |
469 | struct ip_vs_lblcr_table *tbl; | 444 | struct ip_vs_service *svc = (struct ip_vs_service *) data; |
445 | struct ip_vs_lblcr_table *tbl = svc->sched_data; | ||
470 | unsigned long now = jiffies; | 446 | unsigned long now = jiffies; |
471 | int goal; | 447 | int goal; |
472 | int i, j; | 448 | int i, j; |
473 | struct ip_vs_lblcr_entry *en, *nxt; | 449 | struct ip_vs_lblcr_entry *en, *nxt; |
474 | 450 | ||
475 | tbl = (struct ip_vs_lblcr_table *)data; | ||
476 | |||
477 | if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { | 451 | if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { |
478 | /* do full expiration check */ | 452 | /* do full expiration check */ |
479 | ip_vs_lblcr_full_check(tbl); | 453 | ip_vs_lblcr_full_check(svc); |
480 | tbl->counter = 1; | 454 | tbl->counter = 1; |
481 | goto out; | 455 | goto out; |
482 | } | 456 | } |
@@ -493,7 +467,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data) | |||
493 | for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { | 467 | for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { |
494 | j = (j + 1) & IP_VS_LBLCR_TAB_MASK; | 468 | j = (j + 1) & IP_VS_LBLCR_TAB_MASK; |
495 | 469 | ||
496 | write_lock(&tbl->lock); | 470 | write_lock(&svc->sched_lock); |
497 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { | 471 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { |
498 | if (time_before(now, en->lastuse+ENTRY_TIMEOUT)) | 472 | if (time_before(now, en->lastuse+ENTRY_TIMEOUT)) |
499 | continue; | 473 | continue; |
@@ -502,7 +476,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data) | |||
502 | atomic_dec(&tbl->entries); | 476 | atomic_dec(&tbl->entries); |
503 | goal--; | 477 | goal--; |
504 | } | 478 | } |
505 | write_unlock(&tbl->lock); | 479 | write_unlock(&svc->sched_lock); |
506 | if (goal <= 0) | 480 | if (goal <= 0) |
507 | break; | 481 | break; |
508 | } | 482 | } |
@@ -520,15 +494,14 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) | |||
520 | /* | 494 | /* |
521 | * Allocate the ip_vs_lblcr_table for this service | 495 | * Allocate the ip_vs_lblcr_table for this service |
522 | */ | 496 | */ |
523 | tbl = kmalloc(sizeof(struct ip_vs_lblcr_table), GFP_ATOMIC); | 497 | tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC); |
524 | if (tbl == NULL) { | 498 | if (tbl == NULL) { |
525 | IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n"); | 499 | IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n"); |
526 | return -ENOMEM; | 500 | return -ENOMEM; |
527 | } | 501 | } |
528 | svc->sched_data = tbl; | 502 | svc->sched_data = tbl; |
529 | IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for " | 503 | IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for " |
530 | "current service\n", | 504 | "current service\n", sizeof(*tbl)); |
531 | sizeof(struct ip_vs_lblcr_table)); | ||
532 | 505 | ||
533 | /* | 506 | /* |
534 | * Initialize the hash buckets | 507 | * Initialize the hash buckets |
@@ -536,7 +509,6 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) | |||
536 | for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { | 509 | for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { |
537 | INIT_LIST_HEAD(&tbl->bucket[i]); | 510 | INIT_LIST_HEAD(&tbl->bucket[i]); |
538 | } | 511 | } |
539 | rwlock_init(&tbl->lock); | ||
540 | tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16; | 512 | tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16; |
541 | tbl->rover = 0; | 513 | tbl->rover = 0; |
542 | tbl->counter = 1; | 514 | tbl->counter = 1; |
@@ -545,9 +517,8 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) | |||
545 | * Hook periodic timer for garbage collection | 517 | * Hook periodic timer for garbage collection |
546 | */ | 518 | */ |
547 | setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire, | 519 | setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire, |
548 | (unsigned long)tbl); | 520 | (unsigned long)svc); |
549 | tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL; | 521 | mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL); |
550 | add_timer(&tbl->periodic_timer); | ||
551 | 522 | ||
552 | return 0; | 523 | return 0; |
553 | } | 524 | } |
@@ -564,22 +535,16 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) | |||
564 | ip_vs_lblcr_flush(tbl); | 535 | ip_vs_lblcr_flush(tbl); |
565 | 536 | ||
566 | /* release the table itself */ | 537 | /* release the table itself */ |
567 | kfree(svc->sched_data); | 538 | kfree(tbl); |
568 | IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n", | 539 | IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n", |
569 | sizeof(struct ip_vs_lblcr_table)); | 540 | sizeof(*tbl)); |
570 | 541 | ||
571 | return 0; | 542 | return 0; |
572 | } | 543 | } |
573 | 544 | ||
574 | 545 | ||
575 | static int ip_vs_lblcr_update_svc(struct ip_vs_service *svc) | ||
576 | { | ||
577 | return 0; | ||
578 | } | ||
579 | |||
580 | |||
581 | static inline struct ip_vs_dest * | 546 | static inline struct ip_vs_dest * |
582 | __ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) | 547 | __ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph) |
583 | { | 548 | { |
584 | struct ip_vs_dest *dest, *least; | 549 | struct ip_vs_dest *dest, *least; |
585 | int loh, doh; | 550 | int loh, doh; |
@@ -633,7 +598,7 @@ __ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) | |||
633 | 598 | ||
634 | IP_VS_DBG(6, "LBLCR: server %d.%d.%d.%d:%d " | 599 | IP_VS_DBG(6, "LBLCR: server %d.%d.%d.%d:%d " |
635 | "activeconns %d refcnt %d weight %d overhead %d\n", | 600 | "activeconns %d refcnt %d weight %d overhead %d\n", |
636 | NIPQUAD(least->addr), ntohs(least->port), | 601 | NIPQUAD(least->addr.ip), ntohs(least->port), |
637 | atomic_read(&least->activeconns), | 602 | atomic_read(&least->activeconns), |
638 | atomic_read(&least->refcnt), | 603 | atomic_read(&least->refcnt), |
639 | atomic_read(&least->weight), loh); | 604 | atomic_read(&least->weight), loh); |
@@ -669,51 +634,79 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) | |||
669 | static struct ip_vs_dest * | 634 | static struct ip_vs_dest * |
670 | ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | 635 | ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) |
671 | { | 636 | { |
672 | struct ip_vs_dest *dest; | 637 | struct ip_vs_lblcr_table *tbl = svc->sched_data; |
673 | struct ip_vs_lblcr_table *tbl; | ||
674 | struct ip_vs_lblcr_entry *en; | ||
675 | struct iphdr *iph = ip_hdr(skb); | 638 | struct iphdr *iph = ip_hdr(skb); |
639 | struct ip_vs_dest *dest = NULL; | ||
640 | struct ip_vs_lblcr_entry *en; | ||
676 | 641 | ||
677 | IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n"); | 642 | IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n"); |
678 | 643 | ||
679 | tbl = (struct ip_vs_lblcr_table *)svc->sched_data; | 644 | /* First look in our cache */ |
645 | read_lock(&svc->sched_lock); | ||
680 | en = ip_vs_lblcr_get(tbl, iph->daddr); | 646 | en = ip_vs_lblcr_get(tbl, iph->daddr); |
681 | if (en == NULL) { | 647 | if (en) { |
682 | dest = __ip_vs_wlc_schedule(svc, iph); | 648 | /* We only hold a read lock, but this is atomic */ |
683 | if (dest == NULL) { | 649 | en->lastuse = jiffies; |
684 | IP_VS_DBG(1, "no destination available\n"); | 650 | |
685 | return NULL; | 651 | /* Get the least loaded destination */ |
686 | } | 652 | read_lock(&en->set.lock); |
687 | en = ip_vs_lblcr_new(iph->daddr); | ||
688 | if (en == NULL) { | ||
689 | return NULL; | ||
690 | } | ||
691 | ip_vs_dest_set_insert(&en->set, dest); | ||
692 | ip_vs_lblcr_hash(tbl, en); | ||
693 | } else { | ||
694 | dest = ip_vs_dest_set_min(&en->set); | 653 | dest = ip_vs_dest_set_min(&en->set); |
695 | if (!dest || is_overloaded(dest, svc)) { | 654 | read_unlock(&en->set.lock); |
696 | dest = __ip_vs_wlc_schedule(svc, iph); | 655 | |
697 | if (dest == NULL) { | 656 | /* More than one destination + enough time passed by, cleanup */ |
698 | IP_VS_DBG(1, "no destination available\n"); | ||
699 | return NULL; | ||
700 | } | ||
701 | ip_vs_dest_set_insert(&en->set, dest); | ||
702 | } | ||
703 | if (atomic_read(&en->set.size) > 1 && | 657 | if (atomic_read(&en->set.size) > 1 && |
704 | jiffies-en->set.lastmod > sysctl_ip_vs_lblcr_expiration) { | 658 | time_after(jiffies, en->set.lastmod + |
659 | sysctl_ip_vs_lblcr_expiration)) { | ||
705 | struct ip_vs_dest *m; | 660 | struct ip_vs_dest *m; |
661 | |||
662 | write_lock(&en->set.lock); | ||
706 | m = ip_vs_dest_set_max(&en->set); | 663 | m = ip_vs_dest_set_max(&en->set); |
707 | if (m) | 664 | if (m) |
708 | ip_vs_dest_set_erase(&en->set, m); | 665 | ip_vs_dest_set_erase(&en->set, m); |
666 | write_unlock(&en->set.lock); | ||
709 | } | 667 | } |
668 | |||
669 | /* If the destination is not overloaded, use it */ | ||
670 | if (dest && !is_overloaded(dest, svc)) { | ||
671 | read_unlock(&svc->sched_lock); | ||
672 | goto out; | ||
673 | } | ||
674 | |||
675 | /* The cache entry is invalid, time to schedule */ | ||
676 | dest = __ip_vs_lblcr_schedule(svc, iph); | ||
677 | if (!dest) { | ||
678 | IP_VS_DBG(1, "no destination available\n"); | ||
679 | read_unlock(&svc->sched_lock); | ||
680 | return NULL; | ||
681 | } | ||
682 | |||
683 | /* Update our cache entry */ | ||
684 | write_lock(&en->set.lock); | ||
685 | ip_vs_dest_set_insert(&en->set, dest); | ||
686 | write_unlock(&en->set.lock); | ||
687 | } | ||
688 | read_unlock(&svc->sched_lock); | ||
689 | |||
690 | if (dest) | ||
691 | goto out; | ||
692 | |||
693 | /* No cache entry, time to schedule */ | ||
694 | dest = __ip_vs_lblcr_schedule(svc, iph); | ||
695 | if (!dest) { | ||
696 | IP_VS_DBG(1, "no destination available\n"); | ||
697 | return NULL; | ||
710 | } | 698 | } |
711 | en->lastuse = jiffies; | ||
712 | 699 | ||
700 | /* If we fail to create a cache entry, we'll just use the valid dest */ | ||
701 | write_lock(&svc->sched_lock); | ||
702 | ip_vs_lblcr_new(tbl, iph->daddr, dest); | ||
703 | write_unlock(&svc->sched_lock); | ||
704 | |||
705 | out: | ||
713 | IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u " | 706 | IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u " |
714 | "--> server %u.%u.%u.%u:%d\n", | 707 | "--> server %u.%u.%u.%u:%d\n", |
715 | NIPQUAD(en->addr), | 708 | NIPQUAD(iph->daddr), |
716 | NIPQUAD(dest->addr), | 709 | NIPQUAD(dest->addr.ip), |
717 | ntohs(dest->port)); | 710 | ntohs(dest->port)); |
718 | 711 | ||
719 | return dest; | 712 | return dest; |
@@ -729,9 +722,11 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = | |||
729 | .refcnt = ATOMIC_INIT(0), | 722 | .refcnt = ATOMIC_INIT(0), |
730 | .module = THIS_MODULE, | 723 | .module = THIS_MODULE, |
731 | .n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list), | 724 | .n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list), |
725 | #ifdef CONFIG_IP_VS_IPV6 | ||
726 | .supports_ipv6 = 0, | ||
727 | #endif | ||
732 | .init_service = ip_vs_lblcr_init_svc, | 728 | .init_service = ip_vs_lblcr_init_svc, |
733 | .done_service = ip_vs_lblcr_done_svc, | 729 | .done_service = ip_vs_lblcr_done_svc, |
734 | .update_service = ip_vs_lblcr_update_svc, | ||
735 | .schedule = ip_vs_lblcr_schedule, | 730 | .schedule = ip_vs_lblcr_schedule, |
736 | }; | 731 | }; |
737 | 732 | ||
diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c index ebcdbf75ac65..b69f808ac461 100644 --- a/net/ipv4/ipvs/ip_vs_lc.c +++ b/net/ipv4/ipvs/ip_vs_lc.c | |||
@@ -20,24 +20,6 @@ | |||
20 | #include <net/ip_vs.h> | 20 | #include <net/ip_vs.h> |
21 | 21 | ||
22 | 22 | ||
23 | static int ip_vs_lc_init_svc(struct ip_vs_service *svc) | ||
24 | { | ||
25 | return 0; | ||
26 | } | ||
27 | |||
28 | |||
29 | static int ip_vs_lc_done_svc(struct ip_vs_service *svc) | ||
30 | { | ||
31 | return 0; | ||
32 | } | ||
33 | |||
34 | |||
35 | static int ip_vs_lc_update_svc(struct ip_vs_service *svc) | ||
36 | { | ||
37 | return 0; | ||
38 | } | ||
39 | |||
40 | |||
41 | static inline unsigned int | 23 | static inline unsigned int |
42 | ip_vs_lc_dest_overhead(struct ip_vs_dest *dest) | 24 | ip_vs_lc_dest_overhead(struct ip_vs_dest *dest) |
43 | { | 25 | { |
@@ -85,10 +67,10 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | |||
85 | } | 67 | } |
86 | 68 | ||
87 | if (least) | 69 | if (least) |
88 | IP_VS_DBG(6, "LC: server %u.%u.%u.%u:%u activeconns %d inactconns %d\n", | 70 | IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d inactconns %d\n", |
89 | NIPQUAD(least->addr), ntohs(least->port), | 71 | IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port), |
90 | atomic_read(&least->activeconns), | 72 | atomic_read(&least->activeconns), |
91 | atomic_read(&least->inactconns)); | 73 | atomic_read(&least->inactconns)); |
92 | 74 | ||
93 | return least; | 75 | return least; |
94 | } | 76 | } |
@@ -99,9 +81,9 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = { | |||
99 | .refcnt = ATOMIC_INIT(0), | 81 | .refcnt = ATOMIC_INIT(0), |
100 | .module = THIS_MODULE, | 82 | .module = THIS_MODULE, |
101 | .n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list), | 83 | .n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list), |
102 | .init_service = ip_vs_lc_init_svc, | 84 | #ifdef CONFIG_IP_VS_IPV6 |
103 | .done_service = ip_vs_lc_done_svc, | 85 | .supports_ipv6 = 1, |
104 | .update_service = ip_vs_lc_update_svc, | 86 | #endif |
105 | .schedule = ip_vs_lc_schedule, | 87 | .schedule = ip_vs_lc_schedule, |
106 | }; | 88 | }; |
107 | 89 | ||
diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c index 92f3a6770031..9a2d8033f08f 100644 --- a/net/ipv4/ipvs/ip_vs_nq.c +++ b/net/ipv4/ipvs/ip_vs_nq.c | |||
@@ -37,27 +37,6 @@ | |||
37 | #include <net/ip_vs.h> | 37 | #include <net/ip_vs.h> |
38 | 38 | ||
39 | 39 | ||
40 | static int | ||
41 | ip_vs_nq_init_svc(struct ip_vs_service *svc) | ||
42 | { | ||
43 | return 0; | ||
44 | } | ||
45 | |||
46 | |||
47 | static int | ||
48 | ip_vs_nq_done_svc(struct ip_vs_service *svc) | ||
49 | { | ||
50 | return 0; | ||
51 | } | ||
52 | |||
53 | |||
54 | static int | ||
55 | ip_vs_nq_update_svc(struct ip_vs_service *svc) | ||
56 | { | ||
57 | return 0; | ||
58 | } | ||
59 | |||
60 | |||
61 | static inline unsigned int | 40 | static inline unsigned int |
62 | ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) | 41 | ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) |
63 | { | 42 | { |
@@ -120,12 +99,12 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | |||
120 | return NULL; | 99 | return NULL; |
121 | 100 | ||
122 | out: | 101 | out: |
123 | IP_VS_DBG(6, "NQ: server %u.%u.%u.%u:%u " | 102 | IP_VS_DBG_BUF(6, "NQ: server %s:%u " |
124 | "activeconns %d refcnt %d weight %d overhead %d\n", | 103 | "activeconns %d refcnt %d weight %d overhead %d\n", |
125 | NIPQUAD(least->addr), ntohs(least->port), | 104 | IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port), |
126 | atomic_read(&least->activeconns), | 105 | atomic_read(&least->activeconns), |
127 | atomic_read(&least->refcnt), | 106 | atomic_read(&least->refcnt), |
128 | atomic_read(&least->weight), loh); | 107 | atomic_read(&least->weight), loh); |
129 | 108 | ||
130 | return least; | 109 | return least; |
131 | } | 110 | } |
@@ -137,9 +116,9 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler = | |||
137 | .refcnt = ATOMIC_INIT(0), | 116 | .refcnt = ATOMIC_INIT(0), |
138 | .module = THIS_MODULE, | 117 | .module = THIS_MODULE, |
139 | .n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list), | 118 | .n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list), |
140 | .init_service = ip_vs_nq_init_svc, | 119 | #ifdef CONFIG_IP_VS_IPV6 |
141 | .done_service = ip_vs_nq_done_svc, | 120 | .supports_ipv6 = 1, |
142 | .update_service = ip_vs_nq_update_svc, | 121 | #endif |
143 | .schedule = ip_vs_nq_schedule, | 122 | .schedule = ip_vs_nq_schedule, |
144 | }; | 123 | }; |
145 | 124 | ||
diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c index 6099a88fc200..0791f9e08feb 100644 --- a/net/ipv4/ipvs/ip_vs_proto.c +++ b/net/ipv4/ipvs/ip_vs_proto.c | |||
@@ -151,11 +151,11 @@ const char * ip_vs_state_name(__u16 proto, int state) | |||
151 | } | 151 | } |
152 | 152 | ||
153 | 153 | ||
154 | void | 154 | static void |
155 | ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, | 155 | ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp, |
156 | const struct sk_buff *skb, | 156 | const struct sk_buff *skb, |
157 | int offset, | 157 | int offset, |
158 | const char *msg) | 158 | const char *msg) |
159 | { | 159 | { |
160 | char buf[128]; | 160 | char buf[128]; |
161 | struct iphdr _iph, *ih; | 161 | struct iphdr _iph, *ih; |
@@ -189,6 +189,61 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, | |||
189 | printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); | 189 | printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); |
190 | } | 190 | } |
191 | 191 | ||
192 | #ifdef CONFIG_IP_VS_IPV6 | ||
193 | static void | ||
194 | ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp, | ||
195 | const struct sk_buff *skb, | ||
196 | int offset, | ||
197 | const char *msg) | ||
198 | { | ||
199 | char buf[192]; | ||
200 | struct ipv6hdr _iph, *ih; | ||
201 | |||
202 | ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); | ||
203 | if (ih == NULL) | ||
204 | sprintf(buf, "%s TRUNCATED", pp->name); | ||
205 | else if (ih->nexthdr == IPPROTO_FRAGMENT) | ||
206 | sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT " frag", | ||
207 | pp->name, NIP6(ih->saddr), | ||
208 | NIP6(ih->daddr)); | ||
209 | else { | ||
210 | __be16 _ports[2], *pptr; | ||
211 | |||
212 | pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr), | ||
213 | sizeof(_ports), _ports); | ||
214 | if (pptr == NULL) | ||
215 | sprintf(buf, "%s TRUNCATED " NIP6_FMT "->" NIP6_FMT, | ||
216 | pp->name, | ||
217 | NIP6(ih->saddr), | ||
218 | NIP6(ih->daddr)); | ||
219 | else | ||
220 | sprintf(buf, "%s " NIP6_FMT ":%u->" NIP6_FMT ":%u", | ||
221 | pp->name, | ||
222 | NIP6(ih->saddr), | ||
223 | ntohs(pptr[0]), | ||
224 | NIP6(ih->daddr), | ||
225 | ntohs(pptr[1])); | ||
226 | } | ||
227 | |||
228 | printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); | ||
229 | } | ||
230 | #endif | ||
231 | |||
232 | |||
233 | void | ||
234 | ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, | ||
235 | const struct sk_buff *skb, | ||
236 | int offset, | ||
237 | const char *msg) | ||
238 | { | ||
239 | #ifdef CONFIG_IP_VS_IPV6 | ||
240 | if (skb->protocol == htons(ETH_P_IPV6)) | ||
241 | ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg); | ||
242 | else | ||
243 | #endif | ||
244 | ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg); | ||
245 | } | ||
246 | |||
192 | 247 | ||
193 | int __init ip_vs_protocol_init(void) | 248 | int __init ip_vs_protocol_init(void) |
194 | { | 249 | { |
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c deleted file mode 100644 index 73e0ea87c1f5..000000000000 --- a/net/ipv4/ipvs/ip_vs_proto_ah.c +++ /dev/null | |||
@@ -1,178 +0,0 @@ | |||
1 | /* | ||
2 | * ip_vs_proto_ah.c: AH IPSec load balancing support for IPVS | ||
3 | * | ||
4 | * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 | ||
5 | * Wensong Zhang <wensong@linuxvirtualserver.org> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * version 2 as published by the Free Software Foundation; | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/in.h> | ||
14 | #include <linux/ip.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/netfilter.h> | ||
18 | #include <linux/netfilter_ipv4.h> | ||
19 | |||
20 | #include <net/ip_vs.h> | ||
21 | |||
22 | |||
23 | /* TODO: | ||
24 | |||
25 | struct isakmp_hdr { | ||
26 | __u8 icookie[8]; | ||
27 | __u8 rcookie[8]; | ||
28 | __u8 np; | ||
29 | __u8 version; | ||
30 | __u8 xchgtype; | ||
31 | __u8 flags; | ||
32 | __u32 msgid; | ||
33 | __u32 length; | ||
34 | }; | ||
35 | |||
36 | */ | ||
37 | |||
38 | #define PORT_ISAKMP 500 | ||
39 | |||
40 | |||
41 | static struct ip_vs_conn * | ||
42 | ah_conn_in_get(const struct sk_buff *skb, | ||
43 | struct ip_vs_protocol *pp, | ||
44 | const struct iphdr *iph, | ||
45 | unsigned int proto_off, | ||
46 | int inverse) | ||
47 | { | ||
48 | struct ip_vs_conn *cp; | ||
49 | |||
50 | if (likely(!inverse)) { | ||
51 | cp = ip_vs_conn_in_get(IPPROTO_UDP, | ||
52 | iph->saddr, | ||
53 | htons(PORT_ISAKMP), | ||
54 | iph->daddr, | ||
55 | htons(PORT_ISAKMP)); | ||
56 | } else { | ||
57 | cp = ip_vs_conn_in_get(IPPROTO_UDP, | ||
58 | iph->daddr, | ||
59 | htons(PORT_ISAKMP), | ||
60 | iph->saddr, | ||
61 | htons(PORT_ISAKMP)); | ||
62 | } | ||
63 | |||
64 | if (!cp) { | ||
65 | /* | ||
66 | * We are not sure if the packet is from our | ||
67 | * service, so our conn_schedule hook should return NF_ACCEPT | ||
68 | */ | ||
69 | IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet " | ||
70 | "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", | ||
71 | inverse ? "ICMP+" : "", | ||
72 | pp->name, | ||
73 | NIPQUAD(iph->saddr), | ||
74 | NIPQUAD(iph->daddr)); | ||
75 | } | ||
76 | |||
77 | return cp; | ||
78 | } | ||
79 | |||
80 | |||
81 | static struct ip_vs_conn * | ||
82 | ah_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | ||
83 | const struct iphdr *iph, unsigned int proto_off, int inverse) | ||
84 | { | ||
85 | struct ip_vs_conn *cp; | ||
86 | |||
87 | if (likely(!inverse)) { | ||
88 | cp = ip_vs_conn_out_get(IPPROTO_UDP, | ||
89 | iph->saddr, | ||
90 | htons(PORT_ISAKMP), | ||
91 | iph->daddr, | ||
92 | htons(PORT_ISAKMP)); | ||
93 | } else { | ||
94 | cp = ip_vs_conn_out_get(IPPROTO_UDP, | ||
95 | iph->daddr, | ||
96 | htons(PORT_ISAKMP), | ||
97 | iph->saddr, | ||
98 | htons(PORT_ISAKMP)); | ||
99 | } | ||
100 | |||
101 | if (!cp) { | ||
102 | IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet " | ||
103 | "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", | ||
104 | inverse ? "ICMP+" : "", | ||
105 | pp->name, | ||
106 | NIPQUAD(iph->saddr), | ||
107 | NIPQUAD(iph->daddr)); | ||
108 | } | ||
109 | |||
110 | return cp; | ||
111 | } | ||
112 | |||
113 | |||
114 | static int | ||
115 | ah_conn_schedule(struct sk_buff *skb, | ||
116 | struct ip_vs_protocol *pp, | ||
117 | int *verdict, struct ip_vs_conn **cpp) | ||
118 | { | ||
119 | /* | ||
120 | * AH is only related traffic. Pass the packet to IP stack. | ||
121 | */ | ||
122 | *verdict = NF_ACCEPT; | ||
123 | return 0; | ||
124 | } | ||
125 | |||
126 | |||
127 | static void | ||
128 | ah_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, | ||
129 | int offset, const char *msg) | ||
130 | { | ||
131 | char buf[256]; | ||
132 | struct iphdr _iph, *ih; | ||
133 | |||
134 | ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); | ||
135 | if (ih == NULL) | ||
136 | sprintf(buf, "%s TRUNCATED", pp->name); | ||
137 | else | ||
138 | sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u", | ||
139 | pp->name, NIPQUAD(ih->saddr), | ||
140 | NIPQUAD(ih->daddr)); | ||
141 | |||
142 | printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); | ||
143 | } | ||
144 | |||
145 | |||
146 | static void ah_init(struct ip_vs_protocol *pp) | ||
147 | { | ||
148 | /* nothing to do now */ | ||
149 | } | ||
150 | |||
151 | |||
152 | static void ah_exit(struct ip_vs_protocol *pp) | ||
153 | { | ||
154 | /* nothing to do now */ | ||
155 | } | ||
156 | |||
157 | |||
158 | struct ip_vs_protocol ip_vs_protocol_ah = { | ||
159 | .name = "AH", | ||
160 | .protocol = IPPROTO_AH, | ||
161 | .num_states = 1, | ||
162 | .dont_defrag = 1, | ||
163 | .init = ah_init, | ||
164 | .exit = ah_exit, | ||
165 | .conn_schedule = ah_conn_schedule, | ||
166 | .conn_in_get = ah_conn_in_get, | ||
167 | .conn_out_get = ah_conn_out_get, | ||
168 | .snat_handler = NULL, | ||
169 | .dnat_handler = NULL, | ||
170 | .csum_check = NULL, | ||
171 | .state_transition = NULL, | ||
172 | .register_app = NULL, | ||
173 | .unregister_app = NULL, | ||
174 | .app_conn_bind = NULL, | ||
175 | .debug_packet = ah_debug_packet, | ||
176 | .timeout_change = NULL, /* ISAKMP */ | ||
177 | .set_state_timeout = NULL, | ||
178 | }; | ||
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah_esp.c b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c new file mode 100644 index 000000000000..80ab0c8e5b4a --- /dev/null +++ b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c | |||
@@ -0,0 +1,235 @@ | |||
1 | /* | ||
2 | * ip_vs_proto_ah_esp.c: AH/ESP IPSec load balancing support for IPVS | ||
3 | * | ||
4 | * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 | ||
5 | * Wensong Zhang <wensong@linuxvirtualserver.org> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * version 2 as published by the Free Software Foundation; | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/in.h> | ||
14 | #include <linux/ip.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/netfilter.h> | ||
18 | #include <linux/netfilter_ipv4.h> | ||
19 | |||
20 | #include <net/ip_vs.h> | ||
21 | |||
22 | |||
23 | /* TODO: | ||
24 | |||
25 | struct isakmp_hdr { | ||
26 | __u8 icookie[8]; | ||
27 | __u8 rcookie[8]; | ||
28 | __u8 np; | ||
29 | __u8 version; | ||
30 | __u8 xchgtype; | ||
31 | __u8 flags; | ||
32 | __u32 msgid; | ||
33 | __u32 length; | ||
34 | }; | ||
35 | |||
36 | */ | ||
37 | |||
38 | #define PORT_ISAKMP 500 | ||
39 | |||
40 | |||
41 | static struct ip_vs_conn * | ||
42 | ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, | ||
43 | const struct ip_vs_iphdr *iph, unsigned int proto_off, | ||
44 | int inverse) | ||
45 | { | ||
46 | struct ip_vs_conn *cp; | ||
47 | |||
48 | if (likely(!inverse)) { | ||
49 | cp = ip_vs_conn_in_get(af, IPPROTO_UDP, | ||
50 | &iph->saddr, | ||
51 | htons(PORT_ISAKMP), | ||
52 | &iph->daddr, | ||
53 | htons(PORT_ISAKMP)); | ||
54 | } else { | ||
55 | cp = ip_vs_conn_in_get(af, IPPROTO_UDP, | ||
56 | &iph->daddr, | ||
57 | htons(PORT_ISAKMP), | ||
58 | &iph->saddr, | ||
59 | htons(PORT_ISAKMP)); | ||
60 | } | ||
61 | |||
62 | if (!cp) { | ||
63 | /* | ||
64 | * We are not sure if the packet is from our | ||
65 | * service, so our conn_schedule hook should return NF_ACCEPT | ||
66 | */ | ||
67 | IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet " | ||
68 | "%s%s %s->%s\n", | ||
69 | inverse ? "ICMP+" : "", | ||
70 | pp->name, | ||
71 | IP_VS_DBG_ADDR(af, &iph->saddr), | ||
72 | IP_VS_DBG_ADDR(af, &iph->daddr)); | ||
73 | } | ||
74 | |||
75 | return cp; | ||
76 | } | ||
77 | |||
78 | |||
79 | static struct ip_vs_conn * | ||
80 | ah_esp_conn_out_get(int af, const struct sk_buff *skb, | ||
81 | struct ip_vs_protocol *pp, | ||
82 | const struct ip_vs_iphdr *iph, | ||
83 | unsigned int proto_off, | ||
84 | int inverse) | ||
85 | { | ||
86 | struct ip_vs_conn *cp; | ||
87 | |||
88 | if (likely(!inverse)) { | ||
89 | cp = ip_vs_conn_out_get(af, IPPROTO_UDP, | ||
90 | &iph->saddr, | ||
91 | htons(PORT_ISAKMP), | ||
92 | &iph->daddr, | ||
93 | htons(PORT_ISAKMP)); | ||
94 | } else { | ||
95 | cp = ip_vs_conn_out_get(af, IPPROTO_UDP, | ||
96 | &iph->daddr, | ||
97 | htons(PORT_ISAKMP), | ||
98 | &iph->saddr, | ||
99 | htons(PORT_ISAKMP)); | ||
100 | } | ||
101 | |||
102 | if (!cp) { | ||
103 | IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " | ||
104 | "%s%s %s->%s\n", | ||
105 | inverse ? "ICMP+" : "", | ||
106 | pp->name, | ||
107 | IP_VS_DBG_ADDR(af, &iph->saddr), | ||
108 | IP_VS_DBG_ADDR(af, &iph->daddr)); | ||
109 | } | ||
110 | |||
111 | return cp; | ||
112 | } | ||
113 | |||
114 | |||
115 | static int | ||
116 | ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | ||
117 | int *verdict, struct ip_vs_conn **cpp) | ||
118 | { | ||
119 | /* | ||
120 | * AH/ESP is only related traffic. Pass the packet to IP stack. | ||
121 | */ | ||
122 | *verdict = NF_ACCEPT; | ||
123 | return 0; | ||
124 | } | ||
125 | |||
126 | |||
127 | static void | ||
128 | ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb, | ||
129 | int offset, const char *msg) | ||
130 | { | ||
131 | char buf[256]; | ||
132 | struct iphdr _iph, *ih; | ||
133 | |||
134 | ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); | ||
135 | if (ih == NULL) | ||
136 | sprintf(buf, "%s TRUNCATED", pp->name); | ||
137 | else | ||
138 | sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u", | ||
139 | pp->name, NIPQUAD(ih->saddr), | ||
140 | NIPQUAD(ih->daddr)); | ||
141 | |||
142 | printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); | ||
143 | } | ||
144 | |||
145 | #ifdef CONFIG_IP_VS_IPV6 | ||
146 | static void | ||
147 | ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb, | ||
148 | int offset, const char *msg) | ||
149 | { | ||
150 | char buf[256]; | ||
151 | struct ipv6hdr _iph, *ih; | ||
152 | |||
153 | ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); | ||
154 | if (ih == NULL) | ||
155 | sprintf(buf, "%s TRUNCATED", pp->name); | ||
156 | else | ||
157 | sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT, | ||
158 | pp->name, NIP6(ih->saddr), | ||
159 | NIP6(ih->daddr)); | ||
160 | |||
161 | printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); | ||
162 | } | ||
163 | #endif | ||
164 | |||
165 | static void | ||
166 | ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, | ||
167 | int offset, const char *msg) | ||
168 | { | ||
169 | #ifdef CONFIG_IP_VS_IPV6 | ||
170 | if (skb->protocol == htons(ETH_P_IPV6)) | ||
171 | ah_esp_debug_packet_v6(pp, skb, offset, msg); | ||
172 | else | ||
173 | #endif | ||
174 | ah_esp_debug_packet_v4(pp, skb, offset, msg); | ||
175 | } | ||
176 | |||
177 | |||
178 | static void ah_esp_init(struct ip_vs_protocol *pp) | ||
179 | { | ||
180 | /* nothing to do now */ | ||
181 | } | ||
182 | |||
183 | |||
184 | static void ah_esp_exit(struct ip_vs_protocol *pp) | ||
185 | { | ||
186 | /* nothing to do now */ | ||
187 | } | ||
188 | |||
189 | |||
190 | #ifdef CONFIG_IP_VS_PROTO_AH | ||
191 | struct ip_vs_protocol ip_vs_protocol_ah = { | ||
192 | .name = "AH", | ||
193 | .protocol = IPPROTO_AH, | ||
194 | .num_states = 1, | ||
195 | .dont_defrag = 1, | ||
196 | .init = ah_esp_init, | ||
197 | .exit = ah_esp_exit, | ||
198 | .conn_schedule = ah_esp_conn_schedule, | ||
199 | .conn_in_get = ah_esp_conn_in_get, | ||
200 | .conn_out_get = ah_esp_conn_out_get, | ||
201 | .snat_handler = NULL, | ||
202 | .dnat_handler = NULL, | ||
203 | .csum_check = NULL, | ||
204 | .state_transition = NULL, | ||
205 | .register_app = NULL, | ||
206 | .unregister_app = NULL, | ||
207 | .app_conn_bind = NULL, | ||
208 | .debug_packet = ah_esp_debug_packet, | ||
209 | .timeout_change = NULL, /* ISAKMP */ | ||
210 | .set_state_timeout = NULL, | ||
211 | }; | ||
212 | #endif | ||
213 | |||
214 | #ifdef CONFIG_IP_VS_PROTO_ESP | ||
215 | struct ip_vs_protocol ip_vs_protocol_esp = { | ||
216 | .name = "ESP", | ||
217 | .protocol = IPPROTO_ESP, | ||
218 | .num_states = 1, | ||
219 | .dont_defrag = 1, | ||
220 | .init = ah_esp_init, | ||
221 | .exit = ah_esp_exit, | ||
222 | .conn_schedule = ah_esp_conn_schedule, | ||
223 | .conn_in_get = ah_esp_conn_in_get, | ||
224 | .conn_out_get = ah_esp_conn_out_get, | ||
225 | .snat_handler = NULL, | ||
226 | .dnat_handler = NULL, | ||
227 | .csum_check = NULL, | ||
228 | .state_transition = NULL, | ||
229 | .register_app = NULL, | ||
230 | .unregister_app = NULL, | ||
231 | .app_conn_bind = NULL, | ||
232 | .debug_packet = ah_esp_debug_packet, | ||
233 | .timeout_change = NULL, /* ISAKMP */ | ||
234 | }; | ||
235 | #endif | ||
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c deleted file mode 100644 index 21d70c8ffa54..000000000000 --- a/net/ipv4/ipvs/ip_vs_proto_esp.c +++ /dev/null | |||
@@ -1,176 +0,0 @@ | |||
1 | /* | ||
2 | * ip_vs_proto_esp.c: ESP IPSec load balancing support for IPVS | ||
3 | * | ||
4 | * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 | ||
5 | * Wensong Zhang <wensong@linuxvirtualserver.org> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * version 2 as published by the Free Software Foundation; | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/in.h> | ||
14 | #include <linux/ip.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/netfilter.h> | ||
18 | #include <linux/netfilter_ipv4.h> | ||
19 | |||
20 | #include <net/ip_vs.h> | ||
21 | |||
22 | |||
23 | /* TODO: | ||
24 | |||
25 | struct isakmp_hdr { | ||
26 | __u8 icookie[8]; | ||
27 | __u8 rcookie[8]; | ||
28 | __u8 np; | ||
29 | __u8 version; | ||
30 | __u8 xchgtype; | ||
31 | __u8 flags; | ||
32 | __u32 msgid; | ||
33 | __u32 length; | ||
34 | }; | ||
35 | |||
36 | */ | ||
37 | |||
38 | #define PORT_ISAKMP 500 | ||
39 | |||
40 | |||
41 | static struct ip_vs_conn * | ||
42 | esp_conn_in_get(const struct sk_buff *skb, | ||
43 | struct ip_vs_protocol *pp, | ||
44 | const struct iphdr *iph, | ||
45 | unsigned int proto_off, | ||
46 | int inverse) | ||
47 | { | ||
48 | struct ip_vs_conn *cp; | ||
49 | |||
50 | if (likely(!inverse)) { | ||
51 | cp = ip_vs_conn_in_get(IPPROTO_UDP, | ||
52 | iph->saddr, | ||
53 | htons(PORT_ISAKMP), | ||
54 | iph->daddr, | ||
55 | htons(PORT_ISAKMP)); | ||
56 | } else { | ||
57 | cp = ip_vs_conn_in_get(IPPROTO_UDP, | ||
58 | iph->daddr, | ||
59 | htons(PORT_ISAKMP), | ||
60 | iph->saddr, | ||
61 | htons(PORT_ISAKMP)); | ||
62 | } | ||
63 | |||
64 | if (!cp) { | ||
65 | /* | ||
66 | * We are not sure if the packet is from our | ||
67 | * service, so our conn_schedule hook should return NF_ACCEPT | ||
68 | */ | ||
69 | IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet " | ||
70 | "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", | ||
71 | inverse ? "ICMP+" : "", | ||
72 | pp->name, | ||
73 | NIPQUAD(iph->saddr), | ||
74 | NIPQUAD(iph->daddr)); | ||
75 | } | ||
76 | |||
77 | return cp; | ||
78 | } | ||
79 | |||
80 | |||
81 | static struct ip_vs_conn * | ||
82 | esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | ||
83 | const struct iphdr *iph, unsigned int proto_off, int inverse) | ||
84 | { | ||
85 | struct ip_vs_conn *cp; | ||
86 | |||
87 | if (likely(!inverse)) { | ||
88 | cp = ip_vs_conn_out_get(IPPROTO_UDP, | ||
89 | iph->saddr, | ||
90 | htons(PORT_ISAKMP), | ||
91 | iph->daddr, | ||
92 | htons(PORT_ISAKMP)); | ||
93 | } else { | ||
94 | cp = ip_vs_conn_out_get(IPPROTO_UDP, | ||
95 | iph->daddr, | ||
96 | htons(PORT_ISAKMP), | ||
97 | iph->saddr, | ||
98 | htons(PORT_ISAKMP)); | ||
99 | } | ||
100 | |||
101 | if (!cp) { | ||
102 | IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet " | ||
103 | "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", | ||
104 | inverse ? "ICMP+" : "", | ||
105 | pp->name, | ||
106 | NIPQUAD(iph->saddr), | ||
107 | NIPQUAD(iph->daddr)); | ||
108 | } | ||
109 | |||
110 | return cp; | ||
111 | } | ||
112 | |||
113 | |||
114 | static int | ||
115 | esp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, | ||
116 | int *verdict, struct ip_vs_conn **cpp) | ||
117 | { | ||
118 | /* | ||
119 | * ESP is only related traffic. Pass the packet to IP stack. | ||
120 | */ | ||
121 | *verdict = NF_ACCEPT; | ||
122 | return 0; | ||
123 | } | ||
124 | |||
125 | |||
126 | static void | ||
127 | esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, | ||
128 | int offset, const char *msg) | ||
129 | { | ||
130 | char buf[256]; | ||
131 | struct iphdr _iph, *ih; | ||
132 | |||
133 | ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); | ||
134 | if (ih == NULL) | ||
135 | sprintf(buf, "%s TRUNCATED", pp->name); | ||
136 | else | ||
137 | sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u", | ||
138 | pp->name, NIPQUAD(ih->saddr), | ||
139 | NIPQUAD(ih->daddr)); | ||
140 | |||
141 | printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); | ||
142 | } | ||
143 | |||
144 | |||
145 | static void esp_init(struct ip_vs_protocol *pp) | ||
146 | { | ||
147 | /* nothing to do now */ | ||
148 | } | ||
149 | |||
150 | |||
151 | static void esp_exit(struct ip_vs_protocol *pp) | ||
152 | { | ||
153 | /* nothing to do now */ | ||
154 | } | ||
155 | |||
156 | |||
157 | struct ip_vs_protocol ip_vs_protocol_esp = { | ||
158 | .name = "ESP", | ||
159 | .protocol = IPPROTO_ESP, | ||
160 | .num_states = 1, | ||
161 | .dont_defrag = 1, | ||
162 | .init = esp_init, | ||
163 | .exit = esp_exit, | ||
164 | .conn_schedule = esp_conn_schedule, | ||
165 | .conn_in_get = esp_conn_in_get, | ||
166 | .conn_out_get = esp_conn_out_get, | ||
167 | .snat_handler = NULL, | ||
168 | .dnat_handler = NULL, | ||
169 | .csum_check = NULL, | ||
170 | .state_transition = NULL, | ||
171 | .register_app = NULL, | ||
172 | .unregister_app = NULL, | ||
173 | .app_conn_bind = NULL, | ||
174 | .debug_packet = esp_debug_packet, | ||
175 | .timeout_change = NULL, /* ISAKMP */ | ||
176 | }; | ||
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index d0ea467986a0..dd4566ea2bff 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/tcp.h> /* for tcphdr */ | 18 | #include <linux/tcp.h> /* for tcphdr */ |
19 | #include <net/ip.h> | 19 | #include <net/ip.h> |
20 | #include <net/tcp.h> /* for csum_tcpudp_magic */ | 20 | #include <net/tcp.h> /* for csum_tcpudp_magic */ |
21 | #include <net/ip6_checksum.h> | ||
21 | #include <linux/netfilter.h> | 22 | #include <linux/netfilter.h> |
22 | #include <linux/netfilter_ipv4.h> | 23 | #include <linux/netfilter_ipv4.h> |
23 | 24 | ||
@@ -25,8 +26,9 @@ | |||
25 | 26 | ||
26 | 27 | ||
27 | static struct ip_vs_conn * | 28 | static struct ip_vs_conn * |
28 | tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | 29 | tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, |
29 | const struct iphdr *iph, unsigned int proto_off, int inverse) | 30 | const struct ip_vs_iphdr *iph, unsigned int proto_off, |
31 | int inverse) | ||
30 | { | 32 | { |
31 | __be16 _ports[2], *pptr; | 33 | __be16 _ports[2], *pptr; |
32 | 34 | ||
@@ -35,19 +37,20 @@ tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
35 | return NULL; | 37 | return NULL; |
36 | 38 | ||
37 | if (likely(!inverse)) { | 39 | if (likely(!inverse)) { |
38 | return ip_vs_conn_in_get(iph->protocol, | 40 | return ip_vs_conn_in_get(af, iph->protocol, |
39 | iph->saddr, pptr[0], | 41 | &iph->saddr, pptr[0], |
40 | iph->daddr, pptr[1]); | 42 | &iph->daddr, pptr[1]); |
41 | } else { | 43 | } else { |
42 | return ip_vs_conn_in_get(iph->protocol, | 44 | return ip_vs_conn_in_get(af, iph->protocol, |
43 | iph->daddr, pptr[1], | 45 | &iph->daddr, pptr[1], |
44 | iph->saddr, pptr[0]); | 46 | &iph->saddr, pptr[0]); |
45 | } | 47 | } |
46 | } | 48 | } |
47 | 49 | ||
48 | static struct ip_vs_conn * | 50 | static struct ip_vs_conn * |
49 | tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | 51 | tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, |
50 | const struct iphdr *iph, unsigned int proto_off, int inverse) | 52 | const struct ip_vs_iphdr *iph, unsigned int proto_off, |
53 | int inverse) | ||
51 | { | 54 | { |
52 | __be16 _ports[2], *pptr; | 55 | __be16 _ports[2], *pptr; |
53 | 56 | ||
@@ -56,34 +59,36 @@ tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
56 | return NULL; | 59 | return NULL; |
57 | 60 | ||
58 | if (likely(!inverse)) { | 61 | if (likely(!inverse)) { |
59 | return ip_vs_conn_out_get(iph->protocol, | 62 | return ip_vs_conn_out_get(af, iph->protocol, |
60 | iph->saddr, pptr[0], | 63 | &iph->saddr, pptr[0], |
61 | iph->daddr, pptr[1]); | 64 | &iph->daddr, pptr[1]); |
62 | } else { | 65 | } else { |
63 | return ip_vs_conn_out_get(iph->protocol, | 66 | return ip_vs_conn_out_get(af, iph->protocol, |
64 | iph->daddr, pptr[1], | 67 | &iph->daddr, pptr[1], |
65 | iph->saddr, pptr[0]); | 68 | &iph->saddr, pptr[0]); |
66 | } | 69 | } |
67 | } | 70 | } |
68 | 71 | ||
69 | 72 | ||
70 | static int | 73 | static int |
71 | tcp_conn_schedule(struct sk_buff *skb, | 74 | tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, |
72 | struct ip_vs_protocol *pp, | ||
73 | int *verdict, struct ip_vs_conn **cpp) | 75 | int *verdict, struct ip_vs_conn **cpp) |
74 | { | 76 | { |
75 | struct ip_vs_service *svc; | 77 | struct ip_vs_service *svc; |
76 | struct tcphdr _tcph, *th; | 78 | struct tcphdr _tcph, *th; |
79 | struct ip_vs_iphdr iph; | ||
77 | 80 | ||
78 | th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); | 81 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); |
82 | |||
83 | th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph); | ||
79 | if (th == NULL) { | 84 | if (th == NULL) { |
80 | *verdict = NF_DROP; | 85 | *verdict = NF_DROP; |
81 | return 0; | 86 | return 0; |
82 | } | 87 | } |
83 | 88 | ||
84 | if (th->syn && | 89 | if (th->syn && |
85 | (svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol, | 90 | (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, |
86 | ip_hdr(skb)->daddr, th->dest))) { | 91 | th->dest))) { |
87 | if (ip_vs_todrop()) { | 92 | if (ip_vs_todrop()) { |
88 | /* | 93 | /* |
89 | * It seems that we are very loaded. | 94 | * It seems that we are very loaded. |
@@ -110,22 +115,62 @@ tcp_conn_schedule(struct sk_buff *skb, | |||
110 | 115 | ||
111 | 116 | ||
112 | static inline void | 117 | static inline void |
113 | tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip, | 118 | tcp_fast_csum_update(int af, struct tcphdr *tcph, |
119 | const union nf_inet_addr *oldip, | ||
120 | const union nf_inet_addr *newip, | ||
114 | __be16 oldport, __be16 newport) | 121 | __be16 oldport, __be16 newport) |
115 | { | 122 | { |
123 | #ifdef CONFIG_IP_VS_IPV6 | ||
124 | if (af == AF_INET6) | ||
125 | tcph->check = | ||
126 | csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, | ||
127 | ip_vs_check_diff2(oldport, newport, | ||
128 | ~csum_unfold(tcph->check)))); | ||
129 | else | ||
130 | #endif | ||
116 | tcph->check = | 131 | tcph->check = |
117 | csum_fold(ip_vs_check_diff4(oldip, newip, | 132 | csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, |
118 | ip_vs_check_diff2(oldport, newport, | 133 | ip_vs_check_diff2(oldport, newport, |
119 | ~csum_unfold(tcph->check)))); | 134 | ~csum_unfold(tcph->check)))); |
120 | } | 135 | } |
121 | 136 | ||
122 | 137 | ||
138 | static inline void | ||
139 | tcp_partial_csum_update(int af, struct tcphdr *tcph, | ||
140 | const union nf_inet_addr *oldip, | ||
141 | const union nf_inet_addr *newip, | ||
142 | __be16 oldlen, __be16 newlen) | ||
143 | { | ||
144 | #ifdef CONFIG_IP_VS_IPV6 | ||
145 | if (af == AF_INET6) | ||
146 | tcph->check = | ||
147 | csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, | ||
148 | ip_vs_check_diff2(oldlen, newlen, | ||
149 | ~csum_unfold(tcph->check)))); | ||
150 | else | ||
151 | #endif | ||
152 | tcph->check = | ||
153 | csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, | ||
154 | ip_vs_check_diff2(oldlen, newlen, | ||
155 | ~csum_unfold(tcph->check)))); | ||
156 | } | ||
157 | |||
158 | |||
123 | static int | 159 | static int |
124 | tcp_snat_handler(struct sk_buff *skb, | 160 | tcp_snat_handler(struct sk_buff *skb, |
125 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp) | 161 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp) |
126 | { | 162 | { |
127 | struct tcphdr *tcph; | 163 | struct tcphdr *tcph; |
128 | const unsigned int tcphoff = ip_hdrlen(skb); | 164 | unsigned int tcphoff; |
165 | int oldlen; | ||
166 | |||
167 | #ifdef CONFIG_IP_VS_IPV6 | ||
168 | if (cp->af == AF_INET6) | ||
169 | tcphoff = sizeof(struct ipv6hdr); | ||
170 | else | ||
171 | #endif | ||
172 | tcphoff = ip_hdrlen(skb); | ||
173 | oldlen = skb->len - tcphoff; | ||
129 | 174 | ||
130 | /* csum_check requires unshared skb */ | 175 | /* csum_check requires unshared skb */ |
131 | if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) | 176 | if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) |
@@ -133,7 +178,7 @@ tcp_snat_handler(struct sk_buff *skb, | |||
133 | 178 | ||
134 | if (unlikely(cp->app != NULL)) { | 179 | if (unlikely(cp->app != NULL)) { |
135 | /* Some checks before mangling */ | 180 | /* Some checks before mangling */ |
136 | if (pp->csum_check && !pp->csum_check(skb, pp)) | 181 | if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) |
137 | return 0; | 182 | return 0; |
138 | 183 | ||
139 | /* Call application helper if needed */ | 184 | /* Call application helper if needed */ |
@@ -141,13 +186,17 @@ tcp_snat_handler(struct sk_buff *skb, | |||
141 | return 0; | 186 | return 0; |
142 | } | 187 | } |
143 | 188 | ||
144 | tcph = (void *)ip_hdr(skb) + tcphoff; | 189 | tcph = (void *)skb_network_header(skb) + tcphoff; |
145 | tcph->source = cp->vport; | 190 | tcph->source = cp->vport; |
146 | 191 | ||
147 | /* Adjust TCP checksums */ | 192 | /* Adjust TCP checksums */ |
148 | if (!cp->app) { | 193 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
194 | tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, | ||
195 | htonl(oldlen), | ||
196 | htonl(skb->len - tcphoff)); | ||
197 | } else if (!cp->app) { | ||
149 | /* Only port and addr are changed, do fast csum update */ | 198 | /* Only port and addr are changed, do fast csum update */ |
150 | tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr, | 199 | tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, |
151 | cp->dport, cp->vport); | 200 | cp->dport, cp->vport); |
152 | if (skb->ip_summed == CHECKSUM_COMPLETE) | 201 | if (skb->ip_summed == CHECKSUM_COMPLETE) |
153 | skb->ip_summed = CHECKSUM_NONE; | 202 | skb->ip_summed = CHECKSUM_NONE; |
@@ -155,9 +204,20 @@ tcp_snat_handler(struct sk_buff *skb, | |||
155 | /* full checksum calculation */ | 204 | /* full checksum calculation */ |
156 | tcph->check = 0; | 205 | tcph->check = 0; |
157 | skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); | 206 | skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); |
158 | tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, | 207 | #ifdef CONFIG_IP_VS_IPV6 |
159 | skb->len - tcphoff, | 208 | if (cp->af == AF_INET6) |
160 | cp->protocol, skb->csum); | 209 | tcph->check = csum_ipv6_magic(&cp->vaddr.in6, |
210 | &cp->caddr.in6, | ||
211 | skb->len - tcphoff, | ||
212 | cp->protocol, skb->csum); | ||
213 | else | ||
214 | #endif | ||
215 | tcph->check = csum_tcpudp_magic(cp->vaddr.ip, | ||
216 | cp->caddr.ip, | ||
217 | skb->len - tcphoff, | ||
218 | cp->protocol, | ||
219 | skb->csum); | ||
220 | |||
161 | IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", | 221 | IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", |
162 | pp->name, tcph->check, | 222 | pp->name, tcph->check, |
163 | (char*)&(tcph->check) - (char*)tcph); | 223 | (char*)&(tcph->check) - (char*)tcph); |
@@ -171,7 +231,16 @@ tcp_dnat_handler(struct sk_buff *skb, | |||
171 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp) | 231 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp) |
172 | { | 232 | { |
173 | struct tcphdr *tcph; | 233 | struct tcphdr *tcph; |
174 | const unsigned int tcphoff = ip_hdrlen(skb); | 234 | unsigned int tcphoff; |
235 | int oldlen; | ||
236 | |||
237 | #ifdef CONFIG_IP_VS_IPV6 | ||
238 | if (cp->af == AF_INET6) | ||
239 | tcphoff = sizeof(struct ipv6hdr); | ||
240 | else | ||
241 | #endif | ||
242 | tcphoff = ip_hdrlen(skb); | ||
243 | oldlen = skb->len - tcphoff; | ||
175 | 244 | ||
176 | /* csum_check requires unshared skb */ | 245 | /* csum_check requires unshared skb */ |
177 | if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) | 246 | if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) |
@@ -179,7 +248,7 @@ tcp_dnat_handler(struct sk_buff *skb, | |||
179 | 248 | ||
180 | if (unlikely(cp->app != NULL)) { | 249 | if (unlikely(cp->app != NULL)) { |
181 | /* Some checks before mangling */ | 250 | /* Some checks before mangling */ |
182 | if (pp->csum_check && !pp->csum_check(skb, pp)) | 251 | if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) |
183 | return 0; | 252 | return 0; |
184 | 253 | ||
185 | /* | 254 | /* |
@@ -190,15 +259,19 @@ tcp_dnat_handler(struct sk_buff *skb, | |||
190 | return 0; | 259 | return 0; |
191 | } | 260 | } |
192 | 261 | ||
193 | tcph = (void *)ip_hdr(skb) + tcphoff; | 262 | tcph = (void *)skb_network_header(skb) + tcphoff; |
194 | tcph->dest = cp->dport; | 263 | tcph->dest = cp->dport; |
195 | 264 | ||
196 | /* | 265 | /* |
197 | * Adjust TCP checksums | 266 | * Adjust TCP checksums |
198 | */ | 267 | */ |
199 | if (!cp->app) { | 268 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
269 | tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, | ||
270 | htonl(oldlen), | ||
271 | htonl(skb->len - tcphoff)); | ||
272 | } else if (!cp->app) { | ||
200 | /* Only port and addr are changed, do fast csum update */ | 273 | /* Only port and addr are changed, do fast csum update */ |
201 | tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr, | 274 | tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr, |
202 | cp->vport, cp->dport); | 275 | cp->vport, cp->dport); |
203 | if (skb->ip_summed == CHECKSUM_COMPLETE) | 276 | if (skb->ip_summed == CHECKSUM_COMPLETE) |
204 | skb->ip_summed = CHECKSUM_NONE; | 277 | skb->ip_summed = CHECKSUM_NONE; |
@@ -206,9 +279,19 @@ tcp_dnat_handler(struct sk_buff *skb, | |||
206 | /* full checksum calculation */ | 279 | /* full checksum calculation */ |
207 | tcph->check = 0; | 280 | tcph->check = 0; |
208 | skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); | 281 | skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); |
209 | tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, | 282 | #ifdef CONFIG_IP_VS_IPV6 |
210 | skb->len - tcphoff, | 283 | if (cp->af == AF_INET6) |
211 | cp->protocol, skb->csum); | 284 | tcph->check = csum_ipv6_magic(&cp->caddr.in6, |
285 | &cp->daddr.in6, | ||
286 | skb->len - tcphoff, | ||
287 | cp->protocol, skb->csum); | ||
288 | else | ||
289 | #endif | ||
290 | tcph->check = csum_tcpudp_magic(cp->caddr.ip, | ||
291 | cp->daddr.ip, | ||
292 | skb->len - tcphoff, | ||
293 | cp->protocol, | ||
294 | skb->csum); | ||
212 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 295 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
213 | } | 296 | } |
214 | return 1; | 297 | return 1; |
@@ -216,21 +299,43 @@ tcp_dnat_handler(struct sk_buff *skb, | |||
216 | 299 | ||
217 | 300 | ||
218 | static int | 301 | static int |
219 | tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) | 302 | tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) |
220 | { | 303 | { |
221 | const unsigned int tcphoff = ip_hdrlen(skb); | 304 | unsigned int tcphoff; |
305 | |||
306 | #ifdef CONFIG_IP_VS_IPV6 | ||
307 | if (af == AF_INET6) | ||
308 | tcphoff = sizeof(struct ipv6hdr); | ||
309 | else | ||
310 | #endif | ||
311 | tcphoff = ip_hdrlen(skb); | ||
222 | 312 | ||
223 | switch (skb->ip_summed) { | 313 | switch (skb->ip_summed) { |
224 | case CHECKSUM_NONE: | 314 | case CHECKSUM_NONE: |
225 | skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); | 315 | skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); |
226 | case CHECKSUM_COMPLETE: | 316 | case CHECKSUM_COMPLETE: |
227 | if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, | 317 | #ifdef CONFIG_IP_VS_IPV6 |
228 | skb->len - tcphoff, | 318 | if (af == AF_INET6) { |
229 | ip_hdr(skb)->protocol, skb->csum)) { | 319 | if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, |
230 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, | 320 | &ipv6_hdr(skb)->daddr, |
231 | "Failed checksum for"); | 321 | skb->len - tcphoff, |
232 | return 0; | 322 | ipv6_hdr(skb)->nexthdr, |
233 | } | 323 | skb->csum)) { |
324 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, | ||
325 | "Failed checksum for"); | ||
326 | return 0; | ||
327 | } | ||
328 | } else | ||
329 | #endif | ||
330 | if (csum_tcpudp_magic(ip_hdr(skb)->saddr, | ||
331 | ip_hdr(skb)->daddr, | ||
332 | skb->len - tcphoff, | ||
333 | ip_hdr(skb)->protocol, | ||
334 | skb->csum)) { | ||
335 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, | ||
336 | "Failed checksum for"); | ||
337 | return 0; | ||
338 | } | ||
234 | break; | 339 | break; |
235 | default: | 340 | default: |
236 | /* No need to checksum. */ | 341 | /* No need to checksum. */ |
@@ -419,19 +524,23 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, | |||
419 | if (new_state != cp->state) { | 524 | if (new_state != cp->state) { |
420 | struct ip_vs_dest *dest = cp->dest; | 525 | struct ip_vs_dest *dest = cp->dest; |
421 | 526 | ||
422 | IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->" | 527 | IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->" |
423 | "%u.%u.%u.%u:%d state: %s->%s conn->refcnt:%d\n", | 528 | "%s:%d state: %s->%s conn->refcnt:%d\n", |
424 | pp->name, | 529 | pp->name, |
425 | (state_off==TCP_DIR_OUTPUT)?"output ":"input ", | 530 | ((state_off == TCP_DIR_OUTPUT) ? |
426 | th->syn? 'S' : '.', | 531 | "output " : "input "), |
427 | th->fin? 'F' : '.', | 532 | th->syn ? 'S' : '.', |
428 | th->ack? 'A' : '.', | 533 | th->fin ? 'F' : '.', |
429 | th->rst? 'R' : '.', | 534 | th->ack ? 'A' : '.', |
430 | NIPQUAD(cp->daddr), ntohs(cp->dport), | 535 | th->rst ? 'R' : '.', |
431 | NIPQUAD(cp->caddr), ntohs(cp->cport), | 536 | IP_VS_DBG_ADDR(cp->af, &cp->daddr), |
432 | tcp_state_name(cp->state), | 537 | ntohs(cp->dport), |
433 | tcp_state_name(new_state), | 538 | IP_VS_DBG_ADDR(cp->af, &cp->caddr), |
434 | atomic_read(&cp->refcnt)); | 539 | ntohs(cp->cport), |
540 | tcp_state_name(cp->state), | ||
541 | tcp_state_name(new_state), | ||
542 | atomic_read(&cp->refcnt)); | ||
543 | |||
435 | if (dest) { | 544 | if (dest) { |
436 | if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && | 545 | if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && |
437 | (new_state != IP_VS_TCP_S_ESTABLISHED)) { | 546 | (new_state != IP_VS_TCP_S_ESTABLISHED)) { |
@@ -461,7 +570,13 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, | |||
461 | { | 570 | { |
462 | struct tcphdr _tcph, *th; | 571 | struct tcphdr _tcph, *th; |
463 | 572 | ||
464 | th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); | 573 | #ifdef CONFIG_IP_VS_IPV6 |
574 | int ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr); | ||
575 | #else | ||
576 | int ihl = ip_hdrlen(skb); | ||
577 | #endif | ||
578 | |||
579 | th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph); | ||
465 | if (th == NULL) | 580 | if (th == NULL) |
466 | return 0; | 581 | return 0; |
467 | 582 | ||
@@ -546,12 +661,15 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) | |||
546 | break; | 661 | break; |
547 | spin_unlock(&tcp_app_lock); | 662 | spin_unlock(&tcp_app_lock); |
548 | 663 | ||
549 | IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" | 664 | IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" |
550 | "%u.%u.%u.%u:%u to app %s on port %u\n", | 665 | "%s:%u to app %s on port %u\n", |
551 | __func__, | 666 | __func__, |
552 | NIPQUAD(cp->caddr), ntohs(cp->cport), | 667 | IP_VS_DBG_ADDR(cp->af, &cp->caddr), |
553 | NIPQUAD(cp->vaddr), ntohs(cp->vport), | 668 | ntohs(cp->cport), |
554 | inc->name, ntohs(inc->port)); | 669 | IP_VS_DBG_ADDR(cp->af, &cp->vaddr), |
670 | ntohs(cp->vport), | ||
671 | inc->name, ntohs(inc->port)); | ||
672 | |||
555 | cp->app = inc; | 673 | cp->app = inc; |
556 | if (inc->init_conn) | 674 | if (inc->init_conn) |
557 | result = inc->init_conn(inc, cp); | 675 | result = inc->init_conn(inc, cp); |
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index c6be5d56823f..6eb6039d6343 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c | |||
@@ -22,10 +22,12 @@ | |||
22 | 22 | ||
23 | #include <net/ip_vs.h> | 23 | #include <net/ip_vs.h> |
24 | #include <net/ip.h> | 24 | #include <net/ip.h> |
25 | #include <net/ip6_checksum.h> | ||
25 | 26 | ||
26 | static struct ip_vs_conn * | 27 | static struct ip_vs_conn * |
27 | udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | 28 | udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, |
28 | const struct iphdr *iph, unsigned int proto_off, int inverse) | 29 | const struct ip_vs_iphdr *iph, unsigned int proto_off, |
30 | int inverse) | ||
29 | { | 31 | { |
30 | struct ip_vs_conn *cp; | 32 | struct ip_vs_conn *cp; |
31 | __be16 _ports[2], *pptr; | 33 | __be16 _ports[2], *pptr; |
@@ -35,13 +37,13 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
35 | return NULL; | 37 | return NULL; |
36 | 38 | ||
37 | if (likely(!inverse)) { | 39 | if (likely(!inverse)) { |
38 | cp = ip_vs_conn_in_get(iph->protocol, | 40 | cp = ip_vs_conn_in_get(af, iph->protocol, |
39 | iph->saddr, pptr[0], | 41 | &iph->saddr, pptr[0], |
40 | iph->daddr, pptr[1]); | 42 | &iph->daddr, pptr[1]); |
41 | } else { | 43 | } else { |
42 | cp = ip_vs_conn_in_get(iph->protocol, | 44 | cp = ip_vs_conn_in_get(af, iph->protocol, |
43 | iph->daddr, pptr[1], | 45 | &iph->daddr, pptr[1], |
44 | iph->saddr, pptr[0]); | 46 | &iph->saddr, pptr[0]); |
45 | } | 47 | } |
46 | 48 | ||
47 | return cp; | 49 | return cp; |
@@ -49,25 +51,25 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
49 | 51 | ||
50 | 52 | ||
51 | static struct ip_vs_conn * | 53 | static struct ip_vs_conn * |
52 | udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | 54 | udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, |
53 | const struct iphdr *iph, unsigned int proto_off, int inverse) | 55 | const struct ip_vs_iphdr *iph, unsigned int proto_off, |
56 | int inverse) | ||
54 | { | 57 | { |
55 | struct ip_vs_conn *cp; | 58 | struct ip_vs_conn *cp; |
56 | __be16 _ports[2], *pptr; | 59 | __be16 _ports[2], *pptr; |
57 | 60 | ||
58 | pptr = skb_header_pointer(skb, ip_hdrlen(skb), | 61 | pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); |
59 | sizeof(_ports), _ports); | ||
60 | if (pptr == NULL) | 62 | if (pptr == NULL) |
61 | return NULL; | 63 | return NULL; |
62 | 64 | ||
63 | if (likely(!inverse)) { | 65 | if (likely(!inverse)) { |
64 | cp = ip_vs_conn_out_get(iph->protocol, | 66 | cp = ip_vs_conn_out_get(af, iph->protocol, |
65 | iph->saddr, pptr[0], | 67 | &iph->saddr, pptr[0], |
66 | iph->daddr, pptr[1]); | 68 | &iph->daddr, pptr[1]); |
67 | } else { | 69 | } else { |
68 | cp = ip_vs_conn_out_get(iph->protocol, | 70 | cp = ip_vs_conn_out_get(af, iph->protocol, |
69 | iph->daddr, pptr[1], | 71 | &iph->daddr, pptr[1], |
70 | iph->saddr, pptr[0]); | 72 | &iph->saddr, pptr[0]); |
71 | } | 73 | } |
72 | 74 | ||
73 | return cp; | 75 | return cp; |
@@ -75,21 +77,24 @@ udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
75 | 77 | ||
76 | 78 | ||
77 | static int | 79 | static int |
78 | udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, | 80 | udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, |
79 | int *verdict, struct ip_vs_conn **cpp) | 81 | int *verdict, struct ip_vs_conn **cpp) |
80 | { | 82 | { |
81 | struct ip_vs_service *svc; | 83 | struct ip_vs_service *svc; |
82 | struct udphdr _udph, *uh; | 84 | struct udphdr _udph, *uh; |
85 | struct ip_vs_iphdr iph; | ||
86 | |||
87 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | ||
83 | 88 | ||
84 | uh = skb_header_pointer(skb, ip_hdrlen(skb), | 89 | uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph); |
85 | sizeof(_udph), &_udph); | ||
86 | if (uh == NULL) { | 90 | if (uh == NULL) { |
87 | *verdict = NF_DROP; | 91 | *verdict = NF_DROP; |
88 | return 0; | 92 | return 0; |
89 | } | 93 | } |
90 | 94 | ||
91 | if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol, | 95 | svc = ip_vs_service_get(af, skb->mark, iph.protocol, |
92 | ip_hdr(skb)->daddr, uh->dest))) { | 96 | &iph.daddr, uh->dest); |
97 | if (svc) { | ||
93 | if (ip_vs_todrop()) { | 98 | if (ip_vs_todrop()) { |
94 | /* | 99 | /* |
95 | * It seems that we are very loaded. | 100 | * It seems that we are very loaded. |
@@ -116,23 +121,63 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
116 | 121 | ||
117 | 122 | ||
118 | static inline void | 123 | static inline void |
119 | udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip, | 124 | udp_fast_csum_update(int af, struct udphdr *uhdr, |
125 | const union nf_inet_addr *oldip, | ||
126 | const union nf_inet_addr *newip, | ||
120 | __be16 oldport, __be16 newport) | 127 | __be16 oldport, __be16 newport) |
121 | { | 128 | { |
122 | uhdr->check = | 129 | #ifdef CONFIG_IP_VS_IPV6 |
123 | csum_fold(ip_vs_check_diff4(oldip, newip, | 130 | if (af == AF_INET6) |
124 | ip_vs_check_diff2(oldport, newport, | 131 | uhdr->check = |
125 | ~csum_unfold(uhdr->check)))); | 132 | csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, |
133 | ip_vs_check_diff2(oldport, newport, | ||
134 | ~csum_unfold(uhdr->check)))); | ||
135 | else | ||
136 | #endif | ||
137 | uhdr->check = | ||
138 | csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, | ||
139 | ip_vs_check_diff2(oldport, newport, | ||
140 | ~csum_unfold(uhdr->check)))); | ||
126 | if (!uhdr->check) | 141 | if (!uhdr->check) |
127 | uhdr->check = CSUM_MANGLED_0; | 142 | uhdr->check = CSUM_MANGLED_0; |
128 | } | 143 | } |
129 | 144 | ||
145 | static inline void | ||
146 | udp_partial_csum_update(int af, struct udphdr *uhdr, | ||
147 | const union nf_inet_addr *oldip, | ||
148 | const union nf_inet_addr *newip, | ||
149 | __be16 oldlen, __be16 newlen) | ||
150 | { | ||
151 | #ifdef CONFIG_IP_VS_IPV6 | ||
152 | if (af == AF_INET6) | ||
153 | uhdr->check = | ||
154 | csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, | ||
155 | ip_vs_check_diff2(oldlen, newlen, | ||
156 | ~csum_unfold(uhdr->check)))); | ||
157 | else | ||
158 | #endif | ||
159 | uhdr->check = | ||
160 | csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, | ||
161 | ip_vs_check_diff2(oldlen, newlen, | ||
162 | ~csum_unfold(uhdr->check)))); | ||
163 | } | ||
164 | |||
165 | |||
130 | static int | 166 | static int |
131 | udp_snat_handler(struct sk_buff *skb, | 167 | udp_snat_handler(struct sk_buff *skb, |
132 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp) | 168 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp) |
133 | { | 169 | { |
134 | struct udphdr *udph; | 170 | struct udphdr *udph; |
135 | const unsigned int udphoff = ip_hdrlen(skb); | 171 | unsigned int udphoff; |
172 | int oldlen; | ||
173 | |||
174 | #ifdef CONFIG_IP_VS_IPV6 | ||
175 | if (cp->af == AF_INET6) | ||
176 | udphoff = sizeof(struct ipv6hdr); | ||
177 | else | ||
178 | #endif | ||
179 | udphoff = ip_hdrlen(skb); | ||
180 | oldlen = skb->len - udphoff; | ||
136 | 181 | ||
137 | /* csum_check requires unshared skb */ | 182 | /* csum_check requires unshared skb */ |
138 | if (!skb_make_writable(skb, udphoff+sizeof(*udph))) | 183 | if (!skb_make_writable(skb, udphoff+sizeof(*udph))) |
@@ -140,7 +185,7 @@ udp_snat_handler(struct sk_buff *skb, | |||
140 | 185 | ||
141 | if (unlikely(cp->app != NULL)) { | 186 | if (unlikely(cp->app != NULL)) { |
142 | /* Some checks before mangling */ | 187 | /* Some checks before mangling */ |
143 | if (pp->csum_check && !pp->csum_check(skb, pp)) | 188 | if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) |
144 | return 0; | 189 | return 0; |
145 | 190 | ||
146 | /* | 191 | /* |
@@ -150,15 +195,19 @@ udp_snat_handler(struct sk_buff *skb, | |||
150 | return 0; | 195 | return 0; |
151 | } | 196 | } |
152 | 197 | ||
153 | udph = (void *)ip_hdr(skb) + udphoff; | 198 | udph = (void *)skb_network_header(skb) + udphoff; |
154 | udph->source = cp->vport; | 199 | udph->source = cp->vport; |
155 | 200 | ||
156 | /* | 201 | /* |
157 | * Adjust UDP checksums | 202 | * Adjust UDP checksums |
158 | */ | 203 | */ |
159 | if (!cp->app && (udph->check != 0)) { | 204 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
205 | udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, | ||
206 | htonl(oldlen), | ||
207 | htonl(skb->len - udphoff)); | ||
208 | } else if (!cp->app && (udph->check != 0)) { | ||
160 | /* Only port and addr are changed, do fast csum update */ | 209 | /* Only port and addr are changed, do fast csum update */ |
161 | udp_fast_csum_update(udph, cp->daddr, cp->vaddr, | 210 | udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, |
162 | cp->dport, cp->vport); | 211 | cp->dport, cp->vport); |
163 | if (skb->ip_summed == CHECKSUM_COMPLETE) | 212 | if (skb->ip_summed == CHECKSUM_COMPLETE) |
164 | skb->ip_summed = CHECKSUM_NONE; | 213 | skb->ip_summed = CHECKSUM_NONE; |
@@ -166,9 +215,19 @@ udp_snat_handler(struct sk_buff *skb, | |||
166 | /* full checksum calculation */ | 215 | /* full checksum calculation */ |
167 | udph->check = 0; | 216 | udph->check = 0; |
168 | skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); | 217 | skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); |
169 | udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, | 218 | #ifdef CONFIG_IP_VS_IPV6 |
170 | skb->len - udphoff, | 219 | if (cp->af == AF_INET6) |
171 | cp->protocol, skb->csum); | 220 | udph->check = csum_ipv6_magic(&cp->vaddr.in6, |
221 | &cp->caddr.in6, | ||
222 | skb->len - udphoff, | ||
223 | cp->protocol, skb->csum); | ||
224 | else | ||
225 | #endif | ||
226 | udph->check = csum_tcpudp_magic(cp->vaddr.ip, | ||
227 | cp->caddr.ip, | ||
228 | skb->len - udphoff, | ||
229 | cp->protocol, | ||
230 | skb->csum); | ||
172 | if (udph->check == 0) | 231 | if (udph->check == 0) |
173 | udph->check = CSUM_MANGLED_0; | 232 | udph->check = CSUM_MANGLED_0; |
174 | IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", | 233 | IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", |
@@ -184,7 +243,16 @@ udp_dnat_handler(struct sk_buff *skb, | |||
184 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp) | 243 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp) |
185 | { | 244 | { |
186 | struct udphdr *udph; | 245 | struct udphdr *udph; |
187 | unsigned int udphoff = ip_hdrlen(skb); | 246 | unsigned int udphoff; |
247 | int oldlen; | ||
248 | |||
249 | #ifdef CONFIG_IP_VS_IPV6 | ||
250 | if (cp->af == AF_INET6) | ||
251 | udphoff = sizeof(struct ipv6hdr); | ||
252 | else | ||
253 | #endif | ||
254 | udphoff = ip_hdrlen(skb); | ||
255 | oldlen = skb->len - udphoff; | ||
188 | 256 | ||
189 | /* csum_check requires unshared skb */ | 257 | /* csum_check requires unshared skb */ |
190 | if (!skb_make_writable(skb, udphoff+sizeof(*udph))) | 258 | if (!skb_make_writable(skb, udphoff+sizeof(*udph))) |
@@ -192,7 +260,7 @@ udp_dnat_handler(struct sk_buff *skb, | |||
192 | 260 | ||
193 | if (unlikely(cp->app != NULL)) { | 261 | if (unlikely(cp->app != NULL)) { |
194 | /* Some checks before mangling */ | 262 | /* Some checks before mangling */ |
195 | if (pp->csum_check && !pp->csum_check(skb, pp)) | 263 | if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) |
196 | return 0; | 264 | return 0; |
197 | 265 | ||
198 | /* | 266 | /* |
@@ -203,15 +271,19 @@ udp_dnat_handler(struct sk_buff *skb, | |||
203 | return 0; | 271 | return 0; |
204 | } | 272 | } |
205 | 273 | ||
206 | udph = (void *)ip_hdr(skb) + udphoff; | 274 | udph = (void *)skb_network_header(skb) + udphoff; |
207 | udph->dest = cp->dport; | 275 | udph->dest = cp->dport; |
208 | 276 | ||
209 | /* | 277 | /* |
210 | * Adjust UDP checksums | 278 | * Adjust UDP checksums |
211 | */ | 279 | */ |
212 | if (!cp->app && (udph->check != 0)) { | 280 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
281 | udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, | ||
282 | htonl(oldlen), | ||
283 | htonl(skb->len - udphoff)); | ||
284 | } else if (!cp->app && (udph->check != 0)) { | ||
213 | /* Only port and addr are changed, do fast csum update */ | 285 | /* Only port and addr are changed, do fast csum update */ |
214 | udp_fast_csum_update(udph, cp->vaddr, cp->daddr, | 286 | udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, |
215 | cp->vport, cp->dport); | 287 | cp->vport, cp->dport); |
216 | if (skb->ip_summed == CHECKSUM_COMPLETE) | 288 | if (skb->ip_summed == CHECKSUM_COMPLETE) |
217 | skb->ip_summed = CHECKSUM_NONE; | 289 | skb->ip_summed = CHECKSUM_NONE; |
@@ -219,9 +291,19 @@ udp_dnat_handler(struct sk_buff *skb, | |||
219 | /* full checksum calculation */ | 291 | /* full checksum calculation */ |
220 | udph->check = 0; | 292 | udph->check = 0; |
221 | skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); | 293 | skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); |
222 | udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, | 294 | #ifdef CONFIG_IP_VS_IPV6 |
223 | skb->len - udphoff, | 295 | if (cp->af == AF_INET6) |
224 | cp->protocol, skb->csum); | 296 | udph->check = csum_ipv6_magic(&cp->caddr.in6, |
297 | &cp->daddr.in6, | ||
298 | skb->len - udphoff, | ||
299 | cp->protocol, skb->csum); | ||
300 | else | ||
301 | #endif | ||
302 | udph->check = csum_tcpudp_magic(cp->caddr.ip, | ||
303 | cp->daddr.ip, | ||
304 | skb->len - udphoff, | ||
305 | cp->protocol, | ||
306 | skb->csum); | ||
225 | if (udph->check == 0) | 307 | if (udph->check == 0) |
226 | udph->check = CSUM_MANGLED_0; | 308 | udph->check = CSUM_MANGLED_0; |
227 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 309 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
@@ -231,10 +313,17 @@ udp_dnat_handler(struct sk_buff *skb, | |||
231 | 313 | ||
232 | 314 | ||
233 | static int | 315 | static int |
234 | udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) | 316 | udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) |
235 | { | 317 | { |
236 | struct udphdr _udph, *uh; | 318 | struct udphdr _udph, *uh; |
237 | const unsigned int udphoff = ip_hdrlen(skb); | 319 | unsigned int udphoff; |
320 | |||
321 | #ifdef CONFIG_IP_VS_IPV6 | ||
322 | if (af == AF_INET6) | ||
323 | udphoff = sizeof(struct ipv6hdr); | ||
324 | else | ||
325 | #endif | ||
326 | udphoff = ip_hdrlen(skb); | ||
238 | 327 | ||
239 | uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph); | 328 | uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph); |
240 | if (uh == NULL) | 329 | if (uh == NULL) |
@@ -246,15 +335,28 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) | |||
246 | skb->csum = skb_checksum(skb, udphoff, | 335 | skb->csum = skb_checksum(skb, udphoff, |
247 | skb->len - udphoff, 0); | 336 | skb->len - udphoff, 0); |
248 | case CHECKSUM_COMPLETE: | 337 | case CHECKSUM_COMPLETE: |
249 | if (csum_tcpudp_magic(ip_hdr(skb)->saddr, | 338 | #ifdef CONFIG_IP_VS_IPV6 |
250 | ip_hdr(skb)->daddr, | 339 | if (af == AF_INET6) { |
251 | skb->len - udphoff, | 340 | if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, |
252 | ip_hdr(skb)->protocol, | 341 | &ipv6_hdr(skb)->daddr, |
253 | skb->csum)) { | 342 | skb->len - udphoff, |
254 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, | 343 | ipv6_hdr(skb)->nexthdr, |
255 | "Failed checksum for"); | 344 | skb->csum)) { |
256 | return 0; | 345 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, |
257 | } | 346 | "Failed checksum for"); |
347 | return 0; | ||
348 | } | ||
349 | } else | ||
350 | #endif | ||
351 | if (csum_tcpudp_magic(ip_hdr(skb)->saddr, | ||
352 | ip_hdr(skb)->daddr, | ||
353 | skb->len - udphoff, | ||
354 | ip_hdr(skb)->protocol, | ||
355 | skb->csum)) { | ||
356 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, | ||
357 | "Failed checksum for"); | ||
358 | return 0; | ||
359 | } | ||
258 | break; | 360 | break; |
259 | default: | 361 | default: |
260 | /* No need to checksum. */ | 362 | /* No need to checksum. */ |
@@ -340,12 +442,15 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) | |||
340 | break; | 442 | break; |
341 | spin_unlock(&udp_app_lock); | 443 | spin_unlock(&udp_app_lock); |
342 | 444 | ||
343 | IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" | 445 | IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" |
344 | "%u.%u.%u.%u:%u to app %s on port %u\n", | 446 | "%s:%u to app %s on port %u\n", |
345 | __func__, | 447 | __func__, |
346 | NIPQUAD(cp->caddr), ntohs(cp->cport), | 448 | IP_VS_DBG_ADDR(cp->af, &cp->caddr), |
347 | NIPQUAD(cp->vaddr), ntohs(cp->vport), | 449 | ntohs(cp->cport), |
348 | inc->name, ntohs(inc->port)); | 450 | IP_VS_DBG_ADDR(cp->af, &cp->vaddr), |
451 | ntohs(cp->vport), | ||
452 | inc->name, ntohs(inc->port)); | ||
453 | |||
349 | cp->app = inc; | 454 | cp->app = inc; |
350 | if (inc->init_conn) | 455 | if (inc->init_conn) |
351 | result = inc->init_conn(inc, cp); | 456 | result = inc->init_conn(inc, cp); |
diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c index 358110d17e59..a22195f68ac4 100644 --- a/net/ipv4/ipvs/ip_vs_rr.c +++ b/net/ipv4/ipvs/ip_vs_rr.c | |||
@@ -32,12 +32,6 @@ static int ip_vs_rr_init_svc(struct ip_vs_service *svc) | |||
32 | } | 32 | } |
33 | 33 | ||
34 | 34 | ||
35 | static int ip_vs_rr_done_svc(struct ip_vs_service *svc) | ||
36 | { | ||
37 | return 0; | ||
38 | } | ||
39 | |||
40 | |||
41 | static int ip_vs_rr_update_svc(struct ip_vs_service *svc) | 35 | static int ip_vs_rr_update_svc(struct ip_vs_service *svc) |
42 | { | 36 | { |
43 | svc->sched_data = &svc->destinations; | 37 | svc->sched_data = &svc->destinations; |
@@ -80,11 +74,11 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | |||
80 | out: | 74 | out: |
81 | svc->sched_data = q; | 75 | svc->sched_data = q; |
82 | write_unlock(&svc->sched_lock); | 76 | write_unlock(&svc->sched_lock); |
83 | IP_VS_DBG(6, "RR: server %u.%u.%u.%u:%u " | 77 | IP_VS_DBG_BUF(6, "RR: server %s:%u " |
84 | "activeconns %d refcnt %d weight %d\n", | 78 | "activeconns %d refcnt %d weight %d\n", |
85 | NIPQUAD(dest->addr), ntohs(dest->port), | 79 | IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), |
86 | atomic_read(&dest->activeconns), | 80 | atomic_read(&dest->activeconns), |
87 | atomic_read(&dest->refcnt), atomic_read(&dest->weight)); | 81 | atomic_read(&dest->refcnt), atomic_read(&dest->weight)); |
88 | 82 | ||
89 | return dest; | 83 | return dest; |
90 | } | 84 | } |
@@ -95,8 +89,10 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = { | |||
95 | .refcnt = ATOMIC_INIT(0), | 89 | .refcnt = ATOMIC_INIT(0), |
96 | .module = THIS_MODULE, | 90 | .module = THIS_MODULE, |
97 | .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), | 91 | .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), |
92 | #ifdef CONFIG_IP_VS_IPV6 | ||
93 | .supports_ipv6 = 1, | ||
94 | #endif | ||
98 | .init_service = ip_vs_rr_init_svc, | 95 | .init_service = ip_vs_rr_init_svc, |
99 | .done_service = ip_vs_rr_done_svc, | ||
100 | .update_service = ip_vs_rr_update_svc, | 96 | .update_service = ip_vs_rr_update_svc, |
101 | .schedule = ip_vs_rr_schedule, | 97 | .schedule = ip_vs_rr_schedule, |
102 | }; | 98 | }; |
diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c index 77663d84cbd1..7d2f22f04b83 100644 --- a/net/ipv4/ipvs/ip_vs_sed.c +++ b/net/ipv4/ipvs/ip_vs_sed.c | |||
@@ -41,27 +41,6 @@ | |||
41 | #include <net/ip_vs.h> | 41 | #include <net/ip_vs.h> |
42 | 42 | ||
43 | 43 | ||
44 | static int | ||
45 | ip_vs_sed_init_svc(struct ip_vs_service *svc) | ||
46 | { | ||
47 | return 0; | ||
48 | } | ||
49 | |||
50 | |||
51 | static int | ||
52 | ip_vs_sed_done_svc(struct ip_vs_service *svc) | ||
53 | { | ||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | |||
58 | static int | ||
59 | ip_vs_sed_update_svc(struct ip_vs_service *svc) | ||
60 | { | ||
61 | return 0; | ||
62 | } | ||
63 | |||
64 | |||
65 | static inline unsigned int | 44 | static inline unsigned int |
66 | ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) | 45 | ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) |
67 | { | 46 | { |
@@ -122,12 +101,12 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | |||
122 | } | 101 | } |
123 | } | 102 | } |
124 | 103 | ||
125 | IP_VS_DBG(6, "SED: server %u.%u.%u.%u:%u " | 104 | IP_VS_DBG_BUF(6, "SED: server %s:%u " |
126 | "activeconns %d refcnt %d weight %d overhead %d\n", | 105 | "activeconns %d refcnt %d weight %d overhead %d\n", |
127 | NIPQUAD(least->addr), ntohs(least->port), | 106 | IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port), |
128 | atomic_read(&least->activeconns), | 107 | atomic_read(&least->activeconns), |
129 | atomic_read(&least->refcnt), | 108 | atomic_read(&least->refcnt), |
130 | atomic_read(&least->weight), loh); | 109 | atomic_read(&least->weight), loh); |
131 | 110 | ||
132 | return least; | 111 | return least; |
133 | } | 112 | } |
@@ -139,9 +118,9 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler = | |||
139 | .refcnt = ATOMIC_INIT(0), | 118 | .refcnt = ATOMIC_INIT(0), |
140 | .module = THIS_MODULE, | 119 | .module = THIS_MODULE, |
141 | .n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list), | 120 | .n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list), |
142 | .init_service = ip_vs_sed_init_svc, | 121 | #ifdef CONFIG_IP_VS_IPV6 |
143 | .done_service = ip_vs_sed_done_svc, | 122 | .supports_ipv6 = 1, |
144 | .update_service = ip_vs_sed_update_svc, | 123 | #endif |
145 | .schedule = ip_vs_sed_schedule, | 124 | .schedule = ip_vs_sed_schedule, |
146 | }; | 125 | }; |
147 | 126 | ||
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c index 7b979e228056..1d96de27fefd 100644 --- a/net/ipv4/ipvs/ip_vs_sh.c +++ b/net/ipv4/ipvs/ip_vs_sh.c | |||
@@ -215,7 +215,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | |||
215 | IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u " | 215 | IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u " |
216 | "--> server %u.%u.%u.%u:%d\n", | 216 | "--> server %u.%u.%u.%u:%d\n", |
217 | NIPQUAD(iph->saddr), | 217 | NIPQUAD(iph->saddr), |
218 | NIPQUAD(dest->addr), | 218 | NIPQUAD(dest->addr.ip), |
219 | ntohs(dest->port)); | 219 | ntohs(dest->port)); |
220 | 220 | ||
221 | return dest; | 221 | return dest; |
@@ -231,6 +231,9 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler = | |||
231 | .refcnt = ATOMIC_INIT(0), | 231 | .refcnt = ATOMIC_INIT(0), |
232 | .module = THIS_MODULE, | 232 | .module = THIS_MODULE, |
233 | .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list), | 233 | .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list), |
234 | #ifdef CONFIG_IP_VS_IPV6 | ||
235 | .supports_ipv6 = 0, | ||
236 | #endif | ||
234 | .init_service = ip_vs_sh_init_svc, | 237 | .init_service = ip_vs_sh_init_svc, |
235 | .done_service = ip_vs_sh_done_svc, | 238 | .done_service = ip_vs_sh_done_svc, |
236 | .update_service = ip_vs_sh_update_svc, | 239 | .update_service = ip_vs_sh_update_svc, |
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index a652da2c3200..28237a5f62e2 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c | |||
@@ -256,9 +256,9 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp) | |||
256 | s->cport = cp->cport; | 256 | s->cport = cp->cport; |
257 | s->vport = cp->vport; | 257 | s->vport = cp->vport; |
258 | s->dport = cp->dport; | 258 | s->dport = cp->dport; |
259 | s->caddr = cp->caddr; | 259 | s->caddr = cp->caddr.ip; |
260 | s->vaddr = cp->vaddr; | 260 | s->vaddr = cp->vaddr.ip; |
261 | s->daddr = cp->daddr; | 261 | s->daddr = cp->daddr.ip; |
262 | s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED); | 262 | s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED); |
263 | s->state = htons(cp->state); | 263 | s->state = htons(cp->state); |
264 | if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { | 264 | if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { |
@@ -366,21 +366,28 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) | |||
366 | } | 366 | } |
367 | 367 | ||
368 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) | 368 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) |
369 | cp = ip_vs_conn_in_get(s->protocol, | 369 | cp = ip_vs_conn_in_get(AF_INET, s->protocol, |
370 | s->caddr, s->cport, | 370 | (union nf_inet_addr *)&s->caddr, |
371 | s->vaddr, s->vport); | 371 | s->cport, |
372 | (union nf_inet_addr *)&s->vaddr, | ||
373 | s->vport); | ||
372 | else | 374 | else |
373 | cp = ip_vs_ct_in_get(s->protocol, | 375 | cp = ip_vs_ct_in_get(AF_INET, s->protocol, |
374 | s->caddr, s->cport, | 376 | (union nf_inet_addr *)&s->caddr, |
375 | s->vaddr, s->vport); | 377 | s->cport, |
378 | (union nf_inet_addr *)&s->vaddr, | ||
379 | s->vport); | ||
376 | if (!cp) { | 380 | if (!cp) { |
377 | /* | 381 | /* |
378 | * Find the appropriate destination for the connection. | 382 | * Find the appropriate destination for the connection. |
379 | * If it is not found the connection will remain unbound | 383 | * If it is not found the connection will remain unbound |
380 | * but still handled. | 384 | * but still handled. |
381 | */ | 385 | */ |
382 | dest = ip_vs_find_dest(s->daddr, s->dport, | 386 | dest = ip_vs_find_dest(AF_INET, |
383 | s->vaddr, s->vport, | 387 | (union nf_inet_addr *)&s->daddr, |
388 | s->dport, | ||
389 | (union nf_inet_addr *)&s->vaddr, | ||
390 | s->vport, | ||
384 | s->protocol); | 391 | s->protocol); |
385 | /* Set the approprite ativity flag */ | 392 | /* Set the approprite ativity flag */ |
386 | if (s->protocol == IPPROTO_TCP) { | 393 | if (s->protocol == IPPROTO_TCP) { |
@@ -389,10 +396,13 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) | |||
389 | else | 396 | else |
390 | flags &= ~IP_VS_CONN_F_INACTIVE; | 397 | flags &= ~IP_VS_CONN_F_INACTIVE; |
391 | } | 398 | } |
392 | cp = ip_vs_conn_new(s->protocol, | 399 | cp = ip_vs_conn_new(AF_INET, s->protocol, |
393 | s->caddr, s->cport, | 400 | (union nf_inet_addr *)&s->caddr, |
394 | s->vaddr, s->vport, | 401 | s->cport, |
395 | s->daddr, s->dport, | 402 | (union nf_inet_addr *)&s->vaddr, |
403 | s->vport, | ||
404 | (union nf_inet_addr *)&s->daddr, | ||
405 | s->dport, | ||
396 | flags, dest); | 406 | flags, dest); |
397 | if (dest) | 407 | if (dest) |
398 | atomic_dec(&dest->refcnt); | 408 | atomic_dec(&dest->refcnt); |
diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c index 9b0ef86bb1f7..8c596e712599 100644 --- a/net/ipv4/ipvs/ip_vs_wlc.c +++ b/net/ipv4/ipvs/ip_vs_wlc.c | |||
@@ -25,27 +25,6 @@ | |||
25 | #include <net/ip_vs.h> | 25 | #include <net/ip_vs.h> |
26 | 26 | ||
27 | 27 | ||
28 | static int | ||
29 | ip_vs_wlc_init_svc(struct ip_vs_service *svc) | ||
30 | { | ||
31 | return 0; | ||
32 | } | ||
33 | |||
34 | |||
35 | static int | ||
36 | ip_vs_wlc_done_svc(struct ip_vs_service *svc) | ||
37 | { | ||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | |||
42 | static int | ||
43 | ip_vs_wlc_update_svc(struct ip_vs_service *svc) | ||
44 | { | ||
45 | return 0; | ||
46 | } | ||
47 | |||
48 | |||
49 | static inline unsigned int | 28 | static inline unsigned int |
50 | ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest) | 29 | ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest) |
51 | { | 30 | { |
@@ -110,12 +89,12 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | |||
110 | } | 89 | } |
111 | } | 90 | } |
112 | 91 | ||
113 | IP_VS_DBG(6, "WLC: server %u.%u.%u.%u:%u " | 92 | IP_VS_DBG_BUF(6, "WLC: server %s:%u " |
114 | "activeconns %d refcnt %d weight %d overhead %d\n", | 93 | "activeconns %d refcnt %d weight %d overhead %d\n", |
115 | NIPQUAD(least->addr), ntohs(least->port), | 94 | IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port), |
116 | atomic_read(&least->activeconns), | 95 | atomic_read(&least->activeconns), |
117 | atomic_read(&least->refcnt), | 96 | atomic_read(&least->refcnt), |
118 | atomic_read(&least->weight), loh); | 97 | atomic_read(&least->weight), loh); |
119 | 98 | ||
120 | return least; | 99 | return least; |
121 | } | 100 | } |
@@ -127,9 +106,9 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler = | |||
127 | .refcnt = ATOMIC_INIT(0), | 106 | .refcnt = ATOMIC_INIT(0), |
128 | .module = THIS_MODULE, | 107 | .module = THIS_MODULE, |
129 | .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list), | 108 | .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list), |
130 | .init_service = ip_vs_wlc_init_svc, | 109 | #ifdef CONFIG_IP_VS_IPV6 |
131 | .done_service = ip_vs_wlc_done_svc, | 110 | .supports_ipv6 = 1, |
132 | .update_service = ip_vs_wlc_update_svc, | 111 | #endif |
133 | .schedule = ip_vs_wlc_schedule, | 112 | .schedule = ip_vs_wlc_schedule, |
134 | }; | 113 | }; |
135 | 114 | ||
diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/ipv4/ipvs/ip_vs_wrr.c index 0d86a79b87b5..7ea92fed50bf 100644 --- a/net/ipv4/ipvs/ip_vs_wrr.c +++ b/net/ipv4/ipvs/ip_vs_wrr.c | |||
@@ -195,12 +195,12 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | |||
195 | } | 195 | } |
196 | } | 196 | } |
197 | 197 | ||
198 | IP_VS_DBG(6, "WRR: server %u.%u.%u.%u:%u " | 198 | IP_VS_DBG_BUF(6, "WRR: server %s:%u " |
199 | "activeconns %d refcnt %d weight %d\n", | 199 | "activeconns %d refcnt %d weight %d\n", |
200 | NIPQUAD(dest->addr), ntohs(dest->port), | 200 | IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), |
201 | atomic_read(&dest->activeconns), | 201 | atomic_read(&dest->activeconns), |
202 | atomic_read(&dest->refcnt), | 202 | atomic_read(&dest->refcnt), |
203 | atomic_read(&dest->weight)); | 203 | atomic_read(&dest->weight)); |
204 | 204 | ||
205 | out: | 205 | out: |
206 | write_unlock(&svc->sched_lock); | 206 | write_unlock(&svc->sched_lock); |
@@ -213,6 +213,9 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = { | |||
213 | .refcnt = ATOMIC_INIT(0), | 213 | .refcnt = ATOMIC_INIT(0), |
214 | .module = THIS_MODULE, | 214 | .module = THIS_MODULE, |
215 | .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list), | 215 | .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list), |
216 | #ifdef CONFIG_IP_VS_IPV6 | ||
217 | .supports_ipv6 = 1, | ||
218 | #endif | ||
216 | .init_service = ip_vs_wrr_init_svc, | 219 | .init_service = ip_vs_wrr_init_svc, |
217 | .done_service = ip_vs_wrr_done_svc, | 220 | .done_service = ip_vs_wrr_done_svc, |
218 | .update_service = ip_vs_wrr_update_svc, | 221 | .update_service = ip_vs_wrr_update_svc, |
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index 9892d4aca42e..02ddc2b3ce2e 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c | |||
@@ -20,6 +20,9 @@ | |||
20 | #include <net/udp.h> | 20 | #include <net/udp.h> |
21 | #include <net/icmp.h> /* for icmp_send */ | 21 | #include <net/icmp.h> /* for icmp_send */ |
22 | #include <net/route.h> /* for ip_route_output */ | 22 | #include <net/route.h> /* for ip_route_output */ |
23 | #include <net/ipv6.h> | ||
24 | #include <net/ip6_route.h> | ||
25 | #include <linux/icmpv6.h> | ||
23 | #include <linux/netfilter.h> | 26 | #include <linux/netfilter.h> |
24 | #include <linux/netfilter_ipv4.h> | 27 | #include <linux/netfilter_ipv4.h> |
25 | 28 | ||
@@ -47,7 +50,8 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie) | |||
47 | 50 | ||
48 | if (!dst) | 51 | if (!dst) |
49 | return NULL; | 52 | return NULL; |
50 | if ((dst->obsolete || rtos != dest->dst_rtos) && | 53 | if ((dst->obsolete |
54 | || (dest->af == AF_INET && rtos != dest->dst_rtos)) && | ||
51 | dst->ops->check(dst, cookie) == NULL) { | 55 | dst->ops->check(dst, cookie) == NULL) { |
52 | dest->dst_cache = NULL; | 56 | dest->dst_cache = NULL; |
53 | dst_release(dst); | 57 | dst_release(dst); |
@@ -71,7 +75,7 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) | |||
71 | .oif = 0, | 75 | .oif = 0, |
72 | .nl_u = { | 76 | .nl_u = { |
73 | .ip4_u = { | 77 | .ip4_u = { |
74 | .daddr = dest->addr, | 78 | .daddr = dest->addr.ip, |
75 | .saddr = 0, | 79 | .saddr = 0, |
76 | .tos = rtos, } }, | 80 | .tos = rtos, } }, |
77 | }; | 81 | }; |
@@ -80,12 +84,12 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) | |||
80 | spin_unlock(&dest->dst_lock); | 84 | spin_unlock(&dest->dst_lock); |
81 | IP_VS_DBG_RL("ip_route_output error, " | 85 | IP_VS_DBG_RL("ip_route_output error, " |
82 | "dest: %u.%u.%u.%u\n", | 86 | "dest: %u.%u.%u.%u\n", |
83 | NIPQUAD(dest->addr)); | 87 | NIPQUAD(dest->addr.ip)); |
84 | return NULL; | 88 | return NULL; |
85 | } | 89 | } |
86 | __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst)); | 90 | __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst)); |
87 | IP_VS_DBG(10, "new dst %u.%u.%u.%u, refcnt=%d, rtos=%X\n", | 91 | IP_VS_DBG(10, "new dst %u.%u.%u.%u, refcnt=%d, rtos=%X\n", |
88 | NIPQUAD(dest->addr), | 92 | NIPQUAD(dest->addr.ip), |
89 | atomic_read(&rt->u.dst.__refcnt), rtos); | 93 | atomic_read(&rt->u.dst.__refcnt), rtos); |
90 | } | 94 | } |
91 | spin_unlock(&dest->dst_lock); | 95 | spin_unlock(&dest->dst_lock); |
@@ -94,14 +98,14 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) | |||
94 | .oif = 0, | 98 | .oif = 0, |
95 | .nl_u = { | 99 | .nl_u = { |
96 | .ip4_u = { | 100 | .ip4_u = { |
97 | .daddr = cp->daddr, | 101 | .daddr = cp->daddr.ip, |
98 | .saddr = 0, | 102 | .saddr = 0, |
99 | .tos = rtos, } }, | 103 | .tos = rtos, } }, |
100 | }; | 104 | }; |
101 | 105 | ||
102 | if (ip_route_output_key(&init_net, &rt, &fl)) { | 106 | if (ip_route_output_key(&init_net, &rt, &fl)) { |
103 | IP_VS_DBG_RL("ip_route_output error, dest: " | 107 | IP_VS_DBG_RL("ip_route_output error, dest: " |
104 | "%u.%u.%u.%u\n", NIPQUAD(cp->daddr)); | 108 | "%u.%u.%u.%u\n", NIPQUAD(cp->daddr.ip)); |
105 | return NULL; | 109 | return NULL; |
106 | } | 110 | } |
107 | } | 111 | } |
@@ -109,6 +113,70 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) | |||
109 | return rt; | 113 | return rt; |
110 | } | 114 | } |
111 | 115 | ||
116 | #ifdef CONFIG_IP_VS_IPV6 | ||
117 | static struct rt6_info * | ||
118 | __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp) | ||
119 | { | ||
120 | struct rt6_info *rt; /* Route to the other host */ | ||
121 | struct ip_vs_dest *dest = cp->dest; | ||
122 | |||
123 | if (dest) { | ||
124 | spin_lock(&dest->dst_lock); | ||
125 | rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0); | ||
126 | if (!rt) { | ||
127 | struct flowi fl = { | ||
128 | .oif = 0, | ||
129 | .nl_u = { | ||
130 | .ip6_u = { | ||
131 | .daddr = dest->addr.in6, | ||
132 | .saddr = { | ||
133 | .s6_addr32 = | ||
134 | { 0, 0, 0, 0 }, | ||
135 | }, | ||
136 | }, | ||
137 | }, | ||
138 | }; | ||
139 | |||
140 | rt = (struct rt6_info *)ip6_route_output(&init_net, | ||
141 | NULL, &fl); | ||
142 | if (!rt) { | ||
143 | spin_unlock(&dest->dst_lock); | ||
144 | IP_VS_DBG_RL("ip6_route_output error, " | ||
145 | "dest: " NIP6_FMT "\n", | ||
146 | NIP6(dest->addr.in6)); | ||
147 | return NULL; | ||
148 | } | ||
149 | __ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst)); | ||
150 | IP_VS_DBG(10, "new dst " NIP6_FMT ", refcnt=%d\n", | ||
151 | NIP6(dest->addr.in6), | ||
152 | atomic_read(&rt->u.dst.__refcnt)); | ||
153 | } | ||
154 | spin_unlock(&dest->dst_lock); | ||
155 | } else { | ||
156 | struct flowi fl = { | ||
157 | .oif = 0, | ||
158 | .nl_u = { | ||
159 | .ip6_u = { | ||
160 | .daddr = cp->daddr.in6, | ||
161 | .saddr = { | ||
162 | .s6_addr32 = { 0, 0, 0, 0 }, | ||
163 | }, | ||
164 | }, | ||
165 | }, | ||
166 | }; | ||
167 | |||
168 | rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); | ||
169 | if (!rt) { | ||
170 | IP_VS_DBG_RL("ip6_route_output error, dest: " | ||
171 | NIP6_FMT "\n", NIP6(cp->daddr.in6)); | ||
172 | return NULL; | ||
173 | } | ||
174 | } | ||
175 | |||
176 | return rt; | ||
177 | } | ||
178 | #endif | ||
179 | |||
112 | 180 | ||
113 | /* | 181 | /* |
114 | * Release dest->dst_cache before a dest is removed | 182 | * Release dest->dst_cache before a dest is removed |
@@ -123,11 +191,11 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) | |||
123 | dst_release(old_dst); | 191 | dst_release(old_dst); |
124 | } | 192 | } |
125 | 193 | ||
126 | #define IP_VS_XMIT(skb, rt) \ | 194 | #define IP_VS_XMIT(pf, skb, rt) \ |
127 | do { \ | 195 | do { \ |
128 | (skb)->ipvs_property = 1; \ | 196 | (skb)->ipvs_property = 1; \ |
129 | skb_forward_csum(skb); \ | 197 | skb_forward_csum(skb); \ |
130 | NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, (skb), NULL, \ | 198 | NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ |
131 | (rt)->u.dst.dev, dst_output); \ | 199 | (rt)->u.dst.dev, dst_output); \ |
132 | } while (0) | 200 | } while (0) |
133 | 201 | ||
@@ -200,7 +268,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
200 | /* Another hack: avoid icmp_send in ip_fragment */ | 268 | /* Another hack: avoid icmp_send in ip_fragment */ |
201 | skb->local_df = 1; | 269 | skb->local_df = 1; |
202 | 270 | ||
203 | IP_VS_XMIT(skb, rt); | 271 | IP_VS_XMIT(PF_INET, skb, rt); |
204 | 272 | ||
205 | LeaveFunction(10); | 273 | LeaveFunction(10); |
206 | return NF_STOLEN; | 274 | return NF_STOLEN; |
@@ -213,6 +281,70 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
213 | return NF_STOLEN; | 281 | return NF_STOLEN; |
214 | } | 282 | } |
215 | 283 | ||
284 | #ifdef CONFIG_IP_VS_IPV6 | ||
285 | int | ||
286 | ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | ||
287 | struct ip_vs_protocol *pp) | ||
288 | { | ||
289 | struct rt6_info *rt; /* Route to the other host */ | ||
290 | struct ipv6hdr *iph = ipv6_hdr(skb); | ||
291 | int mtu; | ||
292 | struct flowi fl = { | ||
293 | .oif = 0, | ||
294 | .nl_u = { | ||
295 | .ip6_u = { | ||
296 | .daddr = iph->daddr, | ||
297 | .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } }, | ||
298 | }; | ||
299 | |||
300 | EnterFunction(10); | ||
301 | |||
302 | rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); | ||
303 | if (!rt) { | ||
304 | IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, " | ||
305 | "dest: " NIP6_FMT "\n", NIP6(iph->daddr)); | ||
306 | goto tx_error_icmp; | ||
307 | } | ||
308 | |||
309 | /* MTU checking */ | ||
310 | mtu = dst_mtu(&rt->u.dst); | ||
311 | if (skb->len > mtu) { | ||
312 | dst_release(&rt->u.dst); | ||
313 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); | ||
314 | IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): frag needed\n"); | ||
315 | goto tx_error; | ||
316 | } | ||
317 | |||
318 | /* | ||
319 | * Call ip_send_check because we are not sure it is called | ||
320 | * after ip_defrag. Is copy-on-write needed? | ||
321 | */ | ||
322 | skb = skb_share_check(skb, GFP_ATOMIC); | ||
323 | if (unlikely(skb == NULL)) { | ||
324 | dst_release(&rt->u.dst); | ||
325 | return NF_STOLEN; | ||
326 | } | ||
327 | |||
328 | /* drop old route */ | ||
329 | dst_release(skb->dst); | ||
330 | skb->dst = &rt->u.dst; | ||
331 | |||
332 | /* Another hack: avoid icmp_send in ip_fragment */ | ||
333 | skb->local_df = 1; | ||
334 | |||
335 | IP_VS_XMIT(PF_INET6, skb, rt); | ||
336 | |||
337 | LeaveFunction(10); | ||
338 | return NF_STOLEN; | ||
339 | |||
340 | tx_error_icmp: | ||
341 | dst_link_failure(skb); | ||
342 | tx_error: | ||
343 | kfree_skb(skb); | ||
344 | LeaveFunction(10); | ||
345 | return NF_STOLEN; | ||
346 | } | ||
347 | #endif | ||
216 | 348 | ||
217 | /* | 349 | /* |
218 | * NAT transmitter (only for outside-to-inside nat forwarding) | 350 | * NAT transmitter (only for outside-to-inside nat forwarding) |
@@ -264,7 +396,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
264 | /* mangle the packet */ | 396 | /* mangle the packet */ |
265 | if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) | 397 | if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) |
266 | goto tx_error; | 398 | goto tx_error; |
267 | ip_hdr(skb)->daddr = cp->daddr; | 399 | ip_hdr(skb)->daddr = cp->daddr.ip; |
268 | ip_send_check(ip_hdr(skb)); | 400 | ip_send_check(ip_hdr(skb)); |
269 | 401 | ||
270 | IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); | 402 | IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); |
@@ -276,7 +408,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
276 | /* Another hack: avoid icmp_send in ip_fragment */ | 408 | /* Another hack: avoid icmp_send in ip_fragment */ |
277 | skb->local_df = 1; | 409 | skb->local_df = 1; |
278 | 410 | ||
279 | IP_VS_XMIT(skb, rt); | 411 | IP_VS_XMIT(PF_INET, skb, rt); |
280 | 412 | ||
281 | LeaveFunction(10); | 413 | LeaveFunction(10); |
282 | return NF_STOLEN; | 414 | return NF_STOLEN; |
@@ -292,6 +424,83 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
292 | goto tx_error; | 424 | goto tx_error; |
293 | } | 425 | } |
294 | 426 | ||
427 | #ifdef CONFIG_IP_VS_IPV6 | ||
428 | int | ||
429 | ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | ||
430 | struct ip_vs_protocol *pp) | ||
431 | { | ||
432 | struct rt6_info *rt; /* Route to the other host */ | ||
433 | int mtu; | ||
434 | |||
435 | EnterFunction(10); | ||
436 | |||
437 | /* check if it is a connection of no-client-port */ | ||
438 | if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { | ||
439 | __be16 _pt, *p; | ||
440 | p = skb_header_pointer(skb, sizeof(struct ipv6hdr), | ||
441 | sizeof(_pt), &_pt); | ||
442 | if (p == NULL) | ||
443 | goto tx_error; | ||
444 | ip_vs_conn_fill_cport(cp, *p); | ||
445 | IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); | ||
446 | } | ||
447 | |||
448 | rt = __ip_vs_get_out_rt_v6(cp); | ||
449 | if (!rt) | ||
450 | goto tx_error_icmp; | ||
451 | |||
452 | /* MTU checking */ | ||
453 | mtu = dst_mtu(&rt->u.dst); | ||
454 | if (skb->len > mtu) { | ||
455 | dst_release(&rt->u.dst); | ||
456 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); | ||
457 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, | ||
458 | "ip_vs_nat_xmit_v6(): frag needed for"); | ||
459 | goto tx_error; | ||
460 | } | ||
461 | |||
462 | /* copy-on-write the packet before mangling it */ | ||
463 | if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) | ||
464 | goto tx_error_put; | ||
465 | |||
466 | if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) | ||
467 | goto tx_error_put; | ||
468 | |||
469 | /* drop old route */ | ||
470 | dst_release(skb->dst); | ||
471 | skb->dst = &rt->u.dst; | ||
472 | |||
473 | /* mangle the packet */ | ||
474 | if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) | ||
475 | goto tx_error; | ||
476 | ipv6_hdr(skb)->daddr = cp->daddr.in6; | ||
477 | |||
478 | IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); | ||
479 | |||
480 | /* FIXME: when application helper enlarges the packet and the length | ||
481 | is larger than the MTU of outgoing device, there will be still | ||
482 | MTU problem. */ | ||
483 | |||
484 | /* Another hack: avoid icmp_send in ip_fragment */ | ||
485 | skb->local_df = 1; | ||
486 | |||
487 | IP_VS_XMIT(PF_INET6, skb, rt); | ||
488 | |||
489 | LeaveFunction(10); | ||
490 | return NF_STOLEN; | ||
491 | |||
492 | tx_error_icmp: | ||
493 | dst_link_failure(skb); | ||
494 | tx_error: | ||
495 | LeaveFunction(10); | ||
496 | kfree_skb(skb); | ||
497 | return NF_STOLEN; | ||
498 | tx_error_put: | ||
499 | dst_release(&rt->u.dst); | ||
500 | goto tx_error; | ||
501 | } | ||
502 | #endif | ||
503 | |||
295 | 504 | ||
296 | /* | 505 | /* |
297 | * IP Tunneling transmitter | 506 | * IP Tunneling transmitter |
@@ -423,6 +632,112 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
423 | return NF_STOLEN; | 632 | return NF_STOLEN; |
424 | } | 633 | } |
425 | 634 | ||
635 | #ifdef CONFIG_IP_VS_IPV6 | ||
636 | int | ||
637 | ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | ||
638 | struct ip_vs_protocol *pp) | ||
639 | { | ||
640 | struct rt6_info *rt; /* Route to the other host */ | ||
641 | struct net_device *tdev; /* Device to other host */ | ||
642 | struct ipv6hdr *old_iph = ipv6_hdr(skb); | ||
643 | sk_buff_data_t old_transport_header = skb->transport_header; | ||
644 | struct ipv6hdr *iph; /* Our new IP header */ | ||
645 | unsigned int max_headroom; /* The extra header space needed */ | ||
646 | int mtu; | ||
647 | |||
648 | EnterFunction(10); | ||
649 | |||
650 | if (skb->protocol != htons(ETH_P_IPV6)) { | ||
651 | IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): protocol error, " | ||
652 | "ETH_P_IPV6: %d, skb protocol: %d\n", | ||
653 | htons(ETH_P_IPV6), skb->protocol); | ||
654 | goto tx_error; | ||
655 | } | ||
656 | |||
657 | rt = __ip_vs_get_out_rt_v6(cp); | ||
658 | if (!rt) | ||
659 | goto tx_error_icmp; | ||
660 | |||
661 | tdev = rt->u.dst.dev; | ||
662 | |||
663 | mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr); | ||
664 | /* TODO IPv6: do we need this check in IPv6? */ | ||
665 | if (mtu < 1280) { | ||
666 | dst_release(&rt->u.dst); | ||
667 | IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n"); | ||
668 | goto tx_error; | ||
669 | } | ||
670 | if (skb->dst) | ||
671 | skb->dst->ops->update_pmtu(skb->dst, mtu); | ||
672 | |||
673 | if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { | ||
674 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); | ||
675 | dst_release(&rt->u.dst); | ||
676 | IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): frag needed\n"); | ||
677 | goto tx_error; | ||
678 | } | ||
679 | |||
680 | /* | ||
681 | * Okay, now see if we can stuff it in the buffer as-is. | ||
682 | */ | ||
683 | max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); | ||
684 | |||
685 | if (skb_headroom(skb) < max_headroom | ||
686 | || skb_cloned(skb) || skb_shared(skb)) { | ||
687 | struct sk_buff *new_skb = | ||
688 | skb_realloc_headroom(skb, max_headroom); | ||
689 | if (!new_skb) { | ||
690 | dst_release(&rt->u.dst); | ||
691 | kfree_skb(skb); | ||
692 | IP_VS_ERR_RL("ip_vs_tunnel_xmit_v6(): no memory\n"); | ||
693 | return NF_STOLEN; | ||
694 | } | ||
695 | kfree_skb(skb); | ||
696 | skb = new_skb; | ||
697 | old_iph = ipv6_hdr(skb); | ||
698 | } | ||
699 | |||
700 | skb->transport_header = old_transport_header; | ||
701 | |||
702 | skb_push(skb, sizeof(struct ipv6hdr)); | ||
703 | skb_reset_network_header(skb); | ||
704 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | ||
705 | |||
706 | /* drop old route */ | ||
707 | dst_release(skb->dst); | ||
708 | skb->dst = &rt->u.dst; | ||
709 | |||
710 | /* | ||
711 | * Push down and install the IPIP header. | ||
712 | */ | ||
713 | iph = ipv6_hdr(skb); | ||
714 | iph->version = 6; | ||
715 | iph->nexthdr = IPPROTO_IPV6; | ||
716 | iph->payload_len = old_iph->payload_len + sizeof(old_iph); | ||
717 | iph->priority = old_iph->priority; | ||
718 | memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); | ||
719 | iph->daddr = rt->rt6i_dst.addr; | ||
720 | iph->saddr = cp->vaddr.in6; /* rt->rt6i_src.addr; */ | ||
721 | iph->hop_limit = old_iph->hop_limit; | ||
722 | |||
723 | /* Another hack: avoid icmp_send in ip_fragment */ | ||
724 | skb->local_df = 1; | ||
725 | |||
726 | ip6_local_out(skb); | ||
727 | |||
728 | LeaveFunction(10); | ||
729 | |||
730 | return NF_STOLEN; | ||
731 | |||
732 | tx_error_icmp: | ||
733 | dst_link_failure(skb); | ||
734 | tx_error: | ||
735 | kfree_skb(skb); | ||
736 | LeaveFunction(10); | ||
737 | return NF_STOLEN; | ||
738 | } | ||
739 | #endif | ||
740 | |||
426 | 741 | ||
427 | /* | 742 | /* |
428 | * Direct Routing transmitter | 743 | * Direct Routing transmitter |
@@ -467,7 +782,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
467 | /* Another hack: avoid icmp_send in ip_fragment */ | 782 | /* Another hack: avoid icmp_send in ip_fragment */ |
468 | skb->local_df = 1; | 783 | skb->local_df = 1; |
469 | 784 | ||
470 | IP_VS_XMIT(skb, rt); | 785 | IP_VS_XMIT(PF_INET, skb, rt); |
471 | 786 | ||
472 | LeaveFunction(10); | 787 | LeaveFunction(10); |
473 | return NF_STOLEN; | 788 | return NF_STOLEN; |
@@ -480,6 +795,60 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
480 | return NF_STOLEN; | 795 | return NF_STOLEN; |
481 | } | 796 | } |
482 | 797 | ||
798 | #ifdef CONFIG_IP_VS_IPV6 | ||
799 | int | ||
800 | ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | ||
801 | struct ip_vs_protocol *pp) | ||
802 | { | ||
803 | struct rt6_info *rt; /* Route to the other host */ | ||
804 | int mtu; | ||
805 | |||
806 | EnterFunction(10); | ||
807 | |||
808 | rt = __ip_vs_get_out_rt_v6(cp); | ||
809 | if (!rt) | ||
810 | goto tx_error_icmp; | ||
811 | |||
812 | /* MTU checking */ | ||
813 | mtu = dst_mtu(&rt->u.dst); | ||
814 | if (skb->len > mtu) { | ||
815 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); | ||
816 | dst_release(&rt->u.dst); | ||
817 | IP_VS_DBG_RL("ip_vs_dr_xmit_v6(): frag needed\n"); | ||
818 | goto tx_error; | ||
819 | } | ||
820 | |||
821 | /* | ||
822 | * Call ip_send_check because we are not sure it is called | ||
823 | * after ip_defrag. Is copy-on-write needed? | ||
824 | */ | ||
825 | skb = skb_share_check(skb, GFP_ATOMIC); | ||
826 | if (unlikely(skb == NULL)) { | ||
827 | dst_release(&rt->u.dst); | ||
828 | return NF_STOLEN; | ||
829 | } | ||
830 | |||
831 | /* drop old route */ | ||
832 | dst_release(skb->dst); | ||
833 | skb->dst = &rt->u.dst; | ||
834 | |||
835 | /* Another hack: avoid icmp_send in ip_fragment */ | ||
836 | skb->local_df = 1; | ||
837 | |||
838 | IP_VS_XMIT(PF_INET6, skb, rt); | ||
839 | |||
840 | LeaveFunction(10); | ||
841 | return NF_STOLEN; | ||
842 | |||
843 | tx_error_icmp: | ||
844 | dst_link_failure(skb); | ||
845 | tx_error: | ||
846 | kfree_skb(skb); | ||
847 | LeaveFunction(10); | ||
848 | return NF_STOLEN; | ||
849 | } | ||
850 | #endif | ||
851 | |||
483 | 852 | ||
484 | /* | 853 | /* |
485 | * ICMP packet transmitter | 854 | * ICMP packet transmitter |
@@ -540,7 +909,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
540 | /* Another hack: avoid icmp_send in ip_fragment */ | 909 | /* Another hack: avoid icmp_send in ip_fragment */ |
541 | skb->local_df = 1; | 910 | skb->local_df = 1; |
542 | 911 | ||
543 | IP_VS_XMIT(skb, rt); | 912 | IP_VS_XMIT(PF_INET, skb, rt); |
544 | 913 | ||
545 | rc = NF_STOLEN; | 914 | rc = NF_STOLEN; |
546 | goto out; | 915 | goto out; |
@@ -557,3 +926,79 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
557 | ip_rt_put(rt); | 926 | ip_rt_put(rt); |
558 | goto tx_error; | 927 | goto tx_error; |
559 | } | 928 | } |
929 | |||
930 | #ifdef CONFIG_IP_VS_IPV6 | ||
931 | int | ||
932 | ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | ||
933 | struct ip_vs_protocol *pp, int offset) | ||
934 | { | ||
935 | struct rt6_info *rt; /* Route to the other host */ | ||
936 | int mtu; | ||
937 | int rc; | ||
938 | |||
939 | EnterFunction(10); | ||
940 | |||
941 | /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be | ||
942 | forwarded directly here, because there is no need to | ||
943 | translate address/port back */ | ||
944 | if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { | ||
945 | if (cp->packet_xmit) | ||
946 | rc = cp->packet_xmit(skb, cp, pp); | ||
947 | else | ||
948 | rc = NF_ACCEPT; | ||
949 | /* do not touch skb anymore */ | ||
950 | atomic_inc(&cp->in_pkts); | ||
951 | goto out; | ||
952 | } | ||
953 | |||
954 | /* | ||
955 | * mangle and send the packet here (only for VS/NAT) | ||
956 | */ | ||
957 | |||
958 | rt = __ip_vs_get_out_rt_v6(cp); | ||
959 | if (!rt) | ||
960 | goto tx_error_icmp; | ||
961 | |||
962 | /* MTU checking */ | ||
963 | mtu = dst_mtu(&rt->u.dst); | ||
964 | if (skb->len > mtu) { | ||
965 | dst_release(&rt->u.dst); | ||
966 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); | ||
967 | IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n"); | ||
968 | goto tx_error; | ||
969 | } | ||
970 | |||
971 | /* copy-on-write the packet before mangling it */ | ||
972 | if (!skb_make_writable(skb, offset)) | ||
973 | goto tx_error_put; | ||
974 | |||
975 | if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) | ||
976 | goto tx_error_put; | ||
977 | |||
978 | /* drop the old route when skb is not shared */ | ||
979 | dst_release(skb->dst); | ||
980 | skb->dst = &rt->u.dst; | ||
981 | |||
982 | ip_vs_nat_icmp_v6(skb, pp, cp, 0); | ||
983 | |||
984 | /* Another hack: avoid icmp_send in ip_fragment */ | ||
985 | skb->local_df = 1; | ||
986 | |||
987 | IP_VS_XMIT(PF_INET6, skb, rt); | ||
988 | |||
989 | rc = NF_STOLEN; | ||
990 | goto out; | ||
991 | |||
992 | tx_error_icmp: | ||
993 | dst_link_failure(skb); | ||
994 | tx_error: | ||
995 | dev_kfree_skb(skb); | ||
996 | rc = NF_STOLEN; | ||
997 | out: | ||
998 | LeaveFunction(10); | ||
999 | return rc; | ||
1000 | tx_error_put: | ||
1001 | dst_release(&rt->u.dst); | ||
1002 | goto tx_error; | ||
1003 | } | ||
1004 | #endif | ||
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6ee5354c9aa1..f62187bb6d08 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -282,6 +282,8 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) | |||
282 | struct rtable *r = NULL; | 282 | struct rtable *r = NULL; |
283 | 283 | ||
284 | for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { | 284 | for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { |
285 | if (!rt_hash_table[st->bucket].chain) | ||
286 | continue; | ||
285 | rcu_read_lock_bh(); | 287 | rcu_read_lock_bh(); |
286 | r = rcu_dereference(rt_hash_table[st->bucket].chain); | 288 | r = rcu_dereference(rt_hash_table[st->bucket].chain); |
287 | while (r) { | 289 | while (r) { |
@@ -299,11 +301,14 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, | |||
299 | struct rtable *r) | 301 | struct rtable *r) |
300 | { | 302 | { |
301 | struct rt_cache_iter_state *st = seq->private; | 303 | struct rt_cache_iter_state *st = seq->private; |
304 | |||
302 | r = r->u.dst.rt_next; | 305 | r = r->u.dst.rt_next; |
303 | while (!r) { | 306 | while (!r) { |
304 | rcu_read_unlock_bh(); | 307 | rcu_read_unlock_bh(); |
305 | if (--st->bucket < 0) | 308 | do { |
306 | break; | 309 | if (--st->bucket < 0) |
310 | return NULL; | ||
311 | } while (!rt_hash_table[st->bucket].chain); | ||
307 | rcu_read_lock_bh(); | 312 | rcu_read_lock_bh(); |
308 | r = rt_hash_table[st->bucket].chain; | 313 | r = rt_hash_table[st->bucket].chain; |
309 | } | 314 | } |
@@ -2840,7 +2845,9 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
2840 | if (s_h < 0) | 2845 | if (s_h < 0) |
2841 | s_h = 0; | 2846 | s_h = 0; |
2842 | s_idx = idx = cb->args[1]; | 2847 | s_idx = idx = cb->args[1]; |
2843 | for (h = s_h; h <= rt_hash_mask; h++) { | 2848 | for (h = s_h; h <= rt_hash_mask; h++, s_idx = 0) { |
2849 | if (!rt_hash_table[h].chain) | ||
2850 | continue; | ||
2844 | rcu_read_lock_bh(); | 2851 | rcu_read_lock_bh(); |
2845 | for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; | 2852 | for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; |
2846 | rt = rcu_dereference(rt->u.dst.rt_next), idx++) { | 2853 | rt = rcu_dereference(rt->u.dst.rt_next), idx++) { |
@@ -2859,7 +2866,6 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
2859 | dst_release(xchg(&skb->dst, NULL)); | 2866 | dst_release(xchg(&skb->dst, NULL)); |
2860 | } | 2867 | } |
2861 | rcu_read_unlock_bh(); | 2868 | rcu_read_unlock_bh(); |
2862 | s_idx = 0; | ||
2863 | } | 2869 | } |
2864 | 2870 | ||
2865 | done: | 2871 | done: |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 67ccce2a96bd..3b76bce769dd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -979,6 +979,39 @@ static void tcp_update_reordering(struct sock *sk, const int metric, | |||
979 | } | 979 | } |
980 | } | 980 | } |
981 | 981 | ||
982 | /* This must be called before lost_out is incremented */ | ||
983 | static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) | ||
984 | { | ||
985 | if ((tp->retransmit_skb_hint == NULL) || | ||
986 | before(TCP_SKB_CB(skb)->seq, | ||
987 | TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) | ||
988 | tp->retransmit_skb_hint = skb; | ||
989 | |||
990 | if (!tp->lost_out || | ||
991 | after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high)) | ||
992 | tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; | ||
993 | } | ||
994 | |||
995 | static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb) | ||
996 | { | ||
997 | if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { | ||
998 | tcp_verify_retransmit_hint(tp, skb); | ||
999 | |||
1000 | tp->lost_out += tcp_skb_pcount(skb); | ||
1001 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | ||
1002 | } | ||
1003 | } | ||
1004 | |||
1005 | void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) | ||
1006 | { | ||
1007 | tcp_verify_retransmit_hint(tp, skb); | ||
1008 | |||
1009 | if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { | ||
1010 | tp->lost_out += tcp_skb_pcount(skb); | ||
1011 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | ||
1012 | } | ||
1013 | } | ||
1014 | |||
982 | /* This procedure tags the retransmission queue when SACKs arrive. | 1015 | /* This procedure tags the retransmission queue when SACKs arrive. |
983 | * | 1016 | * |
984 | * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L). | 1017 | * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L). |
@@ -1155,13 +1188,7 @@ static void tcp_mark_lost_retrans(struct sock *sk) | |||
1155 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | 1188 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; |
1156 | tp->retrans_out -= tcp_skb_pcount(skb); | 1189 | tp->retrans_out -= tcp_skb_pcount(skb); |
1157 | 1190 | ||
1158 | /* clear lost hint */ | 1191 | tcp_skb_mark_lost_uncond_verify(tp, skb); |
1159 | tp->retransmit_skb_hint = NULL; | ||
1160 | |||
1161 | if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { | ||
1162 | tp->lost_out += tcp_skb_pcount(skb); | ||
1163 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | ||
1164 | } | ||
1165 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); | 1192 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); |
1166 | } else { | 1193 | } else { |
1167 | if (before(ack_seq, new_low_seq)) | 1194 | if (before(ack_seq, new_low_seq)) |
@@ -1271,9 +1298,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, | |||
1271 | ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); | 1298 | ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); |
1272 | tp->lost_out -= tcp_skb_pcount(skb); | 1299 | tp->lost_out -= tcp_skb_pcount(skb); |
1273 | tp->retrans_out -= tcp_skb_pcount(skb); | 1300 | tp->retrans_out -= tcp_skb_pcount(skb); |
1274 | |||
1275 | /* clear lost hint */ | ||
1276 | tp->retransmit_skb_hint = NULL; | ||
1277 | } | 1301 | } |
1278 | } else { | 1302 | } else { |
1279 | if (!(sacked & TCPCB_RETRANS)) { | 1303 | if (!(sacked & TCPCB_RETRANS)) { |
@@ -1292,9 +1316,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, | |||
1292 | if (sacked & TCPCB_LOST) { | 1316 | if (sacked & TCPCB_LOST) { |
1293 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; | 1317 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; |
1294 | tp->lost_out -= tcp_skb_pcount(skb); | 1318 | tp->lost_out -= tcp_skb_pcount(skb); |
1295 | |||
1296 | /* clear lost hint */ | ||
1297 | tp->retransmit_skb_hint = NULL; | ||
1298 | } | 1319 | } |
1299 | } | 1320 | } |
1300 | 1321 | ||
@@ -1324,7 +1345,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, | |||
1324 | if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) { | 1345 | if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) { |
1325 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | 1346 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; |
1326 | tp->retrans_out -= tcp_skb_pcount(skb); | 1347 | tp->retrans_out -= tcp_skb_pcount(skb); |
1327 | tp->retransmit_skb_hint = NULL; | ||
1328 | } | 1348 | } |
1329 | 1349 | ||
1330 | return flag; | 1350 | return flag; |
@@ -1726,6 +1746,8 @@ int tcp_use_frto(struct sock *sk) | |||
1726 | return 0; | 1746 | return 0; |
1727 | 1747 | ||
1728 | skb = tcp_write_queue_head(sk); | 1748 | skb = tcp_write_queue_head(sk); |
1749 | if (tcp_skb_is_last(sk, skb)) | ||
1750 | return 1; | ||
1729 | skb = tcp_write_queue_next(sk, skb); /* Skips head */ | 1751 | skb = tcp_write_queue_next(sk, skb); /* Skips head */ |
1730 | tcp_for_write_queue_from(skb, sk) { | 1752 | tcp_for_write_queue_from(skb, sk) { |
1731 | if (skb == tcp_send_head(sk)) | 1753 | if (skb == tcp_send_head(sk)) |
@@ -1867,6 +1889,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) | |||
1867 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { | 1889 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { |
1868 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | 1890 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; |
1869 | tp->lost_out += tcp_skb_pcount(skb); | 1891 | tp->lost_out += tcp_skb_pcount(skb); |
1892 | tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; | ||
1870 | } | 1893 | } |
1871 | } | 1894 | } |
1872 | tcp_verify_left_out(tp); | 1895 | tcp_verify_left_out(tp); |
@@ -1883,7 +1906,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) | |||
1883 | tp->high_seq = tp->snd_nxt; | 1906 | tp->high_seq = tp->snd_nxt; |
1884 | TCP_ECN_queue_cwr(tp); | 1907 | TCP_ECN_queue_cwr(tp); |
1885 | 1908 | ||
1886 | tcp_clear_retrans_hints_partial(tp); | 1909 | tcp_clear_all_retrans_hints(tp); |
1887 | } | 1910 | } |
1888 | 1911 | ||
1889 | static void tcp_clear_retrans_partial(struct tcp_sock *tp) | 1912 | static void tcp_clear_retrans_partial(struct tcp_sock *tp) |
@@ -1934,12 +1957,11 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
1934 | /* Push undo marker, if it was plain RTO and nothing | 1957 | /* Push undo marker, if it was plain RTO and nothing |
1935 | * was retransmitted. */ | 1958 | * was retransmitted. */ |
1936 | tp->undo_marker = tp->snd_una; | 1959 | tp->undo_marker = tp->snd_una; |
1937 | tcp_clear_retrans_hints_partial(tp); | ||
1938 | } else { | 1960 | } else { |
1939 | tp->sacked_out = 0; | 1961 | tp->sacked_out = 0; |
1940 | tp->fackets_out = 0; | 1962 | tp->fackets_out = 0; |
1941 | tcp_clear_all_retrans_hints(tp); | ||
1942 | } | 1963 | } |
1964 | tcp_clear_all_retrans_hints(tp); | ||
1943 | 1965 | ||
1944 | tcp_for_write_queue(skb, sk) { | 1966 | tcp_for_write_queue(skb, sk) { |
1945 | if (skb == tcp_send_head(sk)) | 1967 | if (skb == tcp_send_head(sk)) |
@@ -1952,6 +1974,7 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
1952 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; | 1974 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; |
1953 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | 1975 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; |
1954 | tp->lost_out += tcp_skb_pcount(skb); | 1976 | tp->lost_out += tcp_skb_pcount(skb); |
1977 | tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; | ||
1955 | } | 1978 | } |
1956 | } | 1979 | } |
1957 | tcp_verify_left_out(tp); | 1980 | tcp_verify_left_out(tp); |
@@ -2157,19 +2180,6 @@ static int tcp_time_to_recover(struct sock *sk) | |||
2157 | return 0; | 2180 | return 0; |
2158 | } | 2181 | } |
2159 | 2182 | ||
2160 | /* RFC: This is from the original, I doubt that this is necessary at all: | ||
2161 | * clear xmit_retrans hint if seq of this skb is beyond hint. How could we | ||
2162 | * retransmitted past LOST markings in the first place? I'm not fully sure | ||
2163 | * about undo and end of connection cases, which can cause R without L? | ||
2164 | */ | ||
2165 | static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) | ||
2166 | { | ||
2167 | if ((tp->retransmit_skb_hint != NULL) && | ||
2168 | before(TCP_SKB_CB(skb)->seq, | ||
2169 | TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) | ||
2170 | tp->retransmit_skb_hint = NULL; | ||
2171 | } | ||
2172 | |||
2173 | /* Mark head of queue up as lost. With RFC3517 SACK, the packets is | 2183 | /* Mark head of queue up as lost. With RFC3517 SACK, the packets is |
2174 | * is against sacked "cnt", otherwise it's against facked "cnt" | 2184 | * is against sacked "cnt", otherwise it's against facked "cnt" |
2175 | */ | 2185 | */ |
@@ -2217,11 +2227,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) | |||
2217 | cnt = packets; | 2227 | cnt = packets; |
2218 | } | 2228 | } |
2219 | 2229 | ||
2220 | if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { | 2230 | tcp_skb_mark_lost(tp, skb); |
2221 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | ||
2222 | tp->lost_out += tcp_skb_pcount(skb); | ||
2223 | tcp_verify_retransmit_hint(tp, skb); | ||
2224 | } | ||
2225 | } | 2231 | } |
2226 | tcp_verify_left_out(tp); | 2232 | tcp_verify_left_out(tp); |
2227 | } | 2233 | } |
@@ -2263,11 +2269,7 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) | |||
2263 | if (!tcp_skb_timedout(sk, skb)) | 2269 | if (!tcp_skb_timedout(sk, skb)) |
2264 | break; | 2270 | break; |
2265 | 2271 | ||
2266 | if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { | 2272 | tcp_skb_mark_lost(tp, skb); |
2267 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | ||
2268 | tp->lost_out += tcp_skb_pcount(skb); | ||
2269 | tcp_verify_retransmit_hint(tp, skb); | ||
2270 | } | ||
2271 | } | 2273 | } |
2272 | 2274 | ||
2273 | tp->scoreboard_skb_hint = skb; | 2275 | tp->scoreboard_skb_hint = skb; |
@@ -2378,10 +2380,6 @@ static void tcp_undo_cwr(struct sock *sk, const int undo) | |||
2378 | } | 2380 | } |
2379 | tcp_moderate_cwnd(tp); | 2381 | tcp_moderate_cwnd(tp); |
2380 | tp->snd_cwnd_stamp = tcp_time_stamp; | 2382 | tp->snd_cwnd_stamp = tcp_time_stamp; |
2381 | |||
2382 | /* There is something screwy going on with the retrans hints after | ||
2383 | an undo */ | ||
2384 | tcp_clear_all_retrans_hints(tp); | ||
2385 | } | 2383 | } |
2386 | 2384 | ||
2387 | static inline int tcp_may_undo(struct tcp_sock *tp) | 2385 | static inline int tcp_may_undo(struct tcp_sock *tp) |
@@ -2848,6 +2846,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) | |||
2848 | int flag = 0; | 2846 | int flag = 0; |
2849 | u32 pkts_acked = 0; | 2847 | u32 pkts_acked = 0; |
2850 | u32 reord = tp->packets_out; | 2848 | u32 reord = tp->packets_out; |
2849 | u32 prior_sacked = tp->sacked_out; | ||
2851 | s32 seq_rtt = -1; | 2850 | s32 seq_rtt = -1; |
2852 | s32 ca_seq_rtt = -1; | 2851 | s32 ca_seq_rtt = -1; |
2853 | ktime_t last_ackt = net_invalid_timestamp(); | 2852 | ktime_t last_ackt = net_invalid_timestamp(); |
@@ -2929,7 +2928,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) | |||
2929 | 2928 | ||
2930 | tcp_unlink_write_queue(skb, sk); | 2929 | tcp_unlink_write_queue(skb, sk); |
2931 | sk_wmem_free_skb(sk, skb); | 2930 | sk_wmem_free_skb(sk, skb); |
2932 | tcp_clear_all_retrans_hints(tp); | 2931 | tp->scoreboard_skb_hint = NULL; |
2932 | if (skb == tp->retransmit_skb_hint) | ||
2933 | tp->retransmit_skb_hint = NULL; | ||
2934 | if (skb == tp->lost_skb_hint) | ||
2935 | tp->lost_skb_hint = NULL; | ||
2933 | } | 2936 | } |
2934 | 2937 | ||
2935 | if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) | 2938 | if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) |
@@ -2948,6 +2951,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) | |||
2948 | /* Non-retransmitted hole got filled? That's reordering */ | 2951 | /* Non-retransmitted hole got filled? That's reordering */ |
2949 | if (reord < prior_fackets) | 2952 | if (reord < prior_fackets) |
2950 | tcp_update_reordering(sk, tp->fackets_out - reord, 0); | 2953 | tcp_update_reordering(sk, tp->fackets_out - reord, 0); |
2954 | |||
2955 | /* No need to care for underflows here because | ||
2956 | * the lost_skb_hint gets NULLed if we're past it | ||
2957 | * (or something non-trivial happened) | ||
2958 | */ | ||
2959 | if (tcp_is_fack(tp)) | ||
2960 | tp->lost_cnt_hint -= pkts_acked; | ||
2961 | else | ||
2962 | tp->lost_cnt_hint -= prior_sacked - tp->sacked_out; | ||
2951 | } | 2963 | } |
2952 | 2964 | ||
2953 | tp->fackets_out -= min(pkts_acked, tp->fackets_out); | 2965 | tp->fackets_out -= min(pkts_acked, tp->fackets_out); |
@@ -3442,6 +3454,22 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, | |||
3442 | } | 3454 | } |
3443 | } | 3455 | } |
3444 | 3456 | ||
3457 | static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) | ||
3458 | { | ||
3459 | __be32 *ptr = (__be32 *)(th + 1); | ||
3460 | |||
3461 | if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | ||
3462 | | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { | ||
3463 | tp->rx_opt.saw_tstamp = 1; | ||
3464 | ++ptr; | ||
3465 | tp->rx_opt.rcv_tsval = ntohl(*ptr); | ||
3466 | ++ptr; | ||
3467 | tp->rx_opt.rcv_tsecr = ntohl(*ptr); | ||
3468 | return 1; | ||
3469 | } | ||
3470 | return 0; | ||
3471 | } | ||
3472 | |||
3445 | /* Fast parse options. This hopes to only see timestamps. | 3473 | /* Fast parse options. This hopes to only see timestamps. |
3446 | * If it is wrong it falls back on tcp_parse_options(). | 3474 | * If it is wrong it falls back on tcp_parse_options(). |
3447 | */ | 3475 | */ |
@@ -3453,16 +3481,8 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, | |||
3453 | return 0; | 3481 | return 0; |
3454 | } else if (tp->rx_opt.tstamp_ok && | 3482 | } else if (tp->rx_opt.tstamp_ok && |
3455 | th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { | 3483 | th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { |
3456 | __be32 *ptr = (__be32 *)(th + 1); | 3484 | if (tcp_parse_aligned_timestamp(tp, th)) |
3457 | if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | ||
3458 | | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { | ||
3459 | tp->rx_opt.saw_tstamp = 1; | ||
3460 | ++ptr; | ||
3461 | tp->rx_opt.rcv_tsval = ntohl(*ptr); | ||
3462 | ++ptr; | ||
3463 | tp->rx_opt.rcv_tsecr = ntohl(*ptr); | ||
3464 | return 1; | 3485 | return 1; |
3465 | } | ||
3466 | } | 3486 | } |
3467 | tcp_parse_options(skb, &tp->rx_opt, 1); | 3487 | tcp_parse_options(skb, &tp->rx_opt, 1); |
3468 | return 1; | 3488 | return 1; |
@@ -4138,7 +4158,7 @@ drop: | |||
4138 | skb1 = skb1->prev; | 4158 | skb1 = skb1->prev; |
4139 | } | 4159 | } |
4140 | } | 4160 | } |
4141 | __skb_insert(skb, skb1, skb1->next, &tp->out_of_order_queue); | 4161 | __skb_queue_after(&tp->out_of_order_queue, skb1, skb); |
4142 | 4162 | ||
4143 | /* And clean segments covered by new one as whole. */ | 4163 | /* And clean segments covered by new one as whole. */ |
4144 | while ((skb1 = skb->next) != | 4164 | while ((skb1 = skb->next) != |
@@ -4161,6 +4181,18 @@ add_sack: | |||
4161 | } | 4181 | } |
4162 | } | 4182 | } |
4163 | 4183 | ||
4184 | static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, | ||
4185 | struct sk_buff_head *list) | ||
4186 | { | ||
4187 | struct sk_buff *next = skb->next; | ||
4188 | |||
4189 | __skb_unlink(skb, list); | ||
4190 | __kfree_skb(skb); | ||
4191 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); | ||
4192 | |||
4193 | return next; | ||
4194 | } | ||
4195 | |||
4164 | /* Collapse contiguous sequence of skbs head..tail with | 4196 | /* Collapse contiguous sequence of skbs head..tail with |
4165 | * sequence numbers start..end. | 4197 | * sequence numbers start..end. |
4166 | * Segments with FIN/SYN are not collapsed (only because this | 4198 | * Segments with FIN/SYN are not collapsed (only because this |
@@ -4178,11 +4210,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, | |||
4178 | for (skb = head; skb != tail;) { | 4210 | for (skb = head; skb != tail;) { |
4179 | /* No new bits? It is possible on ofo queue. */ | 4211 | /* No new bits? It is possible on ofo queue. */ |
4180 | if (!before(start, TCP_SKB_CB(skb)->end_seq)) { | 4212 | if (!before(start, TCP_SKB_CB(skb)->end_seq)) { |
4181 | struct sk_buff *next = skb->next; | 4213 | skb = tcp_collapse_one(sk, skb, list); |
4182 | __skb_unlink(skb, list); | ||
4183 | __kfree_skb(skb); | ||
4184 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); | ||
4185 | skb = next; | ||
4186 | continue; | 4214 | continue; |
4187 | } | 4215 | } |
4188 | 4216 | ||
@@ -4228,7 +4256,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, | |||
4228 | memcpy(nskb->head, skb->head, header); | 4256 | memcpy(nskb->head, skb->head, header); |
4229 | memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); | 4257 | memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); |
4230 | TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; | 4258 | TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; |
4231 | __skb_insert(nskb, skb->prev, skb, list); | 4259 | __skb_queue_before(list, skb, nskb); |
4232 | skb_set_owner_r(nskb, sk); | 4260 | skb_set_owner_r(nskb, sk); |
4233 | 4261 | ||
4234 | /* Copy data, releasing collapsed skbs. */ | 4262 | /* Copy data, releasing collapsed skbs. */ |
@@ -4246,11 +4274,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, | |||
4246 | start += size; | 4274 | start += size; |
4247 | } | 4275 | } |
4248 | if (!before(start, TCP_SKB_CB(skb)->end_seq)) { | 4276 | if (!before(start, TCP_SKB_CB(skb)->end_seq)) { |
4249 | struct sk_buff *next = skb->next; | 4277 | skb = tcp_collapse_one(sk, skb, list); |
4250 | __skb_unlink(skb, list); | ||
4251 | __kfree_skb(skb); | ||
4252 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); | ||
4253 | skb = next; | ||
4254 | if (skb == tail || | 4278 | if (skb == tail || |
4255 | tcp_hdr(skb)->syn || | 4279 | tcp_hdr(skb)->syn || |
4256 | tcp_hdr(skb)->fin) | 4280 | tcp_hdr(skb)->fin) |
@@ -4691,6 +4715,67 @@ out: | |||
4691 | } | 4715 | } |
4692 | #endif /* CONFIG_NET_DMA */ | 4716 | #endif /* CONFIG_NET_DMA */ |
4693 | 4717 | ||
4718 | /* Does PAWS and seqno based validation of an incoming segment, flags will | ||
4719 | * play significant role here. | ||
4720 | */ | ||
4721 | static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, | ||
4722 | struct tcphdr *th, int syn_inerr) | ||
4723 | { | ||
4724 | struct tcp_sock *tp = tcp_sk(sk); | ||
4725 | |||
4726 | /* RFC1323: H1. Apply PAWS check first. */ | ||
4727 | if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && | ||
4728 | tcp_paws_discard(sk, skb)) { | ||
4729 | if (!th->rst) { | ||
4730 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); | ||
4731 | tcp_send_dupack(sk, skb); | ||
4732 | goto discard; | ||
4733 | } | ||
4734 | /* Reset is accepted even if it did not pass PAWS. */ | ||
4735 | } | ||
4736 | |||
4737 | /* Step 1: check sequence number */ | ||
4738 | if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { | ||
4739 | /* RFC793, page 37: "In all states except SYN-SENT, all reset | ||
4740 | * (RST) segments are validated by checking their SEQ-fields." | ||
4741 | * And page 69: "If an incoming segment is not acceptable, | ||
4742 | * an acknowledgment should be sent in reply (unless the RST | ||
4743 | * bit is set, if so drop the segment and return)". | ||
4744 | */ | ||
4745 | if (!th->rst) | ||
4746 | tcp_send_dupack(sk, skb); | ||
4747 | goto discard; | ||
4748 | } | ||
4749 | |||
4750 | /* Step 2: check RST bit */ | ||
4751 | if (th->rst) { | ||
4752 | tcp_reset(sk); | ||
4753 | goto discard; | ||
4754 | } | ||
4755 | |||
4756 | /* ts_recent update must be made after we are sure that the packet | ||
4757 | * is in window. | ||
4758 | */ | ||
4759 | tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); | ||
4760 | |||
4761 | /* step 3: check security and precedence [ignored] */ | ||
4762 | |||
4763 | /* step 4: Check for a SYN in window. */ | ||
4764 | if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { | ||
4765 | if (syn_inerr) | ||
4766 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); | ||
4767 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); | ||
4768 | tcp_reset(sk); | ||
4769 | return -1; | ||
4770 | } | ||
4771 | |||
4772 | return 1; | ||
4773 | |||
4774 | discard: | ||
4775 | __kfree_skb(skb); | ||
4776 | return 0; | ||
4777 | } | ||
4778 | |||
4694 | /* | 4779 | /* |
4695 | * TCP receive function for the ESTABLISHED state. | 4780 | * TCP receive function for the ESTABLISHED state. |
4696 | * | 4781 | * |
@@ -4718,6 +4803,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
4718 | struct tcphdr *th, unsigned len) | 4803 | struct tcphdr *th, unsigned len) |
4719 | { | 4804 | { |
4720 | struct tcp_sock *tp = tcp_sk(sk); | 4805 | struct tcp_sock *tp = tcp_sk(sk); |
4806 | int res; | ||
4721 | 4807 | ||
4722 | /* | 4808 | /* |
4723 | * Header prediction. | 4809 | * Header prediction. |
@@ -4756,19 +4842,10 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
4756 | 4842 | ||
4757 | /* Check timestamp */ | 4843 | /* Check timestamp */ |
4758 | if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { | 4844 | if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { |
4759 | __be32 *ptr = (__be32 *)(th + 1); | ||
4760 | |||
4761 | /* No? Slow path! */ | 4845 | /* No? Slow path! */ |
4762 | if (*ptr != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 4846 | if (!tcp_parse_aligned_timestamp(tp, th)) |
4763 | | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) | ||
4764 | goto slow_path; | 4847 | goto slow_path; |
4765 | 4848 | ||
4766 | tp->rx_opt.saw_tstamp = 1; | ||
4767 | ++ptr; | ||
4768 | tp->rx_opt.rcv_tsval = ntohl(*ptr); | ||
4769 | ++ptr; | ||
4770 | tp->rx_opt.rcv_tsecr = ntohl(*ptr); | ||
4771 | |||
4772 | /* If PAWS failed, check it more carefully in slow path */ | 4849 | /* If PAWS failed, check it more carefully in slow path */ |
4773 | if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0) | 4850 | if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0) |
4774 | goto slow_path; | 4851 | goto slow_path; |
@@ -4899,51 +4976,12 @@ slow_path: | |||
4899 | goto csum_error; | 4976 | goto csum_error; |
4900 | 4977 | ||
4901 | /* | 4978 | /* |
4902 | * RFC1323: H1. Apply PAWS check first. | ||
4903 | */ | ||
4904 | if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && | ||
4905 | tcp_paws_discard(sk, skb)) { | ||
4906 | if (!th->rst) { | ||
4907 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); | ||
4908 | tcp_send_dupack(sk, skb); | ||
4909 | goto discard; | ||
4910 | } | ||
4911 | /* Resets are accepted even if PAWS failed. | ||
4912 | |||
4913 | ts_recent update must be made after we are sure | ||
4914 | that the packet is in window. | ||
4915 | */ | ||
4916 | } | ||
4917 | |||
4918 | /* | ||
4919 | * Standard slow path. | 4979 | * Standard slow path. |
4920 | */ | 4980 | */ |
4921 | 4981 | ||
4922 | if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { | 4982 | res = tcp_validate_incoming(sk, skb, th, 1); |
4923 | /* RFC793, page 37: "In all states except SYN-SENT, all reset | 4983 | if (res <= 0) |
4924 | * (RST) segments are validated by checking their SEQ-fields." | 4984 | return -res; |
4925 | * And page 69: "If an incoming segment is not acceptable, | ||
4926 | * an acknowledgment should be sent in reply (unless the RST bit | ||
4927 | * is set, if so drop the segment and return)". | ||
4928 | */ | ||
4929 | if (!th->rst) | ||
4930 | tcp_send_dupack(sk, skb); | ||
4931 | goto discard; | ||
4932 | } | ||
4933 | |||
4934 | if (th->rst) { | ||
4935 | tcp_reset(sk); | ||
4936 | goto discard; | ||
4937 | } | ||
4938 | |||
4939 | tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); | ||
4940 | |||
4941 | if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { | ||
4942 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); | ||
4943 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); | ||
4944 | tcp_reset(sk); | ||
4945 | return 1; | ||
4946 | } | ||
4947 | 4985 | ||
4948 | step5: | 4986 | step5: |
4949 | if (th->ack) | 4987 | if (th->ack) |
@@ -5225,6 +5263,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5225 | struct tcp_sock *tp = tcp_sk(sk); | 5263 | struct tcp_sock *tp = tcp_sk(sk); |
5226 | struct inet_connection_sock *icsk = inet_csk(sk); | 5264 | struct inet_connection_sock *icsk = inet_csk(sk); |
5227 | int queued = 0; | 5265 | int queued = 0; |
5266 | int res; | ||
5228 | 5267 | ||
5229 | tp->rx_opt.saw_tstamp = 0; | 5268 | tp->rx_opt.saw_tstamp = 0; |
5230 | 5269 | ||
@@ -5277,42 +5316,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5277 | return 0; | 5316 | return 0; |
5278 | } | 5317 | } |
5279 | 5318 | ||
5280 | if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && | 5319 | res = tcp_validate_incoming(sk, skb, th, 0); |
5281 | tcp_paws_discard(sk, skb)) { | 5320 | if (res <= 0) |
5282 | if (!th->rst) { | 5321 | return -res; |
5283 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); | ||
5284 | tcp_send_dupack(sk, skb); | ||
5285 | goto discard; | ||
5286 | } | ||
5287 | /* Reset is accepted even if it did not pass PAWS. */ | ||
5288 | } | ||
5289 | |||
5290 | /* step 1: check sequence number */ | ||
5291 | if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { | ||
5292 | if (!th->rst) | ||
5293 | tcp_send_dupack(sk, skb); | ||
5294 | goto discard; | ||
5295 | } | ||
5296 | |||
5297 | /* step 2: check RST bit */ | ||
5298 | if (th->rst) { | ||
5299 | tcp_reset(sk); | ||
5300 | goto discard; | ||
5301 | } | ||
5302 | |||
5303 | tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); | ||
5304 | |||
5305 | /* step 3: check security and precedence [ignored] */ | ||
5306 | |||
5307 | /* step 4: | ||
5308 | * | ||
5309 | * Check for a SYN in window. | ||
5310 | */ | ||
5311 | if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { | ||
5312 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); | ||
5313 | tcp_reset(sk); | ||
5314 | return 1; | ||
5315 | } | ||
5316 | 5322 | ||
5317 | /* step 5: check the ACK field */ | 5323 | /* step 5: check the ACK field */ |
5318 | if (th->ack) { | 5324 | if (th->ack) { |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 44c1e934824b..44aef1c1f373 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -1364,6 +1364,10 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1364 | tcp_mtup_init(newsk); | 1364 | tcp_mtup_init(newsk); |
1365 | tcp_sync_mss(newsk, dst_mtu(dst)); | 1365 | tcp_sync_mss(newsk, dst_mtu(dst)); |
1366 | newtp->advmss = dst_metric(dst, RTAX_ADVMSS); | 1366 | newtp->advmss = dst_metric(dst, RTAX_ADVMSS); |
1367 | if (tcp_sk(sk)->rx_opt.user_mss && | ||
1368 | tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) | ||
1369 | newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; | ||
1370 | |||
1367 | tcp_initialize_rcv_mss(newsk); | 1371 | tcp_initialize_rcv_mss(newsk); |
1368 | 1372 | ||
1369 | #ifdef CONFIG_TCP_MD5SIG | 1373 | #ifdef CONFIG_TCP_MD5SIG |
@@ -1946,6 +1950,12 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos) | |||
1946 | return rc; | 1950 | return rc; |
1947 | } | 1951 | } |
1948 | 1952 | ||
1953 | static inline int empty_bucket(struct tcp_iter_state *st) | ||
1954 | { | ||
1955 | return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) && | ||
1956 | hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain); | ||
1957 | } | ||
1958 | |||
1949 | static void *established_get_first(struct seq_file *seq) | 1959 | static void *established_get_first(struct seq_file *seq) |
1950 | { | 1960 | { |
1951 | struct tcp_iter_state* st = seq->private; | 1961 | struct tcp_iter_state* st = seq->private; |
@@ -1958,6 +1968,10 @@ static void *established_get_first(struct seq_file *seq) | |||
1958 | struct inet_timewait_sock *tw; | 1968 | struct inet_timewait_sock *tw; |
1959 | rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); | 1969 | rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); |
1960 | 1970 | ||
1971 | /* Lockless fast path for the common case of empty buckets */ | ||
1972 | if (empty_bucket(st)) | ||
1973 | continue; | ||
1974 | |||
1961 | read_lock_bh(lock); | 1975 | read_lock_bh(lock); |
1962 | sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { | 1976 | sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { |
1963 | if (sk->sk_family != st->family || | 1977 | if (sk->sk_family != st->family || |
@@ -2008,13 +2022,15 @@ get_tw: | |||
2008 | read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); | 2022 | read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); |
2009 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 2023 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2010 | 2024 | ||
2011 | if (++st->bucket < tcp_hashinfo.ehash_size) { | 2025 | /* Look for next non empty bucket */ |
2012 | read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); | 2026 | while (++st->bucket < tcp_hashinfo.ehash_size && |
2013 | sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); | 2027 | empty_bucket(st)) |
2014 | } else { | 2028 | ; |
2015 | cur = NULL; | 2029 | if (st->bucket >= tcp_hashinfo.ehash_size) |
2016 | goto out; | 2030 | return NULL; |
2017 | } | 2031 | |
2032 | read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); | ||
2033 | sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); | ||
2018 | } else | 2034 | } else |
2019 | sk = sk_next(sk); | 2035 | sk = sk_next(sk); |
2020 | 2036 | ||
@@ -2376,6 +2392,7 @@ static int __net_init tcp_sk_init(struct net *net) | |||
2376 | static void __net_exit tcp_sk_exit(struct net *net) | 2392 | static void __net_exit tcp_sk_exit(struct net *net) |
2377 | { | 2393 | { |
2378 | inet_ctl_sock_destroy(net->ipv4.tcp_sock); | 2394 | inet_ctl_sock_destroy(net->ipv4.tcp_sock); |
2395 | inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET); | ||
2379 | } | 2396 | } |
2380 | 2397 | ||
2381 | static struct pernet_operations __net_initdata tcp_sk_ops = { | 2398 | static struct pernet_operations __net_initdata tcp_sk_ops = { |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8165f5aa8c71..a8499ef3234a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -1824,6 +1824,8 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, | |||
1824 | 1824 | ||
1825 | /* changed transmit queue under us so clear hints */ | 1825 | /* changed transmit queue under us so clear hints */ |
1826 | tcp_clear_retrans_hints_partial(tp); | 1826 | tcp_clear_retrans_hints_partial(tp); |
1827 | if (next_skb == tp->retransmit_skb_hint) | ||
1828 | tp->retransmit_skb_hint = skb; | ||
1827 | 1829 | ||
1828 | sk_wmem_free_skb(sk, next_skb); | 1830 | sk_wmem_free_skb(sk, next_skb); |
1829 | } | 1831 | } |
@@ -1838,7 +1840,7 @@ void tcp_simple_retransmit(struct sock *sk) | |||
1838 | struct tcp_sock *tp = tcp_sk(sk); | 1840 | struct tcp_sock *tp = tcp_sk(sk); |
1839 | struct sk_buff *skb; | 1841 | struct sk_buff *skb; |
1840 | unsigned int mss = tcp_current_mss(sk, 0); | 1842 | unsigned int mss = tcp_current_mss(sk, 0); |
1841 | int lost = 0; | 1843 | u32 prior_lost = tp->lost_out; |
1842 | 1844 | ||
1843 | tcp_for_write_queue(skb, sk) { | 1845 | tcp_for_write_queue(skb, sk) { |
1844 | if (skb == tcp_send_head(sk)) | 1846 | if (skb == tcp_send_head(sk)) |
@@ -1849,17 +1851,13 @@ void tcp_simple_retransmit(struct sock *sk) | |||
1849 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | 1851 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; |
1850 | tp->retrans_out -= tcp_skb_pcount(skb); | 1852 | tp->retrans_out -= tcp_skb_pcount(skb); |
1851 | } | 1853 | } |
1852 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST)) { | 1854 | tcp_skb_mark_lost_uncond_verify(tp, skb); |
1853 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | ||
1854 | tp->lost_out += tcp_skb_pcount(skb); | ||
1855 | lost = 1; | ||
1856 | } | ||
1857 | } | 1855 | } |
1858 | } | 1856 | } |
1859 | 1857 | ||
1860 | tcp_clear_all_retrans_hints(tp); | 1858 | tcp_clear_retrans_hints_partial(tp); |
1861 | 1859 | ||
1862 | if (!lost) | 1860 | if (prior_lost == tp->lost_out) |
1863 | return; | 1861 | return; |
1864 | 1862 | ||
1865 | if (tcp_is_reno(tp)) | 1863 | if (tcp_is_reno(tp)) |
@@ -1934,8 +1932,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
1934 | /* Collapse two adjacent packets if worthwhile and we can. */ | 1932 | /* Collapse two adjacent packets if worthwhile and we can. */ |
1935 | if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && | 1933 | if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && |
1936 | (skb->len < (cur_mss >> 1)) && | 1934 | (skb->len < (cur_mss >> 1)) && |
1937 | (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && | ||
1938 | (!tcp_skb_is_last(sk, skb)) && | 1935 | (!tcp_skb_is_last(sk, skb)) && |
1936 | (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && | ||
1939 | (skb_shinfo(skb)->nr_frags == 0 && | 1937 | (skb_shinfo(skb)->nr_frags == 0 && |
1940 | skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) && | 1938 | skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) && |
1941 | (tcp_skb_pcount(skb) == 1 && | 1939 | (tcp_skb_pcount(skb) == 1 && |
@@ -1996,86 +1994,18 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
1996 | return err; | 1994 | return err; |
1997 | } | 1995 | } |
1998 | 1996 | ||
1999 | /* This gets called after a retransmit timeout, and the initially | 1997 | static int tcp_can_forward_retransmit(struct sock *sk) |
2000 | * retransmitted data is acknowledged. It tries to continue | ||
2001 | * resending the rest of the retransmit queue, until either | ||
2002 | * we've sent it all or the congestion window limit is reached. | ||
2003 | * If doing SACK, the first ACK which comes back for a timeout | ||
2004 | * based retransmit packet might feed us FACK information again. | ||
2005 | * If so, we use it to avoid unnecessarily retransmissions. | ||
2006 | */ | ||
2007 | void tcp_xmit_retransmit_queue(struct sock *sk) | ||
2008 | { | 1998 | { |
2009 | const struct inet_connection_sock *icsk = inet_csk(sk); | 1999 | const struct inet_connection_sock *icsk = inet_csk(sk); |
2010 | struct tcp_sock *tp = tcp_sk(sk); | 2000 | struct tcp_sock *tp = tcp_sk(sk); |
2011 | struct sk_buff *skb; | ||
2012 | int packet_cnt; | ||
2013 | |||
2014 | if (tp->retransmit_skb_hint) { | ||
2015 | skb = tp->retransmit_skb_hint; | ||
2016 | packet_cnt = tp->retransmit_cnt_hint; | ||
2017 | } else { | ||
2018 | skb = tcp_write_queue_head(sk); | ||
2019 | packet_cnt = 0; | ||
2020 | } | ||
2021 | |||
2022 | /* First pass: retransmit lost packets. */ | ||
2023 | if (tp->lost_out) { | ||
2024 | tcp_for_write_queue_from(skb, sk) { | ||
2025 | __u8 sacked = TCP_SKB_CB(skb)->sacked; | ||
2026 | |||
2027 | if (skb == tcp_send_head(sk)) | ||
2028 | break; | ||
2029 | /* we could do better than to assign each time */ | ||
2030 | tp->retransmit_skb_hint = skb; | ||
2031 | tp->retransmit_cnt_hint = packet_cnt; | ||
2032 | |||
2033 | /* Assume this retransmit will generate | ||
2034 | * only one packet for congestion window | ||
2035 | * calculation purposes. This works because | ||
2036 | * tcp_retransmit_skb() will chop up the | ||
2037 | * packet to be MSS sized and all the | ||
2038 | * packet counting works out. | ||
2039 | */ | ||
2040 | if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) | ||
2041 | return; | ||
2042 | |||
2043 | if (sacked & TCPCB_LOST) { | ||
2044 | if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { | ||
2045 | int mib_idx; | ||
2046 | |||
2047 | if (tcp_retransmit_skb(sk, skb)) { | ||
2048 | tp->retransmit_skb_hint = NULL; | ||
2049 | return; | ||
2050 | } | ||
2051 | if (icsk->icsk_ca_state != TCP_CA_Loss) | ||
2052 | mib_idx = LINUX_MIB_TCPFASTRETRANS; | ||
2053 | else | ||
2054 | mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS; | ||
2055 | NET_INC_STATS_BH(sock_net(sk), mib_idx); | ||
2056 | |||
2057 | if (skb == tcp_write_queue_head(sk)) | ||
2058 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | ||
2059 | inet_csk(sk)->icsk_rto, | ||
2060 | TCP_RTO_MAX); | ||
2061 | } | ||
2062 | |||
2063 | packet_cnt += tcp_skb_pcount(skb); | ||
2064 | if (packet_cnt >= tp->lost_out) | ||
2065 | break; | ||
2066 | } | ||
2067 | } | ||
2068 | } | ||
2069 | |||
2070 | /* OK, demanded retransmission is finished. */ | ||
2071 | 2001 | ||
2072 | /* Forward retransmissions are possible only during Recovery. */ | 2002 | /* Forward retransmissions are possible only during Recovery. */ |
2073 | if (icsk->icsk_ca_state != TCP_CA_Recovery) | 2003 | if (icsk->icsk_ca_state != TCP_CA_Recovery) |
2074 | return; | 2004 | return 0; |
2075 | 2005 | ||
2076 | /* No forward retransmissions in Reno are possible. */ | 2006 | /* No forward retransmissions in Reno are possible. */ |
2077 | if (tcp_is_reno(tp)) | 2007 | if (tcp_is_reno(tp)) |
2078 | return; | 2008 | return 0; |
2079 | 2009 | ||
2080 | /* Yeah, we have to make difficult choice between forward transmission | 2010 | /* Yeah, we have to make difficult choice between forward transmission |
2081 | * and retransmission... Both ways have their merits... | 2011 | * and retransmission... Both ways have their merits... |
@@ -2086,43 +2016,104 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |||
2086 | */ | 2016 | */ |
2087 | 2017 | ||
2088 | if (tcp_may_send_now(sk)) | 2018 | if (tcp_may_send_now(sk)) |
2089 | return; | 2019 | return 0; |
2090 | 2020 | ||
2091 | /* If nothing is SACKed, highest_sack in the loop won't be valid */ | 2021 | return 1; |
2092 | if (!tp->sacked_out) | 2022 | } |
2093 | return; | ||
2094 | 2023 | ||
2095 | if (tp->forward_skb_hint) | 2024 | /* This gets called after a retransmit timeout, and the initially |
2096 | skb = tp->forward_skb_hint; | 2025 | * retransmitted data is acknowledged. It tries to continue |
2097 | else | 2026 | * resending the rest of the retransmit queue, until either |
2027 | * we've sent it all or the congestion window limit is reached. | ||
2028 | * If doing SACK, the first ACK which comes back for a timeout | ||
2029 | * based retransmit packet might feed us FACK information again. | ||
2030 | * If so, we use it to avoid unnecessarily retransmissions. | ||
2031 | */ | ||
2032 | void tcp_xmit_retransmit_queue(struct sock *sk) | ||
2033 | { | ||
2034 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
2035 | struct tcp_sock *tp = tcp_sk(sk); | ||
2036 | struct sk_buff *skb; | ||
2037 | struct sk_buff *hole = NULL; | ||
2038 | u32 last_lost; | ||
2039 | int mib_idx; | ||
2040 | int fwd_rexmitting = 0; | ||
2041 | |||
2042 | if (!tp->lost_out) | ||
2043 | tp->retransmit_high = tp->snd_una; | ||
2044 | |||
2045 | if (tp->retransmit_skb_hint) { | ||
2046 | skb = tp->retransmit_skb_hint; | ||
2047 | last_lost = TCP_SKB_CB(skb)->end_seq; | ||
2048 | if (after(last_lost, tp->retransmit_high)) | ||
2049 | last_lost = tp->retransmit_high; | ||
2050 | } else { | ||
2098 | skb = tcp_write_queue_head(sk); | 2051 | skb = tcp_write_queue_head(sk); |
2052 | last_lost = tp->snd_una; | ||
2053 | } | ||
2099 | 2054 | ||
2055 | /* First pass: retransmit lost packets. */ | ||
2100 | tcp_for_write_queue_from(skb, sk) { | 2056 | tcp_for_write_queue_from(skb, sk) { |
2101 | if (skb == tcp_send_head(sk)) | 2057 | __u8 sacked = TCP_SKB_CB(skb)->sacked; |
2102 | break; | ||
2103 | tp->forward_skb_hint = skb; | ||
2104 | 2058 | ||
2105 | if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) | 2059 | if (skb == tcp_send_head(sk)) |
2106 | break; | 2060 | break; |
2061 | /* we could do better than to assign each time */ | ||
2062 | if (hole == NULL) | ||
2063 | tp->retransmit_skb_hint = skb; | ||
2107 | 2064 | ||
2065 | /* Assume this retransmit will generate | ||
2066 | * only one packet for congestion window | ||
2067 | * calculation purposes. This works because | ||
2068 | * tcp_retransmit_skb() will chop up the | ||
2069 | * packet to be MSS sized and all the | ||
2070 | * packet counting works out. | ||
2071 | */ | ||
2108 | if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) | 2072 | if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) |
2109 | break; | 2073 | return; |
2074 | |||
2075 | if (fwd_rexmitting) { | ||
2076 | begin_fwd: | ||
2077 | if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) | ||
2078 | break; | ||
2079 | mib_idx = LINUX_MIB_TCPFORWARDRETRANS; | ||
2080 | |||
2081 | } else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) { | ||
2082 | tp->retransmit_high = last_lost; | ||
2083 | if (!tcp_can_forward_retransmit(sk)) | ||
2084 | break; | ||
2085 | /* Backtrack if necessary to non-L'ed skb */ | ||
2086 | if (hole != NULL) { | ||
2087 | skb = hole; | ||
2088 | hole = NULL; | ||
2089 | } | ||
2090 | fwd_rexmitting = 1; | ||
2091 | goto begin_fwd; | ||
2110 | 2092 | ||
2111 | if (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) | 2093 | } else if (!(sacked & TCPCB_LOST)) { |
2094 | if (hole == NULL && !(sacked & TCPCB_SACKED_RETRANS)) | ||
2095 | hole = skb; | ||
2112 | continue; | 2096 | continue; |
2113 | 2097 | ||
2114 | /* Ok, retransmit it. */ | 2098 | } else { |
2115 | if (tcp_retransmit_skb(sk, skb)) { | 2099 | last_lost = TCP_SKB_CB(skb)->end_seq; |
2116 | tp->forward_skb_hint = NULL; | 2100 | if (icsk->icsk_ca_state != TCP_CA_Loss) |
2117 | break; | 2101 | mib_idx = LINUX_MIB_TCPFASTRETRANS; |
2102 | else | ||
2103 | mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS; | ||
2118 | } | 2104 | } |
2119 | 2105 | ||
2106 | if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS)) | ||
2107 | continue; | ||
2108 | |||
2109 | if (tcp_retransmit_skb(sk, skb)) | ||
2110 | return; | ||
2111 | NET_INC_STATS_BH(sock_net(sk), mib_idx); | ||
2112 | |||
2120 | if (skb == tcp_write_queue_head(sk)) | 2113 | if (skb == tcp_write_queue_head(sk)) |
2121 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 2114 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
2122 | inet_csk(sk)->icsk_rto, | 2115 | inet_csk(sk)->icsk_rto, |
2123 | TCP_RTO_MAX); | 2116 | TCP_RTO_MAX); |
2124 | |||
2125 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFORWARDRETRANS); | ||
2126 | } | 2117 | } |
2127 | } | 2118 | } |
2128 | 2119 | ||
@@ -2241,6 +2232,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2241 | struct sk_buff *skb; | 2232 | struct sk_buff *skb; |
2242 | struct tcp_md5sig_key *md5; | 2233 | struct tcp_md5sig_key *md5; |
2243 | __u8 *md5_hash_location; | 2234 | __u8 *md5_hash_location; |
2235 | int mss; | ||
2244 | 2236 | ||
2245 | skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); | 2237 | skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); |
2246 | if (skb == NULL) | 2238 | if (skb == NULL) |
@@ -2251,13 +2243,17 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2251 | 2243 | ||
2252 | skb->dst = dst_clone(dst); | 2244 | skb->dst = dst_clone(dst); |
2253 | 2245 | ||
2246 | mss = dst_metric(dst, RTAX_ADVMSS); | ||
2247 | if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) | ||
2248 | mss = tp->rx_opt.user_mss; | ||
2249 | |||
2254 | if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ | 2250 | if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ |
2255 | __u8 rcv_wscale; | 2251 | __u8 rcv_wscale; |
2256 | /* Set this up on the first call only */ | 2252 | /* Set this up on the first call only */ |
2257 | req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); | 2253 | req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); |
2258 | /* tcp_full_space because it is guaranteed to be the first packet */ | 2254 | /* tcp_full_space because it is guaranteed to be the first packet */ |
2259 | tcp_select_initial_window(tcp_full_space(sk), | 2255 | tcp_select_initial_window(tcp_full_space(sk), |
2260 | dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), | 2256 | mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), |
2261 | &req->rcv_wnd, | 2257 | &req->rcv_wnd, |
2262 | &req->window_clamp, | 2258 | &req->window_clamp, |
2263 | ireq->wscale_ok, | 2259 | ireq->wscale_ok, |
@@ -2267,8 +2263,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2267 | 2263 | ||
2268 | memset(&opts, 0, sizeof(opts)); | 2264 | memset(&opts, 0, sizeof(opts)); |
2269 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2265 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
2270 | tcp_header_size = tcp_synack_options(sk, req, | 2266 | tcp_header_size = tcp_synack_options(sk, req, mss, |
2271 | dst_metric(dst, RTAX_ADVMSS), | ||
2272 | skb, &opts, &md5) + | 2267 | skb, &opts, &md5) + |
2273 | sizeof(struct tcphdr); | 2268 | sizeof(struct tcphdr); |
2274 | 2269 | ||
@@ -2342,6 +2337,9 @@ static void tcp_connect_init(struct sock *sk) | |||
2342 | if (!tp->window_clamp) | 2337 | if (!tp->window_clamp) |
2343 | tp->window_clamp = dst_metric(dst, RTAX_WINDOW); | 2338 | tp->window_clamp = dst_metric(dst, RTAX_WINDOW); |
2344 | tp->advmss = dst_metric(dst, RTAX_ADVMSS); | 2339 | tp->advmss = dst_metric(dst, RTAX_ADVMSS); |
2340 | if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss) | ||
2341 | tp->advmss = tp->rx_opt.user_mss; | ||
2342 | |||
2345 | tcp_initialize_rcv_mss(sk); | 2343 | tcp_initialize_rcv_mss(sk); |
2346 | 2344 | ||
2347 | tcp_select_initial_window(tcp_full_space(sk), | 2345 | tcp_select_initial_window(tcp_full_space(sk), |