diff options
-rw-r--r-- | Documentation/networking/ip-sysctl.txt | 23 | ||||
-rw-r--r-- | include/linux/sysctl.h | 1 | ||||
-rw-r--r-- | include/net/inetpeer.h | 1 | ||||
-rw-r--r-- | include/net/ip.h | 2 | ||||
-rw-r--r-- | net/ipv4/inetpeer.c | 1 | ||||
-rw-r--r-- | net/ipv4/ip_fragment.c | 68 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 1 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 10 |
8 files changed, 106 insertions, 1 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ebc09a159f62..2b7cf19a06ad 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt | |||
@@ -46,6 +46,29 @@ ipfrag_secret_interval - INTEGER | |||
46 | for the hash secret) for IP fragments. | 46 | for the hash secret) for IP fragments. |
47 | Default: 600 | 47 | Default: 600 |
48 | 48 | ||
49 | ipfrag_max_dist - INTEGER | ||
50 | ipfrag_max_dist is a non-negative integer value which defines the | ||
51 | maximum "disorder" which is allowed among fragments which share a | ||
52 | common IP source address. Note that reordering of packets is | ||
53 | not unusual, but if a large number of fragments arrive from a source | ||
54 | IP address while a particular fragment queue remains incomplete, it | ||
55 | probably indicates that one or more fragments belonging to that queue | ||
56 | have been lost. When ipfrag_max_dist is positive, an additional check | ||
57 | is done on fragments before they are added to a reassembly queue - if | ||
58 | ipfrag_max_dist (or more) fragments have arrived from a particular IP | ||
59 | address between additions to any IP fragment queue using that source | ||
60 | address, it's presumed that one or more fragments in the queue are | ||
61 | lost. The existing fragment queue will be dropped, and a new one | ||
62 | started. An ipfrag_max_dist value of zero disables this check. | ||
63 | |||
64 | Using a very small value, e.g. 1 or 2, for ipfrag_max_dist can | ||
65 | result in unnecessarily dropping fragment queues when normal | ||
66 | reordering of packets occurs, which could lead to poor application | ||
67 | performance. Using a very large value, e.g. 50000, increases the | ||
68 | likelihood of incorrectly reassembling IP fragments that originate | ||
69 | from different IP datagrams, which could result in data corruption. | ||
70 | Default: 64 | ||
71 | |||
49 | INET peer storage: | 72 | INET peer storage: |
50 | 73 | ||
51 | inet_peer_threshold - INTEGER | 74 | inet_peer_threshold - INTEGER |
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 4be34ef8c2f7..93fa765e47d3 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h | |||
@@ -390,6 +390,7 @@ enum | |||
390 | NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109, | 390 | NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109, |
391 | NET_TCP_CONG_CONTROL=110, | 391 | NET_TCP_CONG_CONTROL=110, |
392 | NET_TCP_ABC=111, | 392 | NET_TCP_ABC=111, |
393 | NET_IPV4_IPFRAG_MAX_DIST=112, | ||
393 | }; | 394 | }; |
394 | 395 | ||
395 | enum { | 396 | enum { |
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 7fda471002b6..0965515f40cf 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h | |||
@@ -25,6 +25,7 @@ struct inet_peer | |||
25 | __u32 v4daddr; /* peer's address */ | 25 | __u32 v4daddr; /* peer's address */ |
26 | __u16 avl_height; | 26 | __u16 avl_height; |
27 | __u16 ip_id_count; /* IP ID for the next packet */ | 27 | __u16 ip_id_count; /* IP ID for the next packet */ |
28 | atomic_t rid; /* Frag reception counter */ | ||
28 | __u32 tcp_ts; | 29 | __u32 tcp_ts; |
29 | unsigned long tcp_ts_stamp; | 30 | unsigned long tcp_ts_stamp; |
30 | }; | 31 | }; |
diff --git a/include/net/ip.h b/include/net/ip.h index e4563bbee6ea..4d6294ba038e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h | |||
@@ -45,6 +45,7 @@ struct inet_skb_parm | |||
45 | #define IPSKB_TRANSLATED 2 | 45 | #define IPSKB_TRANSLATED 2 |
46 | #define IPSKB_FORWARDED 4 | 46 | #define IPSKB_FORWARDED 4 |
47 | #define IPSKB_XFRM_TUNNEL_SIZE 8 | 47 | #define IPSKB_XFRM_TUNNEL_SIZE 8 |
48 | #define IPSKB_FRAG_COMPLETE 16 | ||
48 | }; | 49 | }; |
49 | 50 | ||
50 | struct ipcm_cookie | 51 | struct ipcm_cookie |
@@ -168,6 +169,7 @@ extern int sysctl_ipfrag_high_thresh; | |||
168 | extern int sysctl_ipfrag_low_thresh; | 169 | extern int sysctl_ipfrag_low_thresh; |
169 | extern int sysctl_ipfrag_time; | 170 | extern int sysctl_ipfrag_time; |
170 | extern int sysctl_ipfrag_secret_interval; | 171 | extern int sysctl_ipfrag_secret_interval; |
172 | extern int sysctl_ipfrag_max_dist; | ||
171 | 173 | ||
172 | /* From inetpeer.c */ | 174 | /* From inetpeer.c */ |
173 | extern int inet_peer_threshold; | 175 | extern int inet_peer_threshold; |
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 2fc3fd38924f..ce5fe3f74a3d 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c | |||
@@ -401,6 +401,7 @@ struct inet_peer *inet_getpeer(__u32 daddr, int create) | |||
401 | return NULL; | 401 | return NULL; |
402 | n->v4daddr = daddr; | 402 | n->v4daddr = daddr; |
403 | atomic_set(&n->refcnt, 1); | 403 | atomic_set(&n->refcnt, 1); |
404 | atomic_set(&n->rid, 0); | ||
404 | n->ip_id_count = secure_ip_id(daddr); | 405 | n->ip_id_count = secure_ip_id(daddr); |
405 | n->tcp_ts_stamp = 0; | 406 | n->tcp_ts_stamp = 0; |
406 | 407 | ||
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 8ce0ce2ee48e..ce2b70ce4018 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
@@ -22,6 +22,7 @@ | |||
22 | * Patrick McHardy : LRU queue of frag heads for evictor. | 22 | * Patrick McHardy : LRU queue of frag heads for evictor. |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/compiler.h> | ||
25 | #include <linux/config.h> | 26 | #include <linux/config.h> |
26 | #include <linux/module.h> | 27 | #include <linux/module.h> |
27 | #include <linux/types.h> | 28 | #include <linux/types.h> |
@@ -38,6 +39,7 @@ | |||
38 | #include <net/ip.h> | 39 | #include <net/ip.h> |
39 | #include <net/icmp.h> | 40 | #include <net/icmp.h> |
40 | #include <net/checksum.h> | 41 | #include <net/checksum.h> |
42 | #include <net/inetpeer.h> | ||
41 | #include <linux/tcp.h> | 43 | #include <linux/tcp.h> |
42 | #include <linux/udp.h> | 44 | #include <linux/udp.h> |
43 | #include <linux/inet.h> | 45 | #include <linux/inet.h> |
@@ -56,6 +58,8 @@ | |||
56 | int sysctl_ipfrag_high_thresh = 256*1024; | 58 | int sysctl_ipfrag_high_thresh = 256*1024; |
57 | int sysctl_ipfrag_low_thresh = 192*1024; | 59 | int sysctl_ipfrag_low_thresh = 192*1024; |
58 | 60 | ||
61 | int sysctl_ipfrag_max_dist = 64; | ||
62 | |||
59 | /* Important NOTE! Fragment queue must be destroyed before MSL expires. | 63 | /* Important NOTE! Fragment queue must be destroyed before MSL expires. |
60 | * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. | 64 | * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. |
61 | */ | 65 | */ |
@@ -89,8 +93,10 @@ struct ipq { | |||
89 | spinlock_t lock; | 93 | spinlock_t lock; |
90 | atomic_t refcnt; | 94 | atomic_t refcnt; |
91 | struct timer_list timer; /* when will this queue expire? */ | 95 | struct timer_list timer; /* when will this queue expire? */ |
92 | int iif; | ||
93 | struct timeval stamp; | 96 | struct timeval stamp; |
97 | int iif; | ||
98 | unsigned int rid; | ||
99 | struct inet_peer *peer; | ||
94 | }; | 100 | }; |
95 | 101 | ||
96 | /* Hash table. */ | 102 | /* Hash table. */ |
@@ -195,6 +201,9 @@ static void ip_frag_destroy(struct ipq *qp, int *work) | |||
195 | BUG_TRAP(qp->last_in&COMPLETE); | 201 | BUG_TRAP(qp->last_in&COMPLETE); |
196 | BUG_TRAP(del_timer(&qp->timer) == 0); | 202 | BUG_TRAP(del_timer(&qp->timer) == 0); |
197 | 203 | ||
204 | if (qp->peer) | ||
205 | inet_putpeer(qp->peer); | ||
206 | |||
198 | /* Release all fragment data. */ | 207 | /* Release all fragment data. */ |
199 | fp = qp->fragments; | 208 | fp = qp->fragments; |
200 | while (fp) { | 209 | while (fp) { |
@@ -353,6 +362,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user) | |||
353 | qp->meat = 0; | 362 | qp->meat = 0; |
354 | qp->fragments = NULL; | 363 | qp->fragments = NULL; |
355 | qp->iif = 0; | 364 | qp->iif = 0; |
365 | qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL; | ||
356 | 366 | ||
357 | /* Initialize a timer for this entry. */ | 367 | /* Initialize a timer for this entry. */ |
358 | init_timer(&qp->timer); | 368 | init_timer(&qp->timer); |
@@ -398,6 +408,56 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user) | |||
398 | return ip_frag_create(hash, iph, user); | 408 | return ip_frag_create(hash, iph, user); |
399 | } | 409 | } |
400 | 410 | ||
411 | /* Is the fragment too far ahead to be part of ipq? */ | ||
412 | static inline int ip_frag_too_far(struct ipq *qp) | ||
413 | { | ||
414 | struct inet_peer *peer = qp->peer; | ||
415 | unsigned int max = sysctl_ipfrag_max_dist; | ||
416 | unsigned int start, end; | ||
417 | |||
418 | int rc; | ||
419 | |||
420 | if (!peer || !max) | ||
421 | return 0; | ||
422 | |||
423 | start = qp->rid; | ||
424 | end = atomic_inc_return(&peer->rid); | ||
425 | qp->rid = end; | ||
426 | |||
427 | rc = qp->fragments && (end - start) > max; | ||
428 | |||
429 | if (rc) { | ||
430 | IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); | ||
431 | } | ||
432 | |||
433 | return rc; | ||
434 | } | ||
435 | |||
436 | static int ip_frag_reinit(struct ipq *qp) | ||
437 | { | ||
438 | struct sk_buff *fp; | ||
439 | |||
440 | if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) { | ||
441 | atomic_inc(&qp->refcnt); | ||
442 | return -ETIMEDOUT; | ||
443 | } | ||
444 | |||
445 | fp = qp->fragments; | ||
446 | do { | ||
447 | struct sk_buff *xp = fp->next; | ||
448 | frag_kfree_skb(fp, NULL); | ||
449 | fp = xp; | ||
450 | } while (fp); | ||
451 | |||
452 | qp->last_in = 0; | ||
453 | qp->len = 0; | ||
454 | qp->meat = 0; | ||
455 | qp->fragments = NULL; | ||
456 | qp->iif = 0; | ||
457 | |||
458 | return 0; | ||
459 | } | ||
460 | |||
401 | /* Add new segment to existing queue. */ | 461 | /* Add new segment to existing queue. */ |
402 | static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | 462 | static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) |
403 | { | 463 | { |
@@ -408,6 +468,12 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
408 | if (qp->last_in & COMPLETE) | 468 | if (qp->last_in & COMPLETE) |
409 | goto err; | 469 | goto err; |
410 | 470 | ||
471 | if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && | ||
472 | unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) { | ||
473 | ipq_kill(qp); | ||
474 | goto err; | ||
475 | } | ||
476 | |||
411 | offset = ntohs(skb->nh.iph->frag_off); | 477 | offset = ntohs(skb->nh.iph->frag_off); |
412 | flags = offset & ~IP_OFFSET; | 478 | flags = offset & ~IP_OFFSET; |
413 | offset &= IP_OFFSET; | 479 | offset &= IP_OFFSET; |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index eba64e2bd397..2a830de3a699 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -445,6 +445,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) | |||
445 | 445 | ||
446 | hlen = iph->ihl * 4; | 446 | hlen = iph->ihl * 4; |
447 | mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */ | 447 | mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */ |
448 | IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; | ||
448 | 449 | ||
449 | /* When frag_list is given, use it. First, check its validity: | 450 | /* When frag_list is given, use it. First, check its validity: |
450 | * some transformers could create wrong frag_list or break existing | 451 | * some transformers could create wrong frag_list or break existing |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 01444a02b48b..dbf82955aabe 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -22,6 +22,7 @@ | |||
22 | extern int sysctl_ip_nonlocal_bind; | 22 | extern int sysctl_ip_nonlocal_bind; |
23 | 23 | ||
24 | #ifdef CONFIG_SYSCTL | 24 | #ifdef CONFIG_SYSCTL |
25 | static int zero; | ||
25 | static int tcp_retr1_max = 255; | 26 | static int tcp_retr1_max = 255; |
26 | static int ip_local_port_range_min[] = { 1, 1 }; | 27 | static int ip_local_port_range_min[] = { 1, 1 }; |
27 | static int ip_local_port_range_max[] = { 65535, 65535 }; | 28 | static int ip_local_port_range_max[] = { 65535, 65535 }; |
@@ -614,6 +615,15 @@ ctl_table ipv4_table[] = { | |||
614 | .strategy = &sysctl_jiffies | 615 | .strategy = &sysctl_jiffies |
615 | }, | 616 | }, |
616 | { | 617 | { |
618 | .ctl_name = NET_IPV4_IPFRAG_MAX_DIST, | ||
619 | .procname = "ipfrag_max_dist", | ||
620 | .data = &sysctl_ipfrag_max_dist, | ||
621 | .maxlen = sizeof(int), | ||
622 | .mode = 0644, | ||
623 | .proc_handler = &proc_dointvec_minmax, | ||
624 | .extra1 = &zero | ||
625 | }, | ||
626 | { | ||
617 | .ctl_name = NET_TCP_NO_METRICS_SAVE, | 627 | .ctl_name = NET_TCP_NO_METRICS_SAVE, |
618 | .procname = "tcp_no_metrics_save", | 628 | .procname = "tcp_no_metrics_save", |
619 | .data = &sysctl_tcp_nometrics_save, | 629 | .data = &sysctl_tcp_nometrics_save, |