about | summary | refs | log | tree | commit | diff | stats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/Makefile 3
-rw-r--r-- net/ipv4/inet_fragment.c 44
-rw-r--r-- net/ipv4/ip_fragment.c 109
-rw-r--r-- net/ipv4/proc.c 4
4 files changed, 101 insertions, 59 deletions
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index a02c36d0a13e..93fe3966805d 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,7 +10,8 @@ obj-y := route.o inetpeer.o protocol.o \
10 tcp_minisocks.o tcp_cong.o \ 10 tcp_minisocks.o tcp_cong.o \
11 datagram.o raw.o udp.o udplite.o \ 11 datagram.o raw.o udp.o udplite.o \
12 arp.o icmp.o devinet.o af_inet.o igmp.o \ 12 arp.o icmp.o devinet.o af_inet.o igmp.o \
13 sysctl_net_ipv4.o fib_frontend.o fib_semantics.o 13 sysctl_net_ipv4.o fib_frontend.o fib_semantics.o \
14 inet_fragment.o
14 15
15obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o 16obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
16obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o 17obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
new file mode 100644
index 000000000000..69623ff4e4c6
--- /dev/null
+++ b/net/ipv4/inet_fragment.c
@@ -0,0 +1,44 @@
1/*
2 * inet fragments management
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Pavel Emelyanov <xemul@openvz.org>
10 * Started as consolidation of ipv4/ip_fragment.c,
11 * ipv6/reassembly.c and ipv6 nf conntrack reassembly
12 */
13
14#include <linux/list.h>
15#include <linux/spinlock.h>
16#include <linux/module.h>
17#include <linux/timer.h>
18#include <linux/mm.h>
19
20#include <net/inet_frag.h>
21
22void inet_frags_init(struct inet_frags *f)
23{
24 int i;
25
26 for (i = 0; i < INETFRAGS_HASHSZ; i++)
27 INIT_HLIST_HEAD(&f->hash[i]);
28
29 INIT_LIST_HEAD(&f->lru_list);
30 rwlock_init(&f->lock);
31
32 f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
33 (jiffies ^ (jiffies >> 6)));
34
35 f->nqueues = 0;
36 atomic_set(&f->mem, 0);
37
38}
39EXPORT_SYMBOL(inet_frags_init);
40
41void inet_frags_fini(struct inet_frags *f)
42{
43}
44EXPORT_SYMBOL(inet_frags_fini);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 428eaa502ec2..321e694b72e8 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -87,16 +87,17 @@ struct ipq {
87 struct inet_peer *peer; 87 struct inet_peer *peer;
88}; 88};
89 89
90/* Hash table. */ 90static struct inet_frags ip4_frags;
91 91
92#define IPQ_HASHSZ 64 92int ip_frag_nqueues(void)
93{
94 return ip4_frags.nqueues;
95}
93 96
94/* Per-bucket lock is easy to add now. */ 97int ip_frag_mem(void)
95static struct hlist_head ipq_hash[IPQ_HASHSZ]; 98{
96static DEFINE_RWLOCK(ipfrag_lock); 99 return atomic_read(&ip4_frags.mem);
97static u32 ipfrag_hash_rnd; 100}
98static LIST_HEAD(ipq_lru_list);
99int ip_frag_nqueues = 0;
100 101
101static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, 102static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
102 struct net_device *dev); 103 struct net_device *dev);
@@ -105,24 +106,23 @@ static __inline__ void __ipq_unlink(struct ipq *qp)
105{ 106{
106 hlist_del(&qp->q.list); 107 hlist_del(&qp->q.list);
107 list_del(&qp->q.lru_list); 108 list_del(&qp->q.lru_list);
108 ip_frag_nqueues--; 109 ip4_frags.nqueues--;
109} 110}
110 111
111static __inline__ void ipq_unlink(struct ipq *ipq) 112static __inline__ void ipq_unlink(struct ipq *ipq)
112{ 113{
113 write_lock(&ipfrag_lock); 114 write_lock(&ip4_frags.lock);
114 __ipq_unlink(ipq); 115 __ipq_unlink(ipq);
115 write_unlock(&ipfrag_lock); 116 write_unlock(&ip4_frags.lock);
116} 117}
117 118
118static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) 119static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
119{ 120{
120 return jhash_3words((__force u32)id << 16 | prot, 121 return jhash_3words((__force u32)id << 16 | prot,
121 (__force u32)saddr, (__force u32)daddr, 122 (__force u32)saddr, (__force u32)daddr,
122 ipfrag_hash_rnd) & (IPQ_HASHSZ - 1); 123 ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
123} 124}
124 125
125static struct timer_list ipfrag_secret_timer;
126int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ; 126int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;
127 127
128static void ipfrag_secret_rebuild(unsigned long dummy) 128static void ipfrag_secret_rebuild(unsigned long dummy)
@@ -130,13 +130,13 @@ static void ipfrag_secret_rebuild(unsigned long dummy)
130 unsigned long now = jiffies; 130 unsigned long now = jiffies;
131 int i; 131 int i;
132 132
133 write_lock(&ipfrag_lock); 133 write_lock(&ip4_frags.lock);
134 get_random_bytes(&ipfrag_hash_rnd, sizeof(u32)); 134 get_random_bytes(&ip4_frags.rnd, sizeof(u32));
135 for (i = 0; i < IPQ_HASHSZ; i++) { 135 for (i = 0; i < INETFRAGS_HASHSZ; i++) {
136 struct ipq *q; 136 struct ipq *q;
137 struct hlist_node *p, *n; 137 struct hlist_node *p, *n;
138 138
139 hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], q.list) { 139 hlist_for_each_entry_safe(q, p, n, &ip4_frags.hash[i], q.list) {
140 unsigned int hval = ipqhashfn(q->id, q->saddr, 140 unsigned int hval = ipqhashfn(q->id, q->saddr,
141 q->daddr, q->protocol); 141 q->daddr, q->protocol);
142 142
@@ -144,23 +144,21 @@ static void ipfrag_secret_rebuild(unsigned long dummy)
144 hlist_del(&q->q.list); 144 hlist_del(&q->q.list);
145 145
146 /* Relink to new hash chain. */ 146 /* Relink to new hash chain. */
147 hlist_add_head(&q->q.list, &ipq_hash[hval]); 147 hlist_add_head(&q->q.list, &ip4_frags.hash[hval]);
148 } 148 }
149 } 149 }
150 } 150 }
151 write_unlock(&ipfrag_lock); 151 write_unlock(&ip4_frags.lock);
152 152
153 mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval); 153 mod_timer(&ip4_frags.secret_timer, now + sysctl_ipfrag_secret_interval);
154} 154}
155 155
156atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */
157
158/* Memory Tracking Functions. */ 156/* Memory Tracking Functions. */
159static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work) 157static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
160{ 158{
161 if (work) 159 if (work)
162 *work -= skb->truesize; 160 *work -= skb->truesize;
163 atomic_sub(skb->truesize, &ip_frag_mem); 161 atomic_sub(skb->truesize, &ip4_frags.mem);
164 kfree_skb(skb); 162 kfree_skb(skb);
165} 163}
166 164
@@ -168,7 +166,7 @@ static __inline__ void frag_free_queue(struct ipq *qp, int *work)
168{ 166{
169 if (work) 167 if (work)
170 *work -= sizeof(struct ipq); 168 *work -= sizeof(struct ipq);
171 atomic_sub(sizeof(struct ipq), &ip_frag_mem); 169 atomic_sub(sizeof(struct ipq), &ip4_frags.mem);
172 kfree(qp); 170 kfree(qp);
173} 171}
174 172
@@ -178,7 +176,7 @@ static __inline__ struct ipq *frag_alloc_queue(void)
178 176
179 if (!qp) 177 if (!qp)
180 return NULL; 178 return NULL;
181 atomic_add(sizeof(struct ipq), &ip_frag_mem); 179 atomic_add(sizeof(struct ipq), &ip4_frags.mem);
182 return qp; 180 return qp;
183} 181}
184 182
@@ -239,20 +237,20 @@ static void ip_evictor(void)
239 struct list_head *tmp; 237 struct list_head *tmp;
240 int work; 238 int work;
241 239
242 work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh; 240 work = atomic_read(&ip4_frags.mem) - sysctl_ipfrag_low_thresh;
243 if (work <= 0) 241 if (work <= 0)
244 return; 242 return;
245 243
246 while (work > 0) { 244 while (work > 0) {
247 read_lock(&ipfrag_lock); 245 read_lock(&ip4_frags.lock);
248 if (list_empty(&ipq_lru_list)) { 246 if (list_empty(&ip4_frags.lru_list)) {
249 read_unlock(&ipfrag_lock); 247 read_unlock(&ip4_frags.lock);
250 return; 248 return;
251 } 249 }
252 tmp = ipq_lru_list.next; 250 tmp = ip4_frags.lru_list.next;
253 qp = list_entry(tmp, struct ipq, q.lru_list); 251 qp = list_entry(tmp, struct ipq, q.lru_list);
254 atomic_inc(&qp->q.refcnt); 252 atomic_inc(&qp->q.refcnt);
255 read_unlock(&ipfrag_lock); 253 read_unlock(&ip4_frags.lock);
256 254
257 spin_lock(&qp->q.lock); 255 spin_lock(&qp->q.lock);
258 if (!(qp->q.last_in&COMPLETE)) 256 if (!(qp->q.last_in&COMPLETE))
@@ -304,7 +302,7 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
304#endif 302#endif
305 unsigned int hash; 303 unsigned int hash;
306 304
307 write_lock(&ipfrag_lock); 305 write_lock(&ip4_frags.lock);
308 hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr, 306 hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr,
309 qp_in->protocol); 307 qp_in->protocol);
310#ifdef CONFIG_SMP 308#ifdef CONFIG_SMP
@@ -312,14 +310,14 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
312 * such entry could be created on other cpu, while we 310 * such entry could be created on other cpu, while we
313 * promoted read lock to write lock. 311 * promoted read lock to write lock.
314 */ 312 */
315 hlist_for_each_entry(qp, n, &ipq_hash[hash], q.list) { 313 hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
316 if (qp->id == qp_in->id && 314 if (qp->id == qp_in->id &&
317 qp->saddr == qp_in->saddr && 315 qp->saddr == qp_in->saddr &&
318 qp->daddr == qp_in->daddr && 316 qp->daddr == qp_in->daddr &&
319 qp->protocol == qp_in->protocol && 317 qp->protocol == qp_in->protocol &&
320 qp->user == qp_in->user) { 318 qp->user == qp_in->user) {
321 atomic_inc(&qp->q.refcnt); 319 atomic_inc(&qp->q.refcnt);
322 write_unlock(&ipfrag_lock); 320 write_unlock(&ip4_frags.lock);
323 qp_in->q.last_in |= COMPLETE; 321 qp_in->q.last_in |= COMPLETE;
324 ipq_put(qp_in, NULL); 322 ipq_put(qp_in, NULL);
325 return qp; 323 return qp;
@@ -332,11 +330,11 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
332 atomic_inc(&qp->q.refcnt); 330 atomic_inc(&qp->q.refcnt);
333 331
334 atomic_inc(&qp->q.refcnt); 332 atomic_inc(&qp->q.refcnt);
335 hlist_add_head(&qp->q.list, &ipq_hash[hash]); 333 hlist_add_head(&qp->q.list, &ip4_frags.hash[hash]);
336 INIT_LIST_HEAD(&qp->q.lru_list); 334 INIT_LIST_HEAD(&qp->q.lru_list);
337 list_add_tail(&qp->q.lru_list, &ipq_lru_list); 335 list_add_tail(&qp->q.lru_list, &ip4_frags.lru_list);
338 ip_frag_nqueues++; 336 ip4_frags.nqueues++;
339 write_unlock(&ipfrag_lock); 337 write_unlock(&ip4_frags.lock);
340 return qp; 338 return qp;
341} 339}
342 340
@@ -387,20 +385,20 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
387 struct ipq *qp; 385 struct ipq *qp;
388 struct hlist_node *n; 386 struct hlist_node *n;
389 387
390 read_lock(&ipfrag_lock); 388 read_lock(&ip4_frags.lock);
391 hash = ipqhashfn(id, saddr, daddr, protocol); 389 hash = ipqhashfn(id, saddr, daddr, protocol);
392 hlist_for_each_entry(qp, n, &ipq_hash[hash], q.list) { 390 hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
393 if (qp->id == id && 391 if (qp->id == id &&
394 qp->saddr == saddr && 392 qp->saddr == saddr &&
395 qp->daddr == daddr && 393 qp->daddr == daddr &&
396 qp->protocol == protocol && 394 qp->protocol == protocol &&
397 qp->user == user) { 395 qp->user == user) {
398 atomic_inc(&qp->q.refcnt); 396 atomic_inc(&qp->q.refcnt);
399 read_unlock(&ipfrag_lock); 397 read_unlock(&ip4_frags.lock);
400 return qp; 398 return qp;
401 } 399 }
402 } 400 }
403 read_unlock(&ipfrag_lock); 401 read_unlock(&ip4_frags.lock);
404 402
405 return ip_frag_create(iph, user); 403 return ip_frag_create(iph, user);
406} 404}
@@ -599,16 +597,16 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
599 } 597 }
600 qp->q.stamp = skb->tstamp; 598 qp->q.stamp = skb->tstamp;
601 qp->q.meat += skb->len; 599 qp->q.meat += skb->len;
602 atomic_add(skb->truesize, &ip_frag_mem); 600 atomic_add(skb->truesize, &ip4_frags.mem);
603 if (offset == 0) 601 if (offset == 0)
604 qp->q.last_in |= FIRST_IN; 602 qp->q.last_in |= FIRST_IN;
605 603
606 if (qp->q.last_in == (FIRST_IN | LAST_IN) && qp->q.meat == qp->q.len) 604 if (qp->q.last_in == (FIRST_IN | LAST_IN) && qp->q.meat == qp->q.len)
607 return ip_frag_reasm(qp, prev, dev); 605 return ip_frag_reasm(qp, prev, dev);
608 606
609 write_lock(&ipfrag_lock); 607 write_lock(&ip4_frags.lock);
610 list_move_tail(&qp->q.lru_list, &ipq_lru_list); 608 list_move_tail(&qp->q.lru_list, &ip4_frags.lru_list);
611 write_unlock(&ipfrag_lock); 609 write_unlock(&ip4_frags.lock);
612 return -EINPROGRESS; 610 return -EINPROGRESS;
613 611
614err: 612err:
@@ -684,12 +682,12 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
684 head->len -= clone->len; 682 head->len -= clone->len;
685 clone->csum = 0; 683 clone->csum = 0;
686 clone->ip_summed = head->ip_summed; 684 clone->ip_summed = head->ip_summed;
687 atomic_add(clone->truesize, &ip_frag_mem); 685 atomic_add(clone->truesize, &ip4_frags.mem);
688 } 686 }
689 687
690 skb_shinfo(head)->frag_list = head->next; 688 skb_shinfo(head)->frag_list = head->next;
691 skb_push(head, head->data - skb_network_header(head)); 689 skb_push(head, head->data - skb_network_header(head));
692 atomic_sub(head->truesize, &ip_frag_mem); 690 atomic_sub(head->truesize, &ip4_frags.mem);
693 691
694 for (fp=head->next; fp; fp = fp->next) { 692 for (fp=head->next; fp; fp = fp->next) {
695 head->data_len += fp->len; 693 head->data_len += fp->len;
@@ -699,7 +697,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
699 else if (head->ip_summed == CHECKSUM_COMPLETE) 697 else if (head->ip_summed == CHECKSUM_COMPLETE)
700 head->csum = csum_add(head->csum, fp->csum); 698 head->csum = csum_add(head->csum, fp->csum);
701 head->truesize += fp->truesize; 699 head->truesize += fp->truesize;
702 atomic_sub(fp->truesize, &ip_frag_mem); 700 atomic_sub(fp->truesize, &ip4_frags.mem);
703 } 701 }
704 702
705 head->next = NULL; 703 head->next = NULL;
@@ -735,7 +733,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
735 IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); 733 IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
736 734
737 /* Start by cleaning up the memory. */ 735 /* Start by cleaning up the memory. */
738 if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh) 736 if (atomic_read(&ip4_frags.mem) > sysctl_ipfrag_high_thresh)
739 ip_evictor(); 737 ip_evictor();
740 738
741 /* Lookup (or create) queue header */ 739 /* Lookup (or create) queue header */
@@ -758,13 +756,12 @@ int ip_defrag(struct sk_buff *skb, u32 user)
758 756
759void __init ipfrag_init(void) 757void __init ipfrag_init(void)
760{ 758{
761 ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ 759 init_timer(&ip4_frags.secret_timer);
762 (jiffies ^ (jiffies >> 6))); 760 ip4_frags.secret_timer.function = ipfrag_secret_rebuild;
761 ip4_frags.secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval;
762 add_timer(&ip4_frags.secret_timer);
763 763
764 init_timer(&ipfrag_secret_timer); 764 inet_frags_init(&ip4_frags);
765 ipfrag_secret_timer.function = ipfrag_secret_rebuild;
766 ipfrag_secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval;
767 add_timer(&ipfrag_secret_timer);
768} 765}
769 766
770EXPORT_SYMBOL(ip_defrag); 767EXPORT_SYMBOL(ip_defrag);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index e5b05b039101..fd16cb8f8abe 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -70,8 +70,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
70 seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); 70 seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
71 seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot)); 71 seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot));
72 seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); 72 seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
73 seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues, 73 seq_printf(seq, "FRAG: inuse %d memory %d\n",
74 atomic_read(&ip_frag_mem)); 74 ip_frag_nqueues(), ip_frag_mem());
75 return 0; 75 return 0;
76} 76}
77 77