aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorEric Dumazet <dada1@cosmosbay.com>2009-03-25 16:05:46 -0400
committerPatrick McHardy <kaber@trash.net>2009-03-25 16:05:46 -0400
commitea781f197d6a835cbb93a0bf88ee1696296ed8aa (patch)
tree820fe7df1199d8bb6c793e664e480ea56ecf612e /net/ipv4
parent1f9352ae2253a97b07b34dcf16ffa3b4ca12c558 (diff)
netfilter: nf_conntrack: use SLAB_DESTROY_BY_RCU and get rid of call_rcu()
Use "hlist_nulls" infrastructure we added in 2.6.29 for RCUification of UDP & TCP. This permits an easy conversion from call_rcu() based hash lists to a SLAB_DESTROY_BY_RCU one. Avoiding call_rcu() delay at nf_conn freeing time has numerous gains. First, it doesnt fill RCU queues (up to 10000 elements per cpu). This reduces OOM possibility, if queued elements are not taken into account This reduces latency problems when RCU queue size hits hilimit and triggers emergency mode. - It allows fast reuse of just freed elements, permitting better use of CPU cache. - We delete rcu_head from "struct nf_conn", shrinking size of this structure by 8 or 16 bytes. This patch only takes care of "struct nf_conn". call_rcu() is still used for less critical conntrack parts, that may be converted later if necessary. Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: Patrick McHardy <kaber@trash.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c63
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c2
2 files changed, 37 insertions, 28 deletions
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 6ba5c557690c..8668a3defda6 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -25,40 +25,42 @@ struct ct_iter_state {
25 unsigned int bucket; 25 unsigned int bucket;
26}; 26};
27 27
28static struct hlist_node *ct_get_first(struct seq_file *seq) 28static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
29{ 29{
30 struct net *net = seq_file_net(seq); 30 struct net *net = seq_file_net(seq);
31 struct ct_iter_state *st = seq->private; 31 struct ct_iter_state *st = seq->private;
32 struct hlist_node *n; 32 struct hlist_nulls_node *n;
33 33
34 for (st->bucket = 0; 34 for (st->bucket = 0;
35 st->bucket < nf_conntrack_htable_size; 35 st->bucket < nf_conntrack_htable_size;
36 st->bucket++) { 36 st->bucket++) {
37 n = rcu_dereference(net->ct.hash[st->bucket].first); 37 n = rcu_dereference(net->ct.hash[st->bucket].first);
38 if (n) 38 if (!is_a_nulls(n))
39 return n; 39 return n;
40 } 40 }
41 return NULL; 41 return NULL;
42} 42}
43 43
44static struct hlist_node *ct_get_next(struct seq_file *seq, 44static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
45 struct hlist_node *head) 45 struct hlist_nulls_node *head)
46{ 46{
47 struct net *net = seq_file_net(seq); 47 struct net *net = seq_file_net(seq);
48 struct ct_iter_state *st = seq->private; 48 struct ct_iter_state *st = seq->private;
49 49
50 head = rcu_dereference(head->next); 50 head = rcu_dereference(head->next);
51 while (head == NULL) { 51 while (is_a_nulls(head)) {
52 if (++st->bucket >= nf_conntrack_htable_size) 52 if (likely(get_nulls_value(head) == st->bucket)) {
53 return NULL; 53 if (++st->bucket >= nf_conntrack_htable_size)
54 return NULL;
55 }
54 head = rcu_dereference(net->ct.hash[st->bucket].first); 56 head = rcu_dereference(net->ct.hash[st->bucket].first);
55 } 57 }
56 return head; 58 return head;
57} 59}
58 60
59static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos) 61static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
60{ 62{
61 struct hlist_node *head = ct_get_first(seq); 63 struct hlist_nulls_node *head = ct_get_first(seq);
62 64
63 if (head) 65 if (head)
64 while (pos && (head = ct_get_next(seq, head))) 66 while (pos && (head = ct_get_next(seq, head)))
@@ -87,69 +89,76 @@ static void ct_seq_stop(struct seq_file *s, void *v)
87 89
88static int ct_seq_show(struct seq_file *s, void *v) 90static int ct_seq_show(struct seq_file *s, void *v)
89{ 91{
90 const struct nf_conntrack_tuple_hash *hash = v; 92 struct nf_conntrack_tuple_hash *hash = v;
91 const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); 93 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
92 const struct nf_conntrack_l3proto *l3proto; 94 const struct nf_conntrack_l3proto *l3proto;
93 const struct nf_conntrack_l4proto *l4proto; 95 const struct nf_conntrack_l4proto *l4proto;
96 int ret = 0;
94 97
95 NF_CT_ASSERT(ct); 98 NF_CT_ASSERT(ct);
99 if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
100 return 0;
101
96 102
97 /* we only want to print DIR_ORIGINAL */ 103 /* we only want to print DIR_ORIGINAL */
98 if (NF_CT_DIRECTION(hash)) 104 if (NF_CT_DIRECTION(hash))
99 return 0; 105 goto release;
100 if (nf_ct_l3num(ct) != AF_INET) 106 if (nf_ct_l3num(ct) != AF_INET)
101 return 0; 107 goto release;
102 108
103 l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); 109 l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
104 NF_CT_ASSERT(l3proto); 110 NF_CT_ASSERT(l3proto);
105 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); 111 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
106 NF_CT_ASSERT(l4proto); 112 NF_CT_ASSERT(l4proto);
107 113
114 ret = -ENOSPC;
108 if (seq_printf(s, "%-8s %u %ld ", 115 if (seq_printf(s, "%-8s %u %ld ",
109 l4proto->name, nf_ct_protonum(ct), 116 l4proto->name, nf_ct_protonum(ct),
110 timer_pending(&ct->timeout) 117 timer_pending(&ct->timeout)
111 ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0) 118 ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0)
112 return -ENOSPC; 119 goto release;
113 120
114 if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct)) 121 if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct))
115 return -ENOSPC; 122 goto release;
116 123
117 if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, 124 if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
118 l3proto, l4proto)) 125 l3proto, l4proto))
119 return -ENOSPC; 126 goto release;
120 127
121 if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) 128 if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
122 return -ENOSPC; 129 goto release;
123 130
124 if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) 131 if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
125 if (seq_printf(s, "[UNREPLIED] ")) 132 if (seq_printf(s, "[UNREPLIED] "))
126 return -ENOSPC; 133 goto release;
127 134
128 if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, 135 if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
129 l3proto, l4proto)) 136 l3proto, l4proto))
130 return -ENOSPC; 137 goto release;
131 138
132 if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) 139 if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
133 return -ENOSPC; 140 goto release;
134 141
135 if (test_bit(IPS_ASSURED_BIT, &ct->status)) 142 if (test_bit(IPS_ASSURED_BIT, &ct->status))
136 if (seq_printf(s, "[ASSURED] ")) 143 if (seq_printf(s, "[ASSURED] "))
137 return -ENOSPC; 144 goto release;
138 145
139#ifdef CONFIG_NF_CONNTRACK_MARK 146#ifdef CONFIG_NF_CONNTRACK_MARK
140 if (seq_printf(s, "mark=%u ", ct->mark)) 147 if (seq_printf(s, "mark=%u ", ct->mark))
141 return -ENOSPC; 148 goto release;
142#endif 149#endif
143 150
144#ifdef CONFIG_NF_CONNTRACK_SECMARK 151#ifdef CONFIG_NF_CONNTRACK_SECMARK
145 if (seq_printf(s, "secmark=%u ", ct->secmark)) 152 if (seq_printf(s, "secmark=%u ", ct->secmark))
146 return -ENOSPC; 153 goto release;
147#endif 154#endif
148 155
149 if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) 156 if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
150 return -ENOSPC; 157 goto release;
151 158 ret = 0;
152 return 0; 159release:
160 nf_ct_put(ct);
161 return ret;
153} 162}
154 163
155static const struct seq_operations ct_seq_ops = { 164static const struct seq_operations ct_seq_ops = {
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index a65cf692359f..fe65187810f0 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -679,7 +679,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
679static int __net_init nf_nat_net_init(struct net *net) 679static int __net_init nf_nat_net_init(struct net *net)
680{ 680{
681 net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 681 net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size,
682 &net->ipv4.nat_vmalloced); 682 &net->ipv4.nat_vmalloced, 0);
683 if (!net->ipv4.nat_bysource) 683 if (!net->ipv4.nat_bysource)
684 return -ENOMEM; 684 return -ENOMEM;
685 return 0; 685 return 0;