aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6/addrlabel.c
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com> 2017-09-19 19:27:06 -0400
committer: David S. Miller <davem@davemloft.net> 2017-09-19 19:32:23 -0400
commit: a90c9347e90ed1e9323d71402ed18023bc910cd8 (patch)
tree: 2b5eeeed2992686abdeb1121705aada9a5c44aff /net/ipv6/addrlabel.c
parent: d464e84eed02993d40ad55fdc19f4523e4deee5b (diff)
ipv6: addrlabel: per netns list
Having a global list of labels does not scale to thousands of netns in the cloud era. It causes quadratic behavior on netns creation and deletion. It is time to have a per-netns list of ~10 labels.

Tested:

$ time perf record (for f in `seq 1 3000` ; do ip netns add tast$f; done)
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 3.637 MB perf.data (~158898 samples) ]

real 0m20.837s # instead of 0m24.227s
user 0m0.328s
sys 0m20.338s # instead of 0m23.753s

16.17% ip [kernel.kallsyms] [k] netlink_broadcast_filtered
12.30% ip [kernel.kallsyms] [k] netlink_has_listeners
6.76% ip [kernel.kallsyms] [k] _raw_spin_lock_irqsave
5.78% ip [kernel.kallsyms] [k] memset_erms
5.77% ip [kernel.kallsyms] [k] kobject_uevent_env
5.18% ip [kernel.kallsyms] [k] refcount_sub_and_test
4.96% ip [kernel.kallsyms] [k] _raw_read_lock
3.82% ip [kernel.kallsyms] [k] refcount_inc_not_zero
3.33% ip [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore
2.11% ip [kernel.kallsyms] [k] unmap_page_range
1.77% ip [kernel.kallsyms] [k] __wake_up
1.69% ip [kernel.kallsyms] [k] strlen
1.17% ip [kernel.kallsyms] [k] __wake_up_common
1.09% ip [kernel.kallsyms] [k] insert_header
1.04% ip [kernel.kallsyms] [k] page_remove_rmap
1.01% ip [kernel.kallsyms] [k] consume_skb
0.98% ip [kernel.kallsyms] [k] netlink_trim
0.51% ip [kernel.kallsyms] [k] kernfs_link_sibling
0.51% ip [kernel.kallsyms] [k] filemap_map_pages
0.46% ip [kernel.kallsyms] [k] memcpy_erms

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6/addrlabel.c')
-rw-r--r-- net/ipv6/addrlabel.c | 81
1 files changed, 30 insertions, 51 deletions
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index b055bc79f56d..c6311d7108f6 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -30,7 +30,6 @@
30 * Policy Table 30 * Policy Table
31 */ 31 */
32struct ip6addrlbl_entry { 32struct ip6addrlbl_entry {
33 possible_net_t lbl_net;
34 struct in6_addr prefix; 33 struct in6_addr prefix;
35 int prefixlen; 34 int prefixlen;
36 int ifindex; 35 int ifindex;
@@ -41,19 +40,6 @@ struct ip6addrlbl_entry {
41 struct rcu_head rcu; 40 struct rcu_head rcu;
42}; 41};
43 42
44static struct ip6addrlbl_table
45{
46 struct hlist_head head;
47 spinlock_t lock;
48 u32 seq;
49} ip6addrlbl_table;
50
51static inline
52struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
53{
54 return read_pnet(&lbl->lbl_net);
55}
56
57/* 43/*
58 * Default policy table (RFC6724 + extensions) 44 * Default policy table (RFC6724 + extensions)
59 * 45 *
@@ -148,13 +134,10 @@ static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
148} 134}
149 135
150/* Find label */ 136/* Find label */
151static bool __ip6addrlbl_match(struct net *net, 137static bool __ip6addrlbl_match(const struct ip6addrlbl_entry *p,
152 const struct ip6addrlbl_entry *p,
153 const struct in6_addr *addr, 138 const struct in6_addr *addr,
154 int addrtype, int ifindex) 139 int addrtype, int ifindex)
155{ 140{
156 if (!net_eq(ip6addrlbl_net(p), net))
157 return false;
158 if (p->ifindex && p->ifindex != ifindex) 141 if (p->ifindex && p->ifindex != ifindex)
159 return false; 142 return false;
160 if (p->addrtype && p->addrtype != addrtype) 143 if (p->addrtype && p->addrtype != addrtype)
@@ -169,8 +152,9 @@ static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net,
169 int type, int ifindex) 152 int type, int ifindex)
170{ 153{
171 struct ip6addrlbl_entry *p; 154 struct ip6addrlbl_entry *p;
172 hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { 155
173 if (__ip6addrlbl_match(net, p, addr, type, ifindex)) 156 hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
157 if (__ip6addrlbl_match(p, addr, type, ifindex))
174 return p; 158 return p;
175 } 159 }
176 return NULL; 160 return NULL;
@@ -196,8 +180,7 @@ u32 ipv6_addr_label(struct net *net,
196} 180}
197 181
198/* allocate one entry */ 182/* allocate one entry */
199static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, 183static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
200 const struct in6_addr *prefix,
201 int prefixlen, int ifindex, 184 int prefixlen, int ifindex,
202 u32 label) 185 u32 label)
203{ 186{
@@ -236,24 +219,23 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
236 newp->addrtype = addrtype; 219 newp->addrtype = addrtype;
237 newp->label = label; 220 newp->label = label;
238 INIT_HLIST_NODE(&newp->list); 221 INIT_HLIST_NODE(&newp->list);
239 write_pnet(&newp->lbl_net, net);
240 refcount_set(&newp->refcnt, 1); 222 refcount_set(&newp->refcnt, 1);
241 return newp; 223 return newp;
242} 224}
243 225
244/* add a label */ 226/* add a label */
245static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) 227static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp,
228 int replace)
246{ 229{
247 struct hlist_node *n;
248 struct ip6addrlbl_entry *last = NULL, *p = NULL; 230 struct ip6addrlbl_entry *last = NULL, *p = NULL;
231 struct hlist_node *n;
249 int ret = 0; 232 int ret = 0;
250 233
251 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp, 234 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp,
252 replace); 235 replace);
253 236
254 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 237 hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
255 if (p->prefixlen == newp->prefixlen && 238 if (p->prefixlen == newp->prefixlen &&
256 net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
257 p->ifindex == newp->ifindex && 239 p->ifindex == newp->ifindex &&
258 ipv6_addr_equal(&p->prefix, &newp->prefix)) { 240 ipv6_addr_equal(&p->prefix, &newp->prefix)) {
259 if (!replace) { 241 if (!replace) {
@@ -273,10 +255,10 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
273 if (last) 255 if (last)
274 hlist_add_behind_rcu(&newp->list, &last->list); 256 hlist_add_behind_rcu(&newp->list, &last->list);
275 else 257 else
276 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); 258 hlist_add_head_rcu(&newp->list, &net->ipv6.ip6addrlbl_table.head);
277out: 259out:
278 if (!ret) 260 if (!ret)
279 ip6addrlbl_table.seq++; 261 net->ipv6.ip6addrlbl_table.seq++;
280 return ret; 262 return ret;
281} 263}
282 264
@@ -292,12 +274,12 @@ static int ip6addrlbl_add(struct net *net,
292 __func__, prefix, prefixlen, ifindex, (unsigned int)label, 274 __func__, prefix, prefixlen, ifindex, (unsigned int)label,
293 replace); 275 replace);
294 276
295 newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label); 277 newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label);
296 if (IS_ERR(newp)) 278 if (IS_ERR(newp))
297 return PTR_ERR(newp); 279 return PTR_ERR(newp);
298 spin_lock(&ip6addrlbl_table.lock); 280 spin_lock(&net->ipv6.ip6addrlbl_table.lock);
299 ret = __ip6addrlbl_add(newp, replace); 281 ret = __ip6addrlbl_add(net, newp, replace);
300 spin_unlock(&ip6addrlbl_table.lock); 282 spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
301 if (ret) 283 if (ret)
302 ip6addrlbl_free(newp); 284 ip6addrlbl_free(newp);
303 return ret; 285 return ret;
@@ -315,9 +297,8 @@ static int __ip6addrlbl_del(struct net *net,
315 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", 297 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
316 __func__, prefix, prefixlen, ifindex); 298 __func__, prefix, prefixlen, ifindex);
317 299
318 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 300 hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
319 if (p->prefixlen == prefixlen && 301 if (p->prefixlen == prefixlen &&
320 net_eq(ip6addrlbl_net(p), net) &&
321 p->ifindex == ifindex && 302 p->ifindex == ifindex &&
322 ipv6_addr_equal(&p->prefix, prefix)) { 303 ipv6_addr_equal(&p->prefix, prefix)) {
323 hlist_del_rcu(&p->list); 304 hlist_del_rcu(&p->list);
@@ -340,9 +321,9 @@ static int ip6addrlbl_del(struct net *net,
340 __func__, prefix, prefixlen, ifindex); 321 __func__, prefix, prefixlen, ifindex);
341 322
342 ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); 323 ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
343 spin_lock(&ip6addrlbl_table.lock); 324 spin_lock(&net->ipv6.ip6addrlbl_table.lock);
344 ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex); 325 ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex);
345 spin_unlock(&ip6addrlbl_table.lock); 326 spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
346 return ret; 327 return ret;
347} 328}
348 329
@@ -354,6 +335,9 @@ static int __net_init ip6addrlbl_net_init(struct net *net)
354 335
355 ADDRLABEL(KERN_DEBUG "%s\n", __func__); 336 ADDRLABEL(KERN_DEBUG "%s\n", __func__);
356 337
338 spin_lock_init(&net->ipv6.ip6addrlbl_table.lock);
339 INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head);
340
357 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { 341 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
358 int ret = ip6addrlbl_add(net, 342 int ret = ip6addrlbl_add(net,
359 ip6addrlbl_init_table[i].prefix, 343 ip6addrlbl_init_table[i].prefix,
@@ -373,14 +357,12 @@ static void __net_exit ip6addrlbl_net_exit(struct net *net)
373 struct hlist_node *n; 357 struct hlist_node *n;
374 358
375 /* Remove all labels belonging to the exiting net */ 359 /* Remove all labels belonging to the exiting net */
376 spin_lock(&ip6addrlbl_table.lock); 360 spin_lock(&net->ipv6.ip6addrlbl_table.lock);
377 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 361 hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
378 if (net_eq(ip6addrlbl_net(p), net)) { 362 hlist_del_rcu(&p->list);
379 hlist_del_rcu(&p->list); 363 ip6addrlbl_put(p);
380 ip6addrlbl_put(p);
381 }
382 } 364 }
383 spin_unlock(&ip6addrlbl_table.lock); 365 spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
384} 366}
385 367
386static struct pernet_operations ipv6_addr_label_ops = { 368static struct pernet_operations ipv6_addr_label_ops = {
@@ -390,8 +372,6 @@ static struct pernet_operations ipv6_addr_label_ops = {
390 372
391int __init ipv6_addr_label_init(void) 373int __init ipv6_addr_label_init(void)
392{ 374{
393 spin_lock_init(&ip6addrlbl_table.lock);
394
395 return register_pernet_subsys(&ipv6_addr_label_ops); 375 return register_pernet_subsys(&ipv6_addr_label_ops);
396} 376}
397 377
@@ -510,11 +490,10 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
510 int err; 490 int err;
511 491
512 rcu_read_lock(); 492 rcu_read_lock();
513 hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { 493 hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
514 if (idx >= s_idx && 494 if (idx >= s_idx) {
515 net_eq(ip6addrlbl_net(p), net)) {
516 err = ip6addrlbl_fill(skb, p, 495 err = ip6addrlbl_fill(skb, p,
517 ip6addrlbl_table.seq, 496 net->ipv6.ip6addrlbl_table.seq,
518 NETLINK_CB(cb->skb).portid, 497 NETLINK_CB(cb->skb).portid,
519 cb->nlh->nlmsg_seq, 498 cb->nlh->nlmsg_seq,
520 RTM_NEWADDRLABEL, 499 RTM_NEWADDRLABEL,
@@ -571,7 +550,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
571 p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); 550 p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
572 if (p && !ip6addrlbl_hold(p)) 551 if (p && !ip6addrlbl_hold(p))
573 p = NULL; 552 p = NULL;
574 lseq = ip6addrlbl_table.seq; 553 lseq = net->ipv6.ip6addrlbl_table.seq;
575 rcu_read_unlock(); 554 rcu_read_unlock();
576 555
577 if (!p) { 556 if (!p) {