aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Ahern <dsa@cumulusnetworks.com>2016-06-08 13:55:39 -0400
committerDavid S. Miller <davem@davemloft.net>2016-06-08 14:36:02 -0400
commit96c63fa7393d0a346acfe5a91e0c7d4c7782641b (patch)
tree0dcc6d3f3e5ccb788347399c9a63059f8ad8b5df
parent6278e03dc667b611cc9ebbd8f455d859f18f8e11 (diff)
net: Add l3mdev rule
Currently, VRFs require 1 oif and 1 iif rule per address family per VRF. As the number of VRF devices increases it brings scalability issues with the increasing rule list. All of the VRF rules have the same format with the exception of the specific table id to direct the lookup. Since the table id is available from the oif or iif in the lookup, the VRF rules can be consolidated to a single rule that pulls the table from the VRF device.

This patch introduces a new rule attribute l3mdev. The l3mdev rule means the table id used for the lookup is pulled from the L3 master device (e.g., VRF) rather than being statically defined. With the l3mdev rule all of the basic VRF FIB rules are reduced to 1 l3mdev rule per address family (IPv4 and IPv6).

If an admin wishes to insert higher priority rules for specific VRFs those rules will co-exist with the l3mdev rule. This capability means current VRF scripts will co-exist with this new simpler implementation.

Currently, the rules list for both ipv4 and ipv6 looks like this:

    $ ip ru ls
    1000: from all oif vrf1 lookup 1001
    1000: from all iif vrf1 lookup 1001
    1000: from all oif vrf2 lookup 1002
    1000: from all iif vrf2 lookup 1002
    1000: from all oif vrf3 lookup 1003
    1000: from all iif vrf3 lookup 1003
    1000: from all oif vrf4 lookup 1004
    1000: from all iif vrf4 lookup 1004
    1000: from all oif vrf5 lookup 1005
    1000: from all iif vrf5 lookup 1005
    1000: from all oif vrf6 lookup 1006
    1000: from all iif vrf6 lookup 1006
    1000: from all oif vrf7 lookup 1007
    1000: from all iif vrf7 lookup 1007
    1000: from all oif vrf8 lookup 1008
    1000: from all iif vrf8 lookup 1008
    ...
    32765: from all lookup local
    32766: from all lookup main
    32767: from all lookup default

With the l3mdev rule the list is just the following regardless of the number of VRFs:

    $ ip ru ls
    1000: from all lookup [l3mdev table]
    32765: from all lookup local
    32766: from all lookup main
    32767: from all lookup default

(Note: the above pretty print of the rule is based on an iproute2 prototype. Actual verbiage may change.)

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/fib_rules.h24
-rw-r--r--include/net/l3mdev.h12
-rw-r--r--include/uapi/linux/fib_rules.h1
-rw-r--r--net/core/fib_rules.c33
-rw-r--r--net/ipv4/fib_rules.c6
-rw-r--r--net/ipv6/fib6_rules.c6
-rw-r--r--net/l3mdev/l3mdev.c38
7 files changed, 109 insertions, 11 deletions
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 59160de702b6..456e4a6006ab 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -17,7 +17,8 @@ struct fib_rule {
17 u32 flags; 17 u32 flags;
18 u32 table; 18 u32 table;
19 u8 action; 19 u8 action;
20 /* 3 bytes hole, try to use */ 20 u8 l3mdev;
21 /* 2 bytes hole, try to use */
21 u32 target; 22 u32 target;
22 __be64 tun_id; 23 __be64 tun_id;
23 struct fib_rule __rcu *ctarget; 24 struct fib_rule __rcu *ctarget;
@@ -36,6 +37,7 @@ struct fib_lookup_arg {
36 void *lookup_ptr; 37 void *lookup_ptr;
37 void *result; 38 void *result;
38 struct fib_rule *rule; 39 struct fib_rule *rule;
40 u32 table;
39 int flags; 41 int flags;
40#define FIB_LOOKUP_NOREF 1 42#define FIB_LOOKUP_NOREF 1
41#define FIB_LOOKUP_IGNORE_LINKSTATE 2 43#define FIB_LOOKUP_IGNORE_LINKSTATE 2
@@ -89,7 +91,8 @@ struct fib_rules_ops {
89 [FRA_TABLE] = { .type = NLA_U32 }, \ 91 [FRA_TABLE] = { .type = NLA_U32 }, \
90 [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ 92 [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \
91 [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ 93 [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \
92 [FRA_GOTO] = { .type = NLA_U32 } 94 [FRA_GOTO] = { .type = NLA_U32 }, \
95 [FRA_L3MDEV] = { .type = NLA_U8 }
93 96
94static inline void fib_rule_get(struct fib_rule *rule) 97static inline void fib_rule_get(struct fib_rule *rule)
95{ 98{
@@ -102,6 +105,20 @@ static inline void fib_rule_put(struct fib_rule *rule)
102 kfree_rcu(rule, rcu); 105 kfree_rcu(rule, rcu);
103} 106}
104 107
108#ifdef CONFIG_NET_L3_MASTER_DEV
109static inline u32 fib_rule_get_table(struct fib_rule *rule,
110 struct fib_lookup_arg *arg)
111{
112 return rule->l3mdev ? arg->table : rule->table;
113}
114#else
115static inline u32 fib_rule_get_table(struct fib_rule *rule,
116 struct fib_lookup_arg *arg)
117{
118 return rule->table;
119}
120#endif
121
105static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) 122static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla)
106{ 123{
107 if (nla[FRA_TABLE]) 124 if (nla[FRA_TABLE])
@@ -117,4 +134,7 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags,
117 struct fib_lookup_arg *); 134 struct fib_lookup_arg *);
118int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table, 135int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table,
119 u32 flags); 136 u32 flags);
137
138int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh);
139int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh);
120#endif 140#endif
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 374388dc01c8..34f33eb96a5e 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -11,6 +11,8 @@
11#ifndef _NET_L3MDEV_H_ 11#ifndef _NET_L3MDEV_H_
12#define _NET_L3MDEV_H_ 12#define _NET_L3MDEV_H_
13 13
14#include <net/fib_rules.h>
15
14/** 16/**
15 * struct l3mdev_ops - l3mdev operations 17 * struct l3mdev_ops - l3mdev operations
16 * 18 *
@@ -41,6 +43,9 @@ struct l3mdev_ops {
41 43
42#ifdef CONFIG_NET_L3_MASTER_DEV 44#ifdef CONFIG_NET_L3_MASTER_DEV
43 45
46int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
47 struct fib_lookup_arg *arg);
48
44int l3mdev_master_ifindex_rcu(const struct net_device *dev); 49int l3mdev_master_ifindex_rcu(const struct net_device *dev);
45static inline int l3mdev_master_ifindex(struct net_device *dev) 50static inline int l3mdev_master_ifindex(struct net_device *dev)
46{ 51{
@@ -236,6 +241,13 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb)
236{ 241{
237 return skb; 242 return skb;
238} 243}
244
245static inline
246int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
247 struct fib_lookup_arg *arg)
248{
249 return 1;
250}
239#endif 251#endif
240 252
241#endif /* _NET_L3MDEV_H_ */ 253#endif /* _NET_L3MDEV_H_ */
diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 620c8a5ddc00..14404b3ebb89 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -50,6 +50,7 @@ enum {
50 FRA_FWMASK, /* mask for netfilter mark */ 50 FRA_FWMASK, /* mask for netfilter mark */
51 FRA_OIFNAME, 51 FRA_OIFNAME,
52 FRA_PAD, 52 FRA_PAD,
53 FRA_L3MDEV, /* iif or oif is l3mdev goto its table */
53 __FRA_MAX 54 __FRA_MAX
54}; 55};
55 56
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 840acebbb80c..98298b11f534 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -173,7 +173,8 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
173EXPORT_SYMBOL_GPL(fib_rules_unregister); 173EXPORT_SYMBOL_GPL(fib_rules_unregister);
174 174
175static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, 175static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
176 struct flowi *fl, int flags) 176 struct flowi *fl, int flags,
177 struct fib_lookup_arg *arg)
177{ 178{
178 int ret = 0; 179 int ret = 0;
179 180
@@ -189,6 +190,9 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
189 if (rule->tun_id && (rule->tun_id != fl->flowi_tun_key.tun_id)) 190 if (rule->tun_id && (rule->tun_id != fl->flowi_tun_key.tun_id))
190 goto out; 191 goto out;
191 192
193 if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg))
194 goto out;
195
192 ret = ops->match(rule, fl, flags); 196 ret = ops->match(rule, fl, flags);
193out: 197out:
194 return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; 198 return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
@@ -204,7 +208,7 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
204 208
205 list_for_each_entry_rcu(rule, &ops->rules_list, list) { 209 list_for_each_entry_rcu(rule, &ops->rules_list, list) {
206jumped: 210jumped:
207 if (!fib_rule_match(rule, ops, fl, flags)) 211 if (!fib_rule_match(rule, ops, fl, flags, arg))
208 continue; 212 continue;
209 213
210 if (rule->action == FR_ACT_GOTO) { 214 if (rule->action == FR_ACT_GOTO) {
@@ -265,7 +269,7 @@ errout:
265 return err; 269 return err;
266} 270}
267 271
268static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) 272int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
269{ 273{
270 struct net *net = sock_net(skb->sk); 274 struct net *net = sock_net(skb->sk);
271 struct fib_rule_hdr *frh = nlmsg_data(nlh); 275 struct fib_rule_hdr *frh = nlmsg_data(nlh);
@@ -336,6 +340,14 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
336 if (tb[FRA_TUN_ID]) 340 if (tb[FRA_TUN_ID])
337 rule->tun_id = nla_get_be64(tb[FRA_TUN_ID]); 341 rule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);
338 342
343 if (tb[FRA_L3MDEV]) {
344#ifdef CONFIG_NET_L3_MASTER_DEV
345 rule->l3mdev = nla_get_u8(tb[FRA_L3MDEV]);
346 if (rule->l3mdev != 1)
347#endif
348 goto errout_free;
349 }
350
339 rule->action = frh->action; 351 rule->action = frh->action;
340 rule->flags = frh->flags; 352 rule->flags = frh->flags;
341 rule->table = frh_get_table(frh, tb); 353 rule->table = frh_get_table(frh, tb);
@@ -371,6 +383,9 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
371 } else if (rule->action == FR_ACT_GOTO) 383 } else if (rule->action == FR_ACT_GOTO)
372 goto errout_free; 384 goto errout_free;
373 385
386 if (rule->l3mdev && rule->table)
387 goto errout_free;
388
374 err = ops->configure(rule, skb, frh, tb); 389 err = ops->configure(rule, skb, frh, tb);
375 if (err < 0) 390 if (err < 0)
376 goto errout_free; 391 goto errout_free;
@@ -424,8 +439,9 @@ errout:
424 rules_ops_put(ops); 439 rules_ops_put(ops);
425 return err; 440 return err;
426} 441}
442EXPORT_SYMBOL_GPL(fib_nl_newrule);
427 443
428static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) 444int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
429{ 445{
430 struct net *net = sock_net(skb->sk); 446 struct net *net = sock_net(skb->sk);
431 struct fib_rule_hdr *frh = nlmsg_data(nlh); 447 struct fib_rule_hdr *frh = nlmsg_data(nlh);
@@ -483,6 +499,10 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
483 (rule->tun_id != nla_get_be64(tb[FRA_TUN_ID]))) 499 (rule->tun_id != nla_get_be64(tb[FRA_TUN_ID])))
484 continue; 500 continue;
485 501
502 if (tb[FRA_L3MDEV] &&
503 (rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV])))
504 continue;
505
486 if (!ops->compare(rule, frh, tb)) 506 if (!ops->compare(rule, frh, tb))
487 continue; 507 continue;
488 508
@@ -536,6 +556,7 @@ errout:
536 rules_ops_put(ops); 556 rules_ops_put(ops);
537 return err; 557 return err;
538} 558}
559EXPORT_SYMBOL_GPL(fib_nl_delrule);
539 560
540static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, 561static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
541 struct fib_rule *rule) 562 struct fib_rule *rule)
@@ -607,7 +628,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
607 (rule->target && 628 (rule->target &&
608 nla_put_u32(skb, FRA_GOTO, rule->target)) || 629 nla_put_u32(skb, FRA_GOTO, rule->target)) ||
609 (rule->tun_id && 630 (rule->tun_id &&
610 nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD))) 631 nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) ||
632 (rule->l3mdev &&
633 nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)))
611 goto nla_put_failure; 634 goto nla_put_failure;
612 635
613 if (rule->suppress_ifgroup != -1) { 636 if (rule->suppress_ifgroup != -1) {
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index f2bda9e89c61..6e9ea69e5f75 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -76,6 +76,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
76{ 76{
77 int err = -EAGAIN; 77 int err = -EAGAIN;
78 struct fib_table *tbl; 78 struct fib_table *tbl;
79 u32 tb_id;
79 80
80 switch (rule->action) { 81 switch (rule->action) {
81 case FR_ACT_TO_TBL: 82 case FR_ACT_TO_TBL:
@@ -94,7 +95,8 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
94 95
95 rcu_read_lock(); 96 rcu_read_lock();
96 97
97 tbl = fib_get_table(rule->fr_net, rule->table); 98 tb_id = fib_rule_get_table(rule, arg);
99 tbl = fib_get_table(rule->fr_net, tb_id);
98 if (tbl) 100 if (tbl)
99 err = fib_table_lookup(tbl, &flp->u.ip4, 101 err = fib_table_lookup(tbl, &flp->u.ip4,
100 (struct fib_result *)arg->result, 102 (struct fib_result *)arg->result,
@@ -180,7 +182,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
180 if (err) 182 if (err)
181 goto errout; 183 goto errout;
182 184
183 if (rule->table == RT_TABLE_UNSPEC) { 185 if (rule->table == RT_TABLE_UNSPEC && !rule->l3mdev) {
184 if (rule->action == FR_ACT_TO_TBL) { 186 if (rule->action == FR_ACT_TO_TBL) {
185 struct fib_table *table; 187 struct fib_table *table;
186 188
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ed33abf57abd..5857c1fc8b67 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -67,6 +67,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
67 struct net *net = rule->fr_net; 67 struct net *net = rule->fr_net;
68 pol_lookup_t lookup = arg->lookup_ptr; 68 pol_lookup_t lookup = arg->lookup_ptr;
69 int err = 0; 69 int err = 0;
70 u32 tb_id;
70 71
71 switch (rule->action) { 72 switch (rule->action) {
72 case FR_ACT_TO_TBL: 73 case FR_ACT_TO_TBL:
@@ -86,7 +87,8 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
86 goto discard_pkt; 87 goto discard_pkt;
87 } 88 }
88 89
89 table = fib6_get_table(net, rule->table); 90 tb_id = fib_rule_get_table(rule, arg);
91 table = fib6_get_table(net, tb_id);
90 if (!table) { 92 if (!table) {
91 err = -EAGAIN; 93 err = -EAGAIN;
92 goto out; 94 goto out;
@@ -199,7 +201,7 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
199 struct net *net = sock_net(skb->sk); 201 struct net *net = sock_net(skb->sk);
200 struct fib6_rule *rule6 = (struct fib6_rule *) rule; 202 struct fib6_rule *rule6 = (struct fib6_rule *) rule;
201 203
202 if (rule->action == FR_ACT_TO_TBL) { 204 if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
203 if (rule->table == RT6_TABLE_UNSPEC) 205 if (rule->table == RT6_TABLE_UNSPEC)
204 goto errout; 206 goto errout;
205 207
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 6651a78e100c..7da97809a7e8 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -10,6 +10,7 @@
10 */ 10 */
11 11
12#include <linux/netdevice.h> 12#include <linux/netdevice.h>
13#include <net/fib_rules.h>
13#include <net/l3mdev.h> 14#include <net/l3mdev.h>
14 15
15/** 16/**
@@ -160,3 +161,40 @@ int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4)
160 return rc; 161 return rc;
161} 162}
162EXPORT_SYMBOL_GPL(l3mdev_get_saddr); 163EXPORT_SYMBOL_GPL(l3mdev_get_saddr);
164
165/**
166 * l3mdev_fib_rule_match - Determine if flowi references an
167 * L3 master device
168 * @net: network namespace for device index lookup
169 * @fl: flow struct
170 */
171
172int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
173 struct fib_lookup_arg *arg)
174{
175 struct net_device *dev;
176 int rc = 0;
177
178 rcu_read_lock();
179
180 dev = dev_get_by_index_rcu(net, fl->flowi_oif);
181 if (dev && netif_is_l3_master(dev) &&
182 dev->l3mdev_ops->l3mdev_fib_table) {
183 arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
184 rc = 1;
185 goto out;
186 }
187
188 dev = dev_get_by_index_rcu(net, fl->flowi_iif);
189 if (dev && netif_is_l3_master(dev) &&
190 dev->l3mdev_ops->l3mdev_fib_table) {
191 arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
192 rc = 1;
193 goto out;
194 }
195
196out:
197 rcu_read_unlock();
198
199 return rc;
200}