aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/net/vrf.c189
-rw-r--r--include/linux/ipv6.h17
-rw-r--r--include/linux/netdevice.h2
-rw-r--r--include/net/l3mdev.h42
-rw-r--r--include/net/tcp.h4
-rw-r--r--net/core/dev.c3
-rw-r--r--net/ipv4/ip_input.c7
-rw-r--r--net/ipv6/ip6_input.c7
8 files changed, 170 insertions, 101 deletions
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index c8db55aa8280..0ea29345eb2e 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -42,9 +42,6 @@
42#define DRV_NAME "vrf" 42#define DRV_NAME "vrf"
43#define DRV_VERSION "1.0" 43#define DRV_VERSION "1.0"
44 44
45#define vrf_master_get_rcu(dev) \
46 ((struct net_device *)rcu_dereference(dev->rx_handler_data))
47
48struct net_vrf { 45struct net_vrf {
49 struct rtable *rth; 46 struct rtable *rth;
50 struct rt6_info *rt6; 47 struct rt6_info *rt6;
@@ -60,90 +57,12 @@ struct pcpu_dstats {
60 struct u64_stats_sync syncp; 57 struct u64_stats_sync syncp;
61}; 58};
62 59
63/* neighbor handling is done with actual device; do not want
64 * to flip skb->dev for those ndisc packets. This really fails
65 * for multiple next protocols (e.g., NEXTHDR_HOP). But it is
66 * a start.
67 */
68#if IS_ENABLED(CONFIG_IPV6)
69static bool check_ipv6_frame(const struct sk_buff *skb)
70{
71 const struct ipv6hdr *ipv6h;
72 struct ipv6hdr _ipv6h;
73 bool rc = true;
74
75 ipv6h = skb_header_pointer(skb, 0, sizeof(_ipv6h), &_ipv6h);
76 if (!ipv6h)
77 goto out;
78
79 if (ipv6h->nexthdr == NEXTHDR_ICMP) {
80 const struct icmp6hdr *icmph;
81 struct icmp6hdr _icmph;
82
83 icmph = skb_header_pointer(skb, sizeof(_ipv6h),
84 sizeof(_icmph), &_icmph);
85 if (!icmph)
86 goto out;
87
88 switch (icmph->icmp6_type) {
89 case NDISC_ROUTER_SOLICITATION:
90 case NDISC_ROUTER_ADVERTISEMENT:
91 case NDISC_NEIGHBOUR_SOLICITATION:
92 case NDISC_NEIGHBOUR_ADVERTISEMENT:
93 case NDISC_REDIRECT:
94 rc = false;
95 break;
96 }
97 }
98
99out:
100 return rc;
101}
102#else
103static bool check_ipv6_frame(const struct sk_buff *skb)
104{
105 return false;
106}
107#endif
108
109static bool is_ip_rx_frame(struct sk_buff *skb)
110{
111 switch (skb->protocol) {
112 case htons(ETH_P_IP):
113 return true;
114 case htons(ETH_P_IPV6):
115 return check_ipv6_frame(skb);
116 }
117 return false;
118}
119
120static void vrf_tx_error(struct net_device *vrf_dev, struct sk_buff *skb) 60static void vrf_tx_error(struct net_device *vrf_dev, struct sk_buff *skb)
121{ 61{
122 vrf_dev->stats.tx_errors++; 62 vrf_dev->stats.tx_errors++;
123 kfree_skb(skb); 63 kfree_skb(skb);
124} 64}
125 65
126/* note: already called with rcu_read_lock */
127static rx_handler_result_t vrf_handle_frame(struct sk_buff **pskb)
128{
129 struct sk_buff *skb = *pskb;
130
131 if (is_ip_rx_frame(skb)) {
132 struct net_device *dev = vrf_master_get_rcu(skb->dev);
133 struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
134
135 u64_stats_update_begin(&dstats->syncp);
136 dstats->rx_pkts++;
137 dstats->rx_bytes += skb->len;
138 u64_stats_update_end(&dstats->syncp);
139
140 skb->dev = dev;
141
142 return RX_HANDLER_ANOTHER;
143 }
144 return RX_HANDLER_PASS;
145}
146
147static struct rtnl_link_stats64 *vrf_get_stats64(struct net_device *dev, 66static struct rtnl_link_stats64 *vrf_get_stats64(struct net_device *dev,
148 struct rtnl_link_stats64 *stats) 67 struct rtnl_link_stats64 *stats)
149{ 68{
@@ -506,28 +425,14 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
506{ 425{
507 int ret; 426 int ret;
508 427
509 /* register the packet handler for slave ports */
510 ret = netdev_rx_handler_register(port_dev, vrf_handle_frame, dev);
511 if (ret) {
512 netdev_err(port_dev,
513 "Device %s failed to register rx_handler\n",
514 port_dev->name);
515 goto out_fail;
516 }
517
518 ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL); 428 ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL);
519 if (ret < 0) 429 if (ret < 0)
520 goto out_unregister; 430 return ret;
521 431
522 port_dev->priv_flags |= IFF_L3MDEV_SLAVE; 432 port_dev->priv_flags |= IFF_L3MDEV_SLAVE;
523 cycle_netdev(port_dev); 433 cycle_netdev(port_dev);
524 434
525 return 0; 435 return 0;
526
527out_unregister:
528 netdev_rx_handler_unregister(port_dev);
529out_fail:
530 return ret;
531} 436}
532 437
533static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev) 438static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
@@ -544,8 +449,6 @@ static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev)
544 netdev_upper_dev_unlink(port_dev, dev); 449 netdev_upper_dev_unlink(port_dev, dev);
545 port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE; 450 port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE;
546 451
547 netdev_rx_handler_unregister(port_dev);
548
549 cycle_netdev(port_dev); 452 cycle_netdev(port_dev);
550 453
551 return 0; 454 return 0;
@@ -670,6 +573,95 @@ static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
670} 573}
671 574
672#if IS_ENABLED(CONFIG_IPV6) 575#if IS_ENABLED(CONFIG_IPV6)
576/* neighbor handling is done with actual device; do not want
577 * to flip skb->dev for those ndisc packets. This really fails
578 * for multiple next protocols (e.g., NEXTHDR_HOP). But it is
579 * a start.
580 */
581static bool ipv6_ndisc_frame(const struct sk_buff *skb)
582{
583 const struct ipv6hdr *iph = ipv6_hdr(skb);
584 bool rc = false;
585
586 if (iph->nexthdr == NEXTHDR_ICMP) {
587 const struct icmp6hdr *icmph;
588 struct icmp6hdr _icmph;
589
590 icmph = skb_header_pointer(skb, sizeof(*iph),
591 sizeof(_icmph), &_icmph);
592 if (!icmph)
593 goto out;
594
595 switch (icmph->icmp6_type) {
596 case NDISC_ROUTER_SOLICITATION:
597 case NDISC_ROUTER_ADVERTISEMENT:
598 case NDISC_NEIGHBOUR_SOLICITATION:
599 case NDISC_NEIGHBOUR_ADVERTISEMENT:
600 case NDISC_REDIRECT:
601 rc = true;
602 break;
603 }
604 }
605
606out:
607 return rc;
608}
609
610static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
611 struct sk_buff *skb)
612{
613 /* if packet is NDISC keep the ingress interface */
614 if (!ipv6_ndisc_frame(skb)) {
615 skb->dev = vrf_dev;
616 skb->skb_iif = vrf_dev->ifindex;
617
618 skb_push(skb, skb->mac_len);
619 dev_queue_xmit_nit(skb, vrf_dev);
620 skb_pull(skb, skb->mac_len);
621
622 IP6CB(skb)->flags |= IP6SKB_L3SLAVE;
623 }
624
625 return skb;
626}
627
628#else
629static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
630 struct sk_buff *skb)
631{
632 return skb;
633}
634#endif
635
636static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
637 struct sk_buff *skb)
638{
639 skb->dev = vrf_dev;
640 skb->skb_iif = vrf_dev->ifindex;
641
642 skb_push(skb, skb->mac_len);
643 dev_queue_xmit_nit(skb, vrf_dev);
644 skb_pull(skb, skb->mac_len);
645
646 return skb;
647}
648
649/* called with rcu lock held */
650static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev,
651 struct sk_buff *skb,
652 u16 proto)
653{
654 switch (proto) {
655 case AF_INET:
656 return vrf_ip_rcv(vrf_dev, skb);
657 case AF_INET6:
658 return vrf_ip6_rcv(vrf_dev, skb);
659 }
660
661 return skb;
662}
663
664#if IS_ENABLED(CONFIG_IPV6)
673static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev, 665static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev,
674 const struct flowi6 *fl6) 666 const struct flowi6 *fl6)
675{ 667{
@@ -690,6 +682,7 @@ static const struct l3mdev_ops vrf_l3mdev_ops = {
690 .l3mdev_fib_table = vrf_fib_table, 682 .l3mdev_fib_table = vrf_fib_table,
691 .l3mdev_get_rtable = vrf_get_rtable, 683 .l3mdev_get_rtable = vrf_get_rtable,
692 .l3mdev_get_saddr = vrf_get_saddr, 684 .l3mdev_get_saddr = vrf_get_saddr,
685 .l3mdev_l3_rcv = vrf_l3_rcv,
693#if IS_ENABLED(CONFIG_IPV6) 686#if IS_ENABLED(CONFIG_IPV6)
694 .l3mdev_get_rt6_dst = vrf_get_rt6_dst, 687 .l3mdev_get_rt6_dst = vrf_get_rt6_dst,
695#endif 688#endif
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 58d6e158755f..5c91b0b055d4 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -118,14 +118,29 @@ struct inet6_skb_parm {
118#define IP6SKB_ROUTERALERT 8 118#define IP6SKB_ROUTERALERT 8
119#define IP6SKB_FRAGMENTED 16 119#define IP6SKB_FRAGMENTED 16
120#define IP6SKB_HOPBYHOP 32 120#define IP6SKB_HOPBYHOP 32
121#define IP6SKB_L3SLAVE 64
121}; 122};
122 123
124#if defined(CONFIG_NET_L3_MASTER_DEV)
125static inline bool skb_l3mdev_slave(__u16 flags)
126{
127 return flags & IP6SKB_L3SLAVE;
128}
129#else
130static inline bool skb_l3mdev_slave(__u16 flags)
131{
132 return false;
133}
134#endif
135
123#define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) 136#define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb))
124#define IP6CBMTU(skb) ((struct ip6_mtuinfo *)((skb)->cb)) 137#define IP6CBMTU(skb) ((struct ip6_mtuinfo *)((skb)->cb))
125 138
126static inline int inet6_iif(const struct sk_buff *skb) 139static inline int inet6_iif(const struct sk_buff *skb)
127{ 140{
128 return IP6CB(skb)->iif; 141 bool l3_slave = skb_l3mdev_slave(IP6CB(skb)->flags);
142
143 return l3_slave ? skb->skb_iif : IP6CB(skb)->iif;
129} 144}
130 145
131struct tcp6_request_sock { 146struct tcp6_request_sock {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 63580e6d0df4..c2f5112f08f7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3258,6 +3258,8 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
3258bool is_skb_forwardable(const struct net_device *dev, 3258bool is_skb_forwardable(const struct net_device *dev,
3259 const struct sk_buff *skb); 3259 const struct sk_buff *skb);
3260 3260
3261void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
3262
3261extern int netdev_budget; 3263extern int netdev_budget;
3262 3264
3263/* Called by rtnetlink.c:rtnl_unlock() */ 3265/* Called by rtnetlink.c:rtnl_unlock() */
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 78872bd1dc2c..374388dc01c8 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -25,6 +25,8 @@
25 25
26struct l3mdev_ops { 26struct l3mdev_ops {
27 u32 (*l3mdev_fib_table)(const struct net_device *dev); 27 u32 (*l3mdev_fib_table)(const struct net_device *dev);
28 struct sk_buff * (*l3mdev_l3_rcv)(struct net_device *dev,
29 struct sk_buff *skb, u16 proto);
28 30
29 /* IPv4 ops */ 31 /* IPv4 ops */
30 struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev, 32 struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev,
@@ -134,6 +136,34 @@ int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4);
134 136
135struct dst_entry *l3mdev_get_rt6_dst(struct net *net, const struct flowi6 *fl6); 137struct dst_entry *l3mdev_get_rt6_dst(struct net *net, const struct flowi6 *fl6);
136 138
139static inline
140struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto)
141{
142 struct net_device *master = NULL;
143
144 if (netif_is_l3_slave(skb->dev))
145 master = netdev_master_upper_dev_get_rcu(skb->dev);
146 else if (netif_is_l3_master(skb->dev))
147 master = skb->dev;
148
149 if (master && master->l3mdev_ops->l3mdev_l3_rcv)
150 skb = master->l3mdev_ops->l3mdev_l3_rcv(master, skb, proto);
151
152 return skb;
153}
154
155static inline
156struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb)
157{
158 return l3mdev_l3_rcv(skb, AF_INET);
159}
160
161static inline
162struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb)
163{
164 return l3mdev_l3_rcv(skb, AF_INET6);
165}
166
137#else 167#else
138 168
139static inline int l3mdev_master_ifindex_rcu(const struct net_device *dev) 169static inline int l3mdev_master_ifindex_rcu(const struct net_device *dev)
@@ -194,6 +224,18 @@ struct dst_entry *l3mdev_get_rt6_dst(struct net *net, const struct flowi6 *fl6)
194{ 224{
195 return NULL; 225 return NULL;
196} 226}
227
228static inline
229struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb)
230{
231 return skb;
232}
233
234static inline
235struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb)
236{
237 return skb;
238}
197#endif 239#endif
198 240
199#endif /* _NET_L3MDEV_H_ */ 241#endif /* _NET_L3MDEV_H_ */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index c9ab561387c4..0bcc70f4e1fb 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -786,7 +786,9 @@ struct tcp_skb_cb {
786 */ 786 */
787static inline int tcp_v6_iif(const struct sk_buff *skb) 787static inline int tcp_v6_iif(const struct sk_buff *skb)
788{ 788{
789 return TCP_SKB_CB(skb)->header.h6.iif; 789 bool l3_slave = skb_l3mdev_slave(TCP_SKB_CB(skb)->header.h6.flags);
790
791 return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif;
790} 792}
791#endif 793#endif
792 794
diff --git a/net/core/dev.c b/net/core/dev.c
index c7490339315c..12436d1312ca 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1850,7 +1850,7 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
1850 * taps currently in use. 1850 * taps currently in use.
1851 */ 1851 */
1852 1852
1853static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) 1853void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1854{ 1854{
1855 struct packet_type *ptype; 1855 struct packet_type *ptype;
1856 struct sk_buff *skb2 = NULL; 1856 struct sk_buff *skb2 = NULL;
@@ -1907,6 +1907,7 @@ out_unlock:
1907 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); 1907 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
1908 rcu_read_unlock(); 1908 rcu_read_unlock();
1909} 1909}
1910EXPORT_SYMBOL_GPL(dev_queue_xmit_nit);
1910 1911
1911/** 1912/**
1912 * netif_setup_tc - Handle tc mappings on real_num_tx_queues change 1913 * netif_setup_tc - Handle tc mappings on real_num_tx_queues change
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 751c0658e194..37375eedeef9 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -313,6 +313,13 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
313 const struct iphdr *iph = ip_hdr(skb); 313 const struct iphdr *iph = ip_hdr(skb);
314 struct rtable *rt; 314 struct rtable *rt;
315 315
316 /* if ingress device is enslaved to an L3 master device pass the
317 * skb to its handler for processing
318 */
319 skb = l3mdev_ip_rcv(skb);
320 if (!skb)
321 return NET_RX_SUCCESS;
322
316 if (net->ipv4.sysctl_ip_early_demux && 323 if (net->ipv4.sysctl_ip_early_demux &&
317 !skb_dst(skb) && 324 !skb_dst(skb) &&
318 !skb->sk && 325 !skb->sk &&
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 6ed56012005d..f185cbcda114 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -49,6 +49,13 @@
49 49
50int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 50int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
51{ 51{
52 /* if ingress device is enslaved to an L3 master device pass the
53 * skb to its handler for processing
54 */
55 skb = l3mdev_ip6_rcv(skb);
56 if (!skb)
57 return NET_RX_SUCCESS;
58
52 if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { 59 if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
53 const struct inet6_protocol *ipprot; 60 const struct inet6_protocol *ipprot;
54 61