aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/switchdev.txt14
-rw-r--r--drivers/net/ethernet/rocker/rocker.c11
-rw-r--r--drivers/net/ethernet/rocker/rocker.h1
-rw-r--r--include/linux/netdevice.h13
-rw-r--r--include/linux/skbuff.h9
-rw-r--r--include/net/switchdev.h9
-rw-r--r--net/core/dev.c10
-rw-r--r--net/switchdev/switchdev.c111
8 files changed, 169 insertions, 9 deletions
diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
index c5d7ade10ff2..9825f32a8634 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -279,8 +279,18 @@ and unknown unicast packets to all ports in domain, if allowed by port's
279current STP state. The switch driver, knowing which ports are within which 279current STP state. The switch driver, knowing which ports are within which
280vlan L2 domain, can program the switch device for flooding. The packet should 280vlan L2 domain, can program the switch device for flooding. The packet should
281also be sent to the port netdev for processing by the bridge driver. The 281also be sent to the port netdev for processing by the bridge driver. The
282bridge should not reflood the packet to the same ports the device flooded. 282bridge should not reflood the packet to the same ports the device flooded,
283XXX: the mechanism to avoid duplicate flood packets is being discuseed. 283otherwise there will be duplicate packets on the wire.
284
285To avoid duplicate packets, the device/driver should mark a packet as already
286forwarded using skb->offload_fwd_mark. The same mark is set on the device
287ports in the domain using dev->offload_fwd_mark. If the skb->offload_fwd_mark
288is non-zero and matches the forwarding egress port's dev->offload_fwd_mark, the
289kernel will drop the skb right before transmit on the egress port, with the
290understanding that the device already forwarded the packet on the same egress port.
291The driver can use switchdev_port_fwd_mark_set() to set a globally unique mark
292for port's dev->offload_fwd_mark, based on the port's parent ID (switch ID) and
293a group ifindex.
284 294
285It is possible for the switch device to not handle flooding and push the 295It is possible for the switch device to not handle flooding and push the
286packets up to the bridge driver for flooding. This is not ideal as the number 296packets up to the bridge driver for flooding. This is not ideal as the number
diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c
index 4ccde93cd07a..7b4c3474acfe 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4822,6 +4822,7 @@ static int rocker_port_rx_proc(const struct rocker *rocker,
4822 const struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1]; 4822 const struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1];
4823 struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info); 4823 struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info);
4824 size_t rx_len; 4824 size_t rx_len;
4825 u16 rx_flags = 0;
4825 4826
4826 if (!skb) 4827 if (!skb)
4827 return -ENOENT; 4828 return -ENOENT;
@@ -4829,6 +4830,8 @@ static int rocker_port_rx_proc(const struct rocker *rocker,
4829 rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info); 4830 rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info);
4830 if (!attrs[ROCKER_TLV_RX_FRAG_LEN]) 4831 if (!attrs[ROCKER_TLV_RX_FRAG_LEN])
4831 return -EINVAL; 4832 return -EINVAL;
4833 if (attrs[ROCKER_TLV_RX_FLAGS])
4834 rx_flags = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FLAGS]);
4832 4835
4833 rocker_dma_rx_ring_skb_unmap(rocker, attrs); 4836 rocker_dma_rx_ring_skb_unmap(rocker, attrs);
4834 4837
@@ -4836,6 +4839,9 @@ static int rocker_port_rx_proc(const struct rocker *rocker,
4836 skb_put(skb, rx_len); 4839 skb_put(skb, rx_len);
4837 skb->protocol = eth_type_trans(skb, rocker_port->dev); 4840 skb->protocol = eth_type_trans(skb, rocker_port->dev);
4838 4841
4842 if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD)
4843 skb->offload_fwd_mark = rocker_port->dev->offload_fwd_mark;
4844
4839 rocker_port->dev->stats.rx_packets++; 4845 rocker_port->dev->stats.rx_packets++;
4840 rocker_port->dev->stats.rx_bytes += skb->len; 4846 rocker_port->dev->stats.rx_bytes += skb->len;
4841 4847
@@ -4973,6 +4979,8 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number)
4973 } 4979 }
4974 rocker->ports[port_number] = rocker_port; 4980 rocker->ports[port_number] = rocker_port;
4975 4981
4982 switchdev_port_fwd_mark_set(rocker_port->dev, NULL, false);
4983
4976 rocker_port_set_learning(rocker_port, SWITCHDEV_TRANS_NONE); 4984 rocker_port_set_learning(rocker_port, SWITCHDEV_TRANS_NONE);
4977 4985
4978 err = rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, 0); 4986 err = rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, 0);
@@ -5252,6 +5260,7 @@ static int rocker_port_bridge_join(struct rocker_port *rocker_port,
5252 rocker_port_internal_vlan_id_get(rocker_port, bridge->ifindex); 5260 rocker_port_internal_vlan_id_get(rocker_port, bridge->ifindex);
5253 5261
5254 rocker_port->bridge_dev = bridge; 5262 rocker_port->bridge_dev = bridge;
5263 switchdev_port_fwd_mark_set(rocker_port->dev, bridge, true);
5255 5264
5256 return rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE, 5265 return rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE,
5257 untagged_vid, 0); 5266 untagged_vid, 0);
@@ -5272,6 +5281,8 @@ static int rocker_port_bridge_leave(struct rocker_port *rocker_port)
5272 rocker_port_internal_vlan_id_get(rocker_port, 5281 rocker_port_internal_vlan_id_get(rocker_port,
5273 rocker_port->dev->ifindex); 5282 rocker_port->dev->ifindex);
5274 5283
5284 switchdev_port_fwd_mark_set(rocker_port->dev, rocker_port->bridge_dev,
5285 false);
5275 rocker_port->bridge_dev = NULL; 5286 rocker_port->bridge_dev = NULL;
5276 5287
5277 err = rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE, 5288 err = rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE,
diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h
index 08b2c3d96188..12490b2f6504 100644
--- a/drivers/net/ethernet/rocker/rocker.h
+++ b/drivers/net/ethernet/rocker/rocker.h
@@ -246,6 +246,7 @@ enum {
246#define ROCKER_RX_FLAGS_TCP BIT(5) 246#define ROCKER_RX_FLAGS_TCP BIT(5)
247#define ROCKER_RX_FLAGS_UDP BIT(6) 247#define ROCKER_RX_FLAGS_UDP BIT(6)
248#define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD BIT(7) 248#define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD BIT(7)
249#define ROCKER_RX_FLAGS_FWD_OFFLOAD BIT(8)
249 250
250enum { 251enum {
251 ROCKER_TLV_TX_UNSPEC, 252 ROCKER_TLV_TX_UNSPEC,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 45cfd797eb77..607b5f41f46f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -766,6 +766,13 @@ struct netdev_phys_item_id {
766 unsigned char id_len; 766 unsigned char id_len;
767}; 767};
768 768
769static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a,
770 struct netdev_phys_item_id *b)
771{
772 return a->id_len == b->id_len &&
773 memcmp(a->id, b->id, a->id_len) == 0;
774}
775
769typedef u16 (*select_queue_fallback_t)(struct net_device *dev, 776typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
770 struct sk_buff *skb); 777 struct sk_buff *skb);
771 778
@@ -1456,6 +1463,8 @@ enum netdev_priv_flags {
1456 * 1463 *
1457 * @xps_maps: XXX: need comments on this one 1464 * @xps_maps: XXX: need comments on this one
1458 * 1465 *
1466 * @offload_fwd_mark: Offload device fwding mark
1467 *
1459 * @trans_start: Time (in jiffies) of last Tx 1468 * @trans_start: Time (in jiffies) of last Tx
1460 * @watchdog_timeo: Represents the timeout that is used by 1469 * @watchdog_timeo: Represents the timeout that is used by
1461 * the watchdog ( see dev_watchdog() ) 1470 * the watchdog ( see dev_watchdog() )
@@ -1697,6 +1706,10 @@ struct net_device {
1697 struct xps_dev_maps __rcu *xps_maps; 1706 struct xps_dev_maps __rcu *xps_maps;
1698#endif 1707#endif
1699 1708
1709#ifdef CONFIG_NET_SWITCHDEV
1710 u32 offload_fwd_mark;
1711#endif
1712
1700 /* These may be needed for future network-power-down code. */ 1713 /* These may be needed for future network-power-down code. */
1701 1714
1702 /* 1715 /*
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d6cdd6e87d53..af7a09650fa2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -506,6 +506,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
506 * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS 506 * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS
507 * @napi_id: id of the NAPI struct this skb came from 507 * @napi_id: id of the NAPI struct this skb came from
508 * @secmark: security marking 508 * @secmark: security marking
509 * @offload_fwd_mark: fwding offload mark
509 * @mark: Generic packet mark 510 * @mark: Generic packet mark
510 * @vlan_proto: vlan encapsulation protocol 511 * @vlan_proto: vlan encapsulation protocol
511 * @vlan_tci: vlan tag control information 512 * @vlan_tci: vlan tag control information
@@ -650,9 +651,15 @@ struct sk_buff {
650 unsigned int sender_cpu; 651 unsigned int sender_cpu;
651 }; 652 };
652#endif 653#endif
654 union {
653#ifdef CONFIG_NETWORK_SECMARK 655#ifdef CONFIG_NETWORK_SECMARK
654 __u32 secmark; 656 __u32 secmark;
657#endif
658#ifdef CONFIG_NET_SWITCHDEV
659 __u32 offload_fwd_mark;
655#endif 660#endif
661 };
662
656 union { 663 union {
657 __u32 mark; 664 __u32 mark;
658 __u32 reserved_tailroom; 665 __u32 reserved_tailroom;
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index d5671f118bfc..89da8934519b 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -157,6 +157,9 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
157int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, 157int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
158 struct net_device *dev, 158 struct net_device *dev,
159 struct net_device *filter_dev, int idx); 159 struct net_device *filter_dev, int idx);
160void switchdev_port_fwd_mark_set(struct net_device *dev,
161 struct net_device *group_dev,
162 bool joining);
160 163
161#else 164#else
162 165
@@ -271,6 +274,12 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb,
271 return -EOPNOTSUPP; 274 return -EOPNOTSUPP;
272} 275}
273 276
/* No-op variant of switchdev_port_fwd_mark_set() provided on the #else
 * side of this header's configuration guard: it leaves @dev untouched.
 */
static inline void switchdev_port_fwd_mark_set(struct net_device *dev,
					       struct net_device *group_dev,
					       bool joining)
{
}
282
274#endif 283#endif
275 284
276#endif /* _LINUX_SWITCHDEV_H_ */ 285#endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/net/core/dev.c b/net/core/dev.c
index 8810b6bbebfe..2ee15afb412d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3061,6 +3061,16 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
3061 else 3061 else
3062 skb_dst_force(skb); 3062 skb_dst_force(skb);
3063 3063
3064#ifdef CONFIG_NET_SWITCHDEV
3065 /* Don't forward if offload device already forwarded */
3066 if (skb->offload_fwd_mark &&
3067 skb->offload_fwd_mark == dev->offload_fwd_mark) {
3068 consume_skb(skb);
3069 rc = NET_XMIT_SUCCESS;
3070 goto out;
3071 }
3072#endif
3073
3064 txq = netdev_pick_tx(dev, skb, accel_priv); 3074 txq = netdev_pick_tx(dev, skb, accel_priv);
3065 q = rcu_dereference_bh(txq->qdisc); 3075 q = rcu_dereference_bh(txq->qdisc);
3066 3076
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 9f2add3cba26..33bafa2e703e 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -910,13 +910,9 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
910 if (switchdev_port_attr_get(dev, &attr)) 910 if (switchdev_port_attr_get(dev, &attr))
911 return NULL; 911 return NULL;
912 912
913 if (nhsel > 0) { 913 if (nhsel > 0 &&
914 if (prev_attr.u.ppid.id_len != attr.u.ppid.id_len) 914 !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
915 return NULL; 915 return NULL;
916 if (memcmp(prev_attr.u.ppid.id, attr.u.ppid.id,
917 attr.u.ppid.id_len))
918 return NULL;
919 }
920 916
921 prev_attr = attr; 917 prev_attr = attr;
922 } 918 }
@@ -1043,3 +1039,106 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi)
1043 fi->fib_net->ipv4.fib_offload_disabled = true; 1039 fi->fib_net->ipv4.fib_offload_disabled = true;
1044} 1040}
1045EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort); 1041EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
1042
1043static bool switchdev_port_same_parent_id(struct net_device *a,
1044 struct net_device *b)
1045{
1046 struct switchdev_attr a_attr = {
1047 .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
1048 .flags = SWITCHDEV_F_NO_RECURSE,
1049 };
1050 struct switchdev_attr b_attr = {
1051 .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
1052 .flags = SWITCHDEV_F_NO_RECURSE,
1053 };
1054
1055 if (switchdev_port_attr_get(a, &a_attr) ||
1056 switchdev_port_attr_get(b, &b_attr))
1057 return false;
1058
1059 return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
1060}
1061
1062static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
1063 struct net_device *group_dev)
1064{
1065 struct net_device *lower_dev;
1066 struct list_head *iter;
1067
1068 netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
1069 if (lower_dev == dev)
1070 continue;
1071 if (switchdev_port_same_parent_id(dev, lower_dev))
1072 return lower_dev->offload_fwd_mark;
1073 return switchdev_port_fwd_mark_get(dev, lower_dev);
1074 }
1075
1076 return dev->ifindex;
1077}
1078
1079static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
1080 u32 old_mark, u32 *reset_mark)
1081{
1082 struct net_device *lower_dev;
1083 struct list_head *iter;
1084
1085 netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
1086 if (lower_dev->offload_fwd_mark == old_mark) {
1087 if (!*reset_mark)
1088 *reset_mark = lower_dev->ifindex;
1089 lower_dev->offload_fwd_mark = *reset_mark;
1090 }
1091 switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
1092 }
1093}
1094
1095/**
1096 * switchdev_port_fwd_mark_set - Set port offload forwarding mark
1097 *
1098 * @dev: port device
1099 * @group_dev: containing device
1100 * @joining: true if dev is joining group; false if leaving group
1101 *
1102 * An ungrouped port's offload mark is just its ifindex. A grouped
1103 * port's (member of a bridge, for example) offload mark is the ifindex
1104 * of one of the ports in the group with the same parent (switch) ID.
1105 * Ports on the same device in the same group will have the same mark.
1106 *
1107 * Example:
1108 *
1109 * br0 ifindex=9
1110 * sw1p1 ifindex=2 mark=2
1111 * sw1p2 ifindex=3 mark=2
1112 * sw2p1 ifindex=4 mark=5
1113 * sw2p2 ifindex=5 mark=5
1114 *
1115 * If sw2p2 leaves the bridge, we'll have:
1116 *
1117 * br0 ifindex=9
1118 * sw1p1 ifindex=2 mark=2
1119 * sw1p2 ifindex=3 mark=2
1120 * sw2p1 ifindex=4 mark=4
1121 * sw2p2 ifindex=5 mark=5
1122 */
1123void switchdev_port_fwd_mark_set(struct net_device *dev,
1124 struct net_device *group_dev,
1125 bool joining)
1126{
1127 u32 mark = dev->ifindex;
1128 u32 reset_mark = 0;
1129
1130 if (group_dev && joining) {
1131 mark = switchdev_port_fwd_mark_get(dev, group_dev);
1132 } else if (group_dev && !joining) {
1133 if (dev->offload_fwd_mark == mark)
1134 /* Ohoh, this port was the mark reference port,
1135 * but it's leaving the group, so reset the
1136 * mark for the remaining ports in the group.
1137 */
1138 switchdev_port_fwd_mark_reset(group_dev, mark,
1139 &reset_mark);
1140 }
1141
1142 dev->offload_fwd_mark = mark;
1143}
1144EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);