-rw-r--r--  Documentation/devicetree/bindings/net/dsa/ksz.txt | 102
-rw-r--r--  Documentation/networking/devlink-health.txt | 86
-rw-r--r--  Documentation/networking/dsa/dsa.txt | 13
-rw-r--r--  Documentation/networking/snmp_counter.rst | 111
-rw-r--r--  Documentation/networking/switchdev.txt | 2
-rw-r--r--  MAINTAINERS | 1
-rw-r--r--  arch/alpha/include/uapi/asm/socket.h | 2
-rw-r--r--  arch/arm/mach-orion5x/common.c | 2
-rw-r--r--  arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c | 2
-rw-r--r--  arch/arm/mach-orion5x/rd88f5181l-ge-setup.c | 2
-rw-r--r--  arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c | 2
-rw-r--r--  arch/arm/mach-orion5x/wnr854t-setup.c | 2
-rw-r--r--  arch/arm/mach-orion5x/wrt350n-v2-setup.c | 2
-rw-r--r--  arch/arm/plat-orion/common.c | 2
-rw-r--r--  arch/ia64/include/uapi/asm/socket.h | 2
-rw-r--r--  arch/mips/include/uapi/asm/socket.h | 2
-rw-r--r--  arch/parisc/include/uapi/asm/socket.h | 2
-rw-r--r--  arch/s390/include/uapi/asm/socket.h | 2
-rw-r--r--  arch/sparc/include/uapi/asm/socket.h | 2
-rw-r--r--  arch/xtensa/include/uapi/asm/socket.h | 2
-rw-r--r--  drivers/isdn/hisax/netjet.c | 6
-rw-r--r--  drivers/isdn/hisax/q931.c | 2
-rw-r--r--  drivers/isdn/hisax/st5481.h | 2
-rw-r--r--  drivers/isdn/isdnloop/isdnloop.c | 2
-rw-r--r--  drivers/net/dsa/lantiq_gswip.c | 24
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h | 2
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c | 176
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c | 12
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/l2t.c | 2
-rw-r--r--  drivers/net/ethernet/davicom/dm9000.c | 1
-rw-r--r--  drivers/net/ethernet/freescale/dpaa2/Makefile | 1
-rw-r--r--  drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c | 237
-rw-r--r--  drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.h | 31
-rw-r--r--  drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 98
-rw-r--r--  drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h | 14
-rw-r--r--  drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c | 5
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hnae3.h | 2
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 180
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 1
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c | 17
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 96
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 2
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 65
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 1
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 2
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c | 13
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h | 3
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_if.c | 28
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_if.h | 14
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_rx.c | 11
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_tx.c | 8
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_main.c | 3
-rw-r--r--  drivers/net/ethernet/intel/ice/ice.h | 16
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 77
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_common.c | 160
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_common.h | 11
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_ethtool.c | 680
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_hw_autogen.h | 1
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h | 3
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_lib.c | 266
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_lib.h | 5
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_main.c | 135
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_nvm.c | 81
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_sched.c | 156
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_sched.h | 2
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_sriov.c | 9
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_txrx.c | 71
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_txrx.h | 32
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_type.h | 4
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 24
-rw-r--r--  drivers/net/ethernet/intel/igb/igb_main.c | 3
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3
-rw-r--r--  drivers/net/ethernet/jme.c | 5
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/cmd.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/eq.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en.h | 18
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h | 15
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 356
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 186
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/reg.h | 23
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 6
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h | 3
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c | 6
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 227
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 37
-rw-r--r--  drivers/net/ethernet/mscc/ocelot.c | 3
-rw-r--r--  drivers/net/ethernet/neterion/vxge/vxge-main.c | 2
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/cmsg.c | 2
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 3
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/main.c | 109
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/main.h | 89
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/match.c | 3
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/metadata.c | 27
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c | 612
-rw-r--r--  drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 3
-rw-r--r--  drivers/net/ethernet/realtek/atp.c | 27
-rw-r--r--  drivers/net/ethernet/realtek/r8169.c | 188
-rw-r--r--  drivers/net/ethernet/rocker/rocker_main.c | 2
-rw-r--r--  drivers/net/ethernet/rocker/rocker_ofdpa.c | 4
-rw-r--r--  drivers/net/ethernet/sfc/ef10.c | 14
-rw-r--r--  drivers/net/ethernet/sfc/mcdi.c | 56
-rw-r--r--  drivers/net/ethernet/sfc/mtd.c | 3
-rw-r--r--  drivers/net/ethernet/sun/sungem.c | 2
-rw-r--r--  drivers/net/ethernet/ti/cpsw-phy-sel.c | 4
-rw-r--r--  drivers/net/macvlan.c | 3
-rw-r--r--  drivers/net/phy/mdio_bus.c | 2
-rw-r--r--  drivers/net/phy/micrel.c | 43
-rw-r--r--  drivers/net/phy/phy.c | 27
-rw-r--r--  drivers/net/phy/phy_device.c | 101
-rw-r--r--  drivers/net/sb1000.c | 11
-rw-r--r--  drivers/net/usb/rtl8150.c | 2
-rw-r--r--  drivers/net/virtio_net.c | 1
-rw-r--r--  drivers/net/vxlan.c | 352
-rw-r--r--  drivers/net/wan/dscc4.c | 50
-rw-r--r--  include/linux/netdevice.h | 6
-rw-r--r--  include/linux/netlink.h | 1
-rw-r--r--  include/linux/phy.h | 10
-rw-r--r--  include/linux/platform_data/b53.h | 2
-rw-r--r--  include/linux/platform_data/dsa.h | 68
-rw-r--r--  include/linux/platform_data/mv88e6xxx.h | 2
-rw-r--r--  include/linux/qed/qed_if.h | 1
-rw-r--r--  include/net/devlink.h | 144
-rw-r--r--  include/net/dsa.h | 66
-rw-r--r--  include/net/sch_generic.h | 3
-rw-r--r--  include/net/switchdev.h | 6
-rw-r--r--  include/net/tcp.h | 2
-rw-r--r--  include/net/tls.h | 3
-rw-r--r--  include/net/vxlan.h | 6
-rw-r--r--  include/trace/events/devlink.h | 62
-rw-r--r--  include/uapi/asm-generic/socket.h | 2
-rw-r--r--  include/uapi/linux/devlink.h | 25
-rw-r--r--  include/uapi/linux/pkt_cls.h | 13
-rw-r--r--  net/bridge/br_fdb.c | 3
-rw-r--r--  net/bridge/br_private.h | 3
-rw-r--r--  net/bridge/br_switchdev.c | 2
-rw-r--r--  net/core/devlink.c | 1055
-rw-r--r--  net/core/dst.c | 6
-rw-r--r--  net/core/net_namespace.c | 38
-rw-r--r--  net/core/rtnetlink.c | 116
-rw-r--r--  net/core/skmsg.c | 25
-rw-r--r--  net/core/sock.c | 47
-rw-r--r--  net/decnet/dn_fib.c | 2
-rw-r--r--  net/dsa/dsa_priv.h | 3
-rw-r--r--  net/dsa/master.c | 56
-rw-r--r--  net/dsa/slave.c | 5
-rw-r--r--  net/ipv4/devinet.c | 43
-rw-r--r--  net/ipv4/ipmr.c | 61
-rw-r--r--  net/ipv4/route.c | 72
-rw-r--r--  net/ipv4/tcp.c | 21
-rw-r--r--  net/ipv4/tcp_minisocks.c | 34
-rw-r--r--  net/ipv4/tcp_output.c | 47
-rw-r--r--  net/ipv4/tcp_timer.c | 83
-rw-r--r--  net/ipv4/udp_tunnel.c | 15
-rw-r--r--  net/ipv6/addrconf.c | 90
-rw-r--r--  net/ipv6/addrlabel.c | 47
-rw-r--r--  net/ipv6/ip6_gre.c | 4
-rw-r--r--  net/ipv6/ip6_udp_tunnel.c | 15
-rw-r--r--  net/ipv6/route.c | 70
-rw-r--r--  net/mpls/af_mpls.c | 103
-rw-r--r--  net/netlink/af_netlink.c | 8
-rw-r--r--  net/openvswitch/meter.c | 3
-rw-r--r--  net/sched/cls_basic.c | 25
-rw-r--r--  net/sched/cls_matchall.c | 24
-rw-r--r--  net/sched/sch_api.c | 3
-rw-r--r--  net/switchdev/switchdev.c | 5
-rw-r--r--  net/tipc/trace.c | 4
-rw-r--r--  net/tls/tls_sw.c | 280
-rw-r--r--  net/vmw_vsock/af_vsock.c | 2
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh | 126
-rwxr-xr-x  tools/testing/selftests/net/forwarding/ipip_flat_gre.sh | 63
-rwxr-xr-x  tools/testing/selftests/net/forwarding/ipip_flat_gre_key.sh | 63
-rwxr-xr-x  tools/testing/selftests/net/forwarding/ipip_flat_gre_keys.sh | 63
-rwxr-xr-x  tools/testing/selftests/net/forwarding/ipip_hier_gre.sh | 63
-rwxr-xr-x  tools/testing/selftests/net/forwarding/ipip_hier_gre_key.sh | 63
-rwxr-xr-x  tools/testing/selftests/net/forwarding/ipip_hier_gre_keys.sh | 63
-rw-r--r--  tools/testing/selftests/net/forwarding/ipip_lib.sh | 349
-rw-r--r--  tools/testing/selftests/net/tls.c | 12
186 files changed, 7476 insertions(+), 2032 deletions(-)
diff --git a/Documentation/devicetree/bindings/net/dsa/ksz.txt b/Documentation/devicetree/bindings/net/dsa/ksz.txt
index 0f407fb371ce..8d58c2a7de39 100644
--- a/Documentation/devicetree/bindings/net/dsa/ksz.txt
+++ b/Documentation/devicetree/bindings/net/dsa/ksz.txt
@@ -19,58 +19,58 @@ Examples:
 
 Ethernet switch connected via SPI to the host, CPU port wired to eth0:
 
 	eth0: ethernet@10001000 {
 		fixed-link {
 			speed = <1000>;
 			full-duplex;
 		};
 	};
 
 	spi1: spi@f8008000 {
 		pinctrl-0 = <&pinctrl_spi_ksz>;
 		cs-gpios = <&pioC 25 0>;
 		id = <1>;
 
 		ksz9477: ksz9477@0 {
 			compatible = "microchip,ksz9477";
 			reg = <0>;
 
 			spi-max-frequency = <44000000>;
 			spi-cpha;
 			spi-cpol;
 
 			ports {
 				#address-cells = <1>;
 				#size-cells = <0>;
 				port@0 {
 					reg = <0>;
 					label = "lan1";
 				};
 				port@1 {
 					reg = <1>;
 					label = "lan2";
 				};
 				port@2 {
 					reg = <2>;
 					label = "lan3";
 				};
 				port@3 {
 					reg = <3>;
 					label = "lan4";
 				};
 				port@4 {
 					reg = <4>;
 					label = "lan5";
 				};
 				port@5 {
 					reg = <5>;
 					label = "cpu";
 					ethernet = <&eth0>;
 					fixed-link {
 						speed = <1000>;
 						full-duplex;
 					};
 				};
 			};
 		};
 	};
diff --git a/Documentation/networking/devlink-health.txt b/Documentation/networking/devlink-health.txt
new file mode 100644
index 000000000000..1db3fbea0831
--- /dev/null
+++ b/Documentation/networking/devlink-health.txt
@@ -0,0 +1,86 @@
+The health mechanism is targeted for Real Time Alerting, in order to know when
+something bad has happened to a PCI device:
+- Provide alert debug information
+- Self healing
+- If a problem needs vendor support, provide a way to gather all needed
+  debugging information.
+
+The main idea is to unify and centralize driver health reports in the
+generic devlink instance and allow the user to set different
+attributes of the health reporting and recovery procedures.
+
+The devlink health reporter:
+Device driver creates a "health reporter" for each error/health type.
+Error/Health type can be known/generic (e.g. PCI error, FW error, RX/TX error)
+or unknown (driver specific).
+For each registered health reporter a driver can issue error/health reports
+asynchronously. All health report handling is done by devlink.
+Device driver can provide specific callbacks for each "health reporter", e.g.
+ - Recovery procedures
+ - Diagnostics and object dump procedures
+ - OOB initial parameters
+Different parts of the driver can register different types of health reporters
+with different handlers.
+
+Once an error is reported, devlink health will perform the following actions:
+ * A log is sent to the kernel trace events buffer
+ * Health status and statistics are updated for the reporter instance
+ * An object dump is taken and saved at the reporter instance (as long as
+   no other dump is already stored)
+ * An auto recovery attempt is made, depending on:
+   - Auto-recovery configuration
+   - Grace period vs. time passed since the last recovery
+
+The user interface:
+User can access/change each reporter's parameters and driver specific callbacks
+via devlink, e.g. per error type (per health reporter):
+ - Configure reporter's generic parameters (like: disable/enable auto recovery)
+ - Invoke recovery procedure
+ - Run diagnostics
+ - Object dump
+
+The devlink health interface (via netlink):
+DEVLINK_CMD_HEALTH_REPORTER_GET
+  Retrieves status and configuration info per DEV and reporter.
+DEVLINK_CMD_HEALTH_REPORTER_SET
+  Allows reporter-related configuration setting.
+DEVLINK_CMD_HEALTH_REPORTER_RECOVER
+  Triggers a reporter's recovery procedure.
+DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE
+  Retrieves diagnostics data from a reporter on a device.
+DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET
+  Retrieves the last stored dump. Devlink health
+  saves a single dump. If a dump is not already stored by devlink
+  for this reporter, devlink generates a new dump.
+  Dump output is defined by the reporter.
+DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR
+  Clears the last saved dump file for the specified reporter.
+
+
+                                                   netlink
+                                        +--------------------------+
+                                        |                          |
+                                        |            +             |
+                                        |            |             |
+                                        +--------------------------+
+                                                     |request for ops
+                                                     |(diagnose,
+ mlx5_core                                           |recover,
+                                                     |dump)
++--------+                              +--------------------------+
+|        |                              | reporter|                |
+|        |                              | +---------v----------+   |
+|        |   ops execution              | |                    |   |
+|        <--------------------------------+                    |   |
+|        |                              | |                    |   |
+|        |                              | + ^------------------+   |
+|        |                              |   | request for ops      |
+|        |                              |   | (recover, dump)      |
+|        |                              |   |                      |
+|        |                              | +-+------------------+   |
+|        |   health report              | | health handler     |   |
+|        +------------------------------->|                    |   |
+|        |                              | +--------------------+   |
+|        |   health reporter create     |                          |
+|        +----------------------------->                           |
++--------+                              +--------------------------+
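
A rough sketch of how a driver plugs into this mechanism, based on the
reporter API this series adds in include/net/devlink.h (the foo_* names are
hypothetical and the signatures should be treated as approximate, not
authoritative):

	/* Sketch: one reporter per error type, with a recover callback. */
	static int foo_tx_recover(struct devlink_health_reporter *reporter,
				  void *priv_ctx)
	{
		struct foo_priv *priv = devlink_health_reporter_priv(reporter);

		/* hypothetical driver helper that resets the TX queues */
		return foo_reset_tx_queues(priv);
	}

	static const struct devlink_health_reporter_ops foo_tx_reporter_ops = {
		.name = "tx",
		.recover = foo_tx_recover,
	};

	/* at probe time: register the reporter, auto-recovery enabled */
	priv->tx_reporter =
		devlink_health_reporter_create(devlink, &foo_tx_reporter_ops,
					       FOO_TX_GRACE_PERIOD, true, priv);

	/* on a detected error: hand the event to devlink health, which logs
	 * it, updates statistics, dumps state and tries to auto-recover */
	devlink_health_report(priv->tx_reporter, "TX timeout", &err_ctx);

From user space the reporter is then driven with the
DEVLINK_CMD_HEALTH_REPORTER_* commands listed above.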
diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt
index 25170ad7d25b..1000b821681c 100644
--- a/Documentation/networking/dsa/dsa.txt
+++ b/Documentation/networking/dsa/dsa.txt
@@ -236,19 +236,6 @@ description.
 Design limitations
 ==================
 
-DSA is a platform device driver
--------------------------------
-
-DSA is implemented as a DSA platform device driver which is convenient because
-it will register the entire DSA switch tree attached to a master network device
-in one-shot, facilitating the device creation and simplifying the device driver
-model a bit, this comes however with a number of limitations:
-
-- building DSA and its switch drivers as modules is currently not working
-- the device driver parenting does not necessarily reflect the original
-  bus/device the switch can be created from
-- supporting non-MDIO and non-MMIO (platform) switches is not possible
-
 Limits on the number of devices and ports
 -----------------------------------------
 
diff --git a/Documentation/networking/snmp_counter.rst b/Documentation/networking/snmp_counter.rst
index fe8f741193be..c5642f430d2e 100644
--- a/Documentation/networking/snmp_counter.rst
+++ b/Documentation/networking/snmp_counter.rst
@@ -1,16 +1,17 @@
-===========
+============
 SNMP counter
-===========
+============
 
 This document explains the meaning of SNMP counters.
 
 General IPv4 counters
-====================
+=====================
 All layer 4 packets and ICMP packets will change these counters, but
 these counters won't be changed by layer 2 packets (such as STP) or
 ARP packets.
 
 * IpInReceives
+
 Defined in `RFC1213 ipInReceives`_
 
 .. _RFC1213 ipInReceives: https://tools.ietf.org/html/rfc1213#page-26
@@ -23,6 +24,7 @@ and so on). It indicates the number of aggregated segments after
 GRO/LRO.
 
 * IpInDelivers
+
 Defined in `RFC1213 ipInDelivers`_
 
 .. _RFC1213 ipInDelivers: https://tools.ietf.org/html/rfc1213#page-28
@@ -33,6 +35,7 @@ supported protocols will be delivered, if someone listens on the raw
 socket, all valid IP packets will be delivered.
 
 * IpOutRequests
+
 Defined in `RFC1213 ipOutRequests`_
 
 .. _RFC1213 ipOutRequests: https://tools.ietf.org/html/rfc1213#page-28
@@ -42,6 +45,7 @@ multicast packets, and would always be updated together with
 IpExtOutOctets.
 
 * IpExtInOctets and IpExtOutOctets
+
 They are Linux kernel extensions, no RFC definitions. Please note,
 RFC1213 indeed defines ifInOctets and ifOutOctets, but they
 are different things. The ifInOctets and ifOutOctets include the MAC
@@ -49,6 +53,7 @@ layer header size but IpExtInOctets and IpExtOutOctets don't, they
 only include the IP layer header and the IP layer data.
 
 * IpExtInNoECTPkts, IpExtInECT1Pkts, IpExtInECT0Pkts, IpExtInCEPkts
+
 They indicate the number of four kinds of ECN IP packets, please refer
 `Explicit Congestion Notification`_ for more details.
 
@@ -60,6 +65,7 @@ for the same packet, you might find that IpInReceives count 1, but
 IpExtInNoECTPkts counts 2 or more.
 
 * IpInHdrErrors
+
 Defined in `RFC1213 ipInHdrErrors`_. It indicates the packet is
 dropped due to the IP header error. It might happen in both IP input
 and IP forward paths.
@@ -67,6 +73,7 @@ and IP forward paths.
 .. _RFC1213 ipInHdrErrors: https://tools.ietf.org/html/rfc1213#page-27
 
 * IpInAddrErrors
+
 Defined in `RFC1213 ipInAddrErrors`_. It will be increased in two
 scenarios: (1) The IP address is invalid. (2) The destination IP
 address is not a local address and IP forwarding is not enabled
@@ -74,6 +81,7 @@ address is not a local address and IP forwarding is not enabled
 .. _RFC1213 ipInAddrErrors: https://tools.ietf.org/html/rfc1213#page-27
 
 * IpExtInNoRoutes
+
 This counter means the packet is dropped when the IP stack receives a
 packet and can't find a route for it from the route table. It might
 happen when IP forwarding is enabled and the destination IP address is
@@ -81,6 +89,7 @@ not a local address and there is no route for the destination IP
 address.
 
 * IpInUnknownProtos
+
 Defined in `RFC1213 ipInUnknownProtos`_. It will be increased if the
 layer 4 protocol is unsupported by kernel. If an application is using
 raw socket, kernel will always deliver the packet to the raw socket
@@ -89,10 +98,12 @@ and this counter won't be increased.
 .. _RFC1213 ipInUnknownProtos: https://tools.ietf.org/html/rfc1213#page-27
 
 * IpExtInTruncatedPkts
+
 For IPv4 packet, it means the actual data size is smaller than the
 "Total Length" field in the IPv4 header.
 
 * IpInDiscards
+
 Defined in `RFC1213 ipInDiscards`_. It indicates the packet is dropped
 in the IP receiving path and due to kernel internal reasons (e.g. no
 enough memory).
@@ -100,20 +111,23 @@ enough memory).
 .. _RFC1213 ipInDiscards: https://tools.ietf.org/html/rfc1213#page-28
 
 * IpOutDiscards
+
 Defined in `RFC1213 ipOutDiscards`_. It indicates the packet is
 dropped in the IP sending path and due to kernel internal reasons.
 
 .. _RFC1213 ipOutDiscards: https://tools.ietf.org/html/rfc1213#page-28
 
 * IpOutNoRoutes
+
 Defined in `RFC1213 ipOutNoRoutes`_. It indicates the packet is
 dropped in the IP sending path and no route is found for it.
 
 .. _RFC1213 ipOutNoRoutes: https://tools.ietf.org/html/rfc1213#page-29
 
 ICMP counters
-============
+=============
 * IcmpInMsgs and IcmpOutMsgs
+
 Defined by `RFC1213 icmpInMsgs`_ and `RFC1213 icmpOutMsgs`_
 
 .. _RFC1213 icmpInMsgs: https://tools.ietf.org/html/rfc1213#page-41
@@ -126,6 +140,7 @@ IcmpOutMsgs would still be updated if the IP header is constructed by
 a userspace program.
 
 * ICMP named types
+
 | These counters include most of common ICMP types, they are:
 | IcmpInDestUnreachs: `RFC1213 icmpInDestUnreachs`_
 | IcmpInTimeExcds: `RFC1213 icmpInTimeExcds`_
@@ -180,6 +195,7 @@ straightforward. The 'In' counter means kernel receives such a packet
 and the 'Out' counter means kernel sends such a packet.
 
 * ICMP numeric types
+
 They are IcmpMsgInType[N] and IcmpMsgOutType[N], the [N] indicates the
 ICMP type number. These counters track all kinds of ICMP packets. The
 ICMP type number definition could be found in the `ICMP parameters`_
@@ -192,12 +208,14 @@ IcmpMsgOutType8 would increase 1. And if kernel gets an ICMP Echo Reply
 packet, IcmpMsgInType0 would increase 1.
 
 * IcmpInCsumErrors
+
 This counter indicates the checksum of the ICMP packet is
 wrong. Kernel verifies the checksum after updating the IcmpInMsgs and
 before updating IcmpMsgInType[N]. If a packet has bad checksum, the
 IcmpInMsgs would be updated but none of IcmpMsgInType[N] would be updated.
 
 * IcmpInErrors and IcmpOutErrors
+
 Defined by `RFC1213 icmpInErrors`_ and `RFC1213 icmpOutErrors`_
 
 .. _RFC1213 icmpInErrors: https://tools.ietf.org/html/rfc1213#page-41
@@ -209,7 +227,7 @@ and the sending packet path use IcmpOutErrors. When IcmpInCsumErrors
 is increased, IcmpInErrors would always be increased too.
 
 relationship of the ICMP counters
--------------------------------
+---------------------------------
 The sum of IcmpMsgOutType[N] is always equal to IcmpOutMsgs, as they
 are updated at the same time. The sum of IcmpMsgInType[N] plus
 IcmpInErrors should be equal or larger than IcmpInMsgs. When kernel
@@ -229,8 +247,9 @@ IcmpInMsgs should be less than the sum of IcmpMsgOutType[N] plus
 IcmpInErrors.
 
 General TCP counters
-==================
+====================
 * TcpInSegs
+
 Defined in `RFC1213 tcpInSegs`_
 
 .. _RFC1213 tcpInSegs: https://tools.ietf.org/html/rfc1213#page-48
@@ -247,6 +266,7 @@ isn't aware of GRO. So if two packets are merged by GRO, the TcpInSegs
 counter would only increase 1.
 
 * TcpOutSegs
+
 Defined in `RFC1213 tcpOutSegs`_
 
 .. _RFC1213 tcpOutSegs: https://tools.ietf.org/html/rfc1213#page-48
@@ -258,6 +278,7 @@ GSO, so if a packet would be split to 2 by GSO, TcpOutSegs will
 increase 2.
 
 * TcpActiveOpens
+
 Defined in `RFC1213 tcpActiveOpens`_
 
 .. _RFC1213 tcpActiveOpens: https://tools.ietf.org/html/rfc1213#page-47
@@ -267,6 +288,7 @@ state. Every time TcpActiveOpens increases 1, TcpOutSegs should always
 increase 1.
 
 * TcpPassiveOpens
+
 Defined in `RFC1213 tcpPassiveOpens`_
 
 .. _RFC1213 tcpPassiveOpens: https://tools.ietf.org/html/rfc1213#page-47
@@ -275,6 +297,7 @@ It means the TCP layer receives a SYN, replies a SYN+ACK, come into
 the SYN-RCVD state.
 
 * TcpExtTCPRcvCoalesce
+
 When packets are received by the TCP layer and are not be read by the
 application, the TCP layer will try to merge them. This counter
 indicate how many packets are merged in such situation. If GRO is
@@ -282,12 +305,14 @@ enabled, lots of packets would be merged by GRO, these packets
 wouldn't be counted to TcpExtTCPRcvCoalesce.
 
 * TcpExtTCPAutoCorking
+
 When sending packets, the TCP layer will try to merge small packets to
 a bigger one. This counter increase 1 for every packet merged in such
 situation. Please refer to the LWN article for more details:
 https://lwn.net/Articles/576263/
 
 * TcpExtTCPOrigDataSent
+
 This counter is explained by `kernel commit f19c29e3e391`_, I pasted the
 explanation below::
 
@@ -297,6 +322,7 @@ explanation below::
   more useful to track the TCP retransmission rate.
 
 * TCPSynRetrans
+
 This counter is explained by `kernel commit f19c29e3e391`_, I pasted the
 explanation below::
 
@@ -304,6 +330,7 @@ explanation below::
   retransmissions into SYN, fast-retransmits, timeout retransmits, etc.
 
 * TCPFastOpenActiveFail
+
 This counter is explained by `kernel commit f19c29e3e391`_, I pasted the
 explanation below::
 
@@ -313,6 +340,7 @@ explanation below::
 .. _kernel commit f19c29e3e391: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=f19c29e3e391a66a273e9afebaf01917245148cd
 
 * TcpExtListenOverflows and TcpExtListenDrops
+
 When kernel receives a SYN from a client, and if the TCP accept queue
 is full, kernel will drop the SYN and add 1 to TcpExtListenOverflows.
 At the same time kernel will also add 1 to TcpExtListenDrops. When a
@@ -336,6 +364,8 @@ time client replies ACK, this socket will get another chance to move
 to the accept queue.
 
 
+TCP Fast Open
+=============
 * TcpEstabResets
 Defined in `RFC1213 tcpEstabResets`_.
 
@@ -389,20 +419,23 @@ will disable the fast path at first, and try to enable it after kernel
 receives packets.
 
 * TcpExtTCPPureAcks and TcpExtTCPHPAcks
+
 If a packet set ACK flag and has no data, it is a pure ACK packet, if
 kernel handles it in the fast path, TcpExtTCPHPAcks will increase 1,
 if kernel handles it in the slow path, TcpExtTCPPureAcks will
 increase 1.
 
 * TcpExtTCPHPHits
+
 If a TCP packet has data (which means it is not a pure ACK packet),
 and this packet is handled in the fast path, TcpExtTCPHPHits will
 increase 1.
 
 
 TCP abort
-========
+=========
 * TcpExtTCPAbortOnData
+
 It means TCP layer has data in flight, but need to close the
 connection. So TCP layer sends a RST to the other side, indicate the
 connection is not closed very graceful. An easy way to increase this
@@ -421,11 +454,13 @@ when the application closes a connection, kernel will send a RST
 immediately and increase the TcpExtTCPAbortOnData counter.
 
 * TcpExtTCPAbortOnClose
+
 This counter means the application has unread data in the TCP layer when
 the application wants to close the TCP connection. In such a situation,
 kernel will send a RST to the other side of the TCP connection.
 
 * TcpExtTCPAbortOnMemory
+
 When an application closes a TCP connection, kernel still need to track
 the connection, let it complete the TCP disconnect process. E.g. an
 app calls the close method of a socket, kernel sends fin to the other
@@ -447,10 +482,12 @@ the tcp_mem. Please refer the tcp_mem section in the `TCP man page`_:
 
 
 * TcpExtTCPAbortOnTimeout
+
 This counter will increase when any of the TCP timers expire. In such
 situation, kernel won't send RST, just give up the connection.
 
 * TcpExtTCPAbortOnLinger
+
 When a TCP connection comes into FIN_WAIT_2 state, instead of waiting
 for the fin packet from the other side, kernel could send a RST and
 delete the socket immediately. This is not the default behavior of
@@ -458,6 +495,7 @@ Linux kernel TCP stack. By configuring the TCP_LINGER2 socket option,
 you could let kernel follow this behavior.
 
 * TcpExtTCPAbortFailed
+
 The kernel TCP layer will send RST if the `RFC2525 2.17 section`_ is
 satisfied. If an internal error occurs during this process,
 TcpExtTCPAbortFailed will be increased.
@@ -465,7 +503,7 @@ TcpExtTCPAbortFailed will be increased.
 .. _RFC2525 2.17 section: https://tools.ietf.org/html/rfc2525#page-50
 
 TCP Hybrid Slow Start
-====================
+=====================
 The Hybrid Slow Start algorithm is an enhancement of the traditional
 TCP congestion window Slow Start algorithm. It uses two pieces of
 information to detect whether the max bandwidth of the TCP path is
@@ -481,23 +519,27 @@ relate with the Hybrid Slow Start algorithm.
 .. _Hybrid Slow Start paper: https://pdfs.semanticscholar.org/25e9/ef3f03315782c7f1cbcd31b587857adae7d1.pdf
 
 * TcpExtTCPHystartTrainDetect
+
 How many times the ACK train length threshold is detected
 
 * TcpExtTCPHystartTrainCwnd
+
 The sum of CWND detected by ACK train length. Dividing this value by
 TcpExtTCPHystartTrainDetect is the average CWND which detected by the
 ACK train length.
 
 * TcpExtTCPHystartDelayDetect
+
 How many times the packet delay threshold is detected.
 
 * TcpExtTCPHystartDelayCwnd
+
 The sum of CWND detected by packet delay. Dividing this value by
 TcpExtTCPHystartDelayDetect is the average CWND which detected by the
 packet delay.
 
 TCP retransmission and congestion control
-======================================
+=========================================
 The TCP protocol has two retransmission mechanisms: SACK and fast
 recovery. They are exclusive with each other. When SACK is enabled,
 the kernel TCP stack would use SACK, or kernel would use fast
@@ -516,12 +558,14 @@ https://pdfs.semanticscholar.org/0e9c/968d09ab2e53e24c4dca5b2d67c7f7140f8e.pdf
 .. _RFC6582: https://tools.ietf.org/html/rfc6582
 
 * TcpExtTCPRenoRecovery and TcpExtTCPSackRecovery
+
 When the congestion control comes into Recovery state, if sack is
 used, TcpExtTCPSackRecovery increases 1, if sack is not used,
 TcpExtTCPRenoRecovery increases 1. These two counters mean the TCP
 stack begins to retransmit the lost packets.
 
 * TcpExtTCPSACKReneging
+
 A packet was acknowledged by SACK, but the receiver has dropped this
 packet, so the sender needs to retransmit this packet. In this
 situation, the sender adds 1 to TcpExtTCPSACKReneging. A receiver
@@ -532,6 +576,7 @@ the RTO expires for this packet, then the sender assumes this packet
 has been dropped by the receiver.
 
 * TcpExtTCPRenoReorder
+
 The reorder packet is detected by fast recovery. It would only be used
 if SACK is disabled. The fast recovery algorithm detects reorder by
 the duplicate ACK number. E.g., if retransmission is triggered, and
@@ -542,6 +587,7 @@ order packet. Thus the sender would find more ACKs than its
 expectation, and the sender knows out of order occurs.
 
 * TcpExtTCPTSReorder
+
 The reorder packet is detected when a hole is filled. E.g., assume the
 sender sends packet 1,2,3,4,5, and the receiving order is
 1,2,4,5,3. When the sender receives the ACK of packet 3 (which will
@@ -551,6 +597,7 @@ fill the hole), two conditions will let TcpExtTCPTSReorder increase
 than the retransmission timestamp.
 
 * TcpExtTCPSACKReorder
+
 The reorder packet detected by SACK. The SACK has two methods to
 detect reorder: (1) DSACK is received by the sender. It means the
 sender sends the same packet more than one times. And the only reason
@@ -574,10 +621,12 @@ sender side.
 .. _RFC2883 : https://tools.ietf.org/html/rfc2883
 
 * TcpExtTCPDSACKOldSent
+
 The TCP stack receives a duplicate packet which has been acked, so it
 sends a DSACK to the sender.
 
 * TcpExtTCPDSACKOfoSent
+
 The TCP stack receives an out of order duplicate packet, so it sends a
 DSACK to the sender.
 
@@ -586,6 +635,7 @@ The TCP stack receives a DSACK, which indicates an acknowledged
 duplicate packet is received.
 
 * TcpExtTCPDSACKOfoRecv
+
 The TCP stack receives a DSACK, which indicate an out of order
 duplicate packet is received.
 
@@ -640,23 +690,26 @@ A skb should be shifted or merged, but the TCP stack doesn't do it for
 some reasons.
 
 TCP out of order
-===============
+================
 * TcpExtTCPOFOQueue
+
 The TCP layer receives an out of order packet and has enough memory
 to queue it.
 
 * TcpExtTCPOFODrop
+
 The TCP layer receives an out of order packet but doesn't have enough
 memory, so drops it. Such packets won't be counted into
 TcpExtTCPOFOQueue.
 
 * TcpExtTCPOFOMerge
+
 The received out of order packet has an overlay with the previous
 packet. The overlay part will be dropped. All of TcpExtTCPOFOMerge
 packets will also be counted into TcpExtTCPOFOQueue.
 
 TCP PAWS
-=======
+========
 PAWS (Protection Against Wrapped Sequence numbers) is an algorithm
 which is used to drop old packets. It depends on the TCP
 timestamps. For detail information, please refer the `timestamp wiki`_
@@ -666,13 +719,15 @@ and the `RFC of PAWS`_.
 .. _timestamp wiki: https://en.wikipedia.org/wiki/Transmission_Control_Protocol#TCP_timestamps
 
 * TcpExtPAWSActive
+
 Packets are dropped by PAWS in Syn-Sent status.
 
 * TcpExtPAWSEstab
+
 Packets are dropped by PAWS in any status other than Syn-Sent.
 
 TCP ACK skip
-===========
+============
 In some scenarios, kernel would avoid sending duplicate ACKs too
 frequently. Please find more details in the tcp_invalid_ratelimit
 section of the `sysctl document`_. When kernel decides to skip an ACK
@@ -684,6 +739,7 @@ it has no data.
 .. _sysctl document: https://www.kernel.org/doc/Documentation/networking/ip-sysctl.txt
 
 * TcpExtTCPACKSkippedSynRecv
+
 The ACK is skipped in Syn-Recv status. The Syn-Recv status means the
 TCP stack receives a SYN and replies SYN+ACK. Now the TCP stack is
 waiting for an ACK. Generally, the TCP stack doesn't need to send ACK
@@ -697,6 +753,7 @@ increase TcpExtTCPACKSkippedSynRecv.
 
 
 * TcpExtTCPACKSkippedPAWS
+
 The ACK is skipped due to PAWS (Protect Against Wrapped Sequence
 numbers) check fails. If the PAWS check fails in Syn-Recv, Fin-Wait-2
 or Time-Wait statuses, the skipped ACK would be counted to
@@ -705,18 +762,22 @@ TcpExtTCPACKSkippedTimeWait. In all other statuses, the skipped ACK
 would be counted to TcpExtTCPACKSkippedPAWS.
 
 * TcpExtTCPACKSkippedSeq
+
 The sequence number is out of window and the timestamp passes the PAWS
 check and the TCP status is not Syn-Recv, Fin-Wait-2, and Time-Wait.
 
 * TcpExtTCPACKSkippedFinWait2
+
 The ACK is skipped in Fin-Wait-2 status, the reason would be either
 PAWS check fails or the received sequence number is out of window.
 
 * TcpExtTCPACKSkippedTimeWait
+
 The ACK is skipped in Time-Wait status, the reason would be either
 PAWS check failed or the received sequence number is out of window.
 
 * TcpExtTCPACKSkippedChallenge
+
 The ACK is skipped if the ACK is a challenge ACK. The RFC 5961 defines
 3 kind of challenge ACK, please refer `RFC 5961 section 3.2`_,
 `RFC 5961 section 4.2`_ and `RFC 5961 section 5.2`_. Besides these
@@ -784,10 +845,10 @@ A TLP probe packet is sent.
 A packet loss is detected and recovered by TLP.
 
 examples
-=======
+========
 
 ping test
---------
+---------
 Run the ping command against the public dns server 8.8.8.8::
 
   nstatuser@nstat-a:~$ ping 8.8.8.8 -c 1
@@ -831,7 +892,7 @@ and its corresponding Echo Reply packet are constructed by:
 So the IpExtInOctets and IpExtOutOctets are 20+16+48=84.
 
 tcp 3-way handshake
-------------------
+-------------------
 On server side, we run::
 
   nstatuser@nstat-b:~$ nc -lknv 0.0.0.0 9000
@@ -873,7 +934,7 @@ ACK, so client sent 2 packets, received 1 packet, TcpInSegs increased
 1, TcpOutSegs increased 2.
 
 TCP normal traffic
-----------------
+------------------
 Run nc on server::
 
   nstatuser@nstat-b:~$ nc -lkv 0.0.0.0 9000
@@ -996,7 +1057,7 @@ and the packet received from client qualified for fast path, so it
 was counted into 'TcpExtTCPHPHits'.
 
 TcpExtTCPAbortOnClose
--------------------
+---------------------
 On the server side, we run below python script::
 
   import socket
@@ -1030,7 +1091,7 @@ If we run tcpdump on the server side, we could find the server sent a
 RST after we type Ctrl-C.
 
 TcpExtTCPAbortOnMemory and TcpExtTCPAbortOnTimeout
-----------------------------------------------
+--------------------------------------------------
 Below is an example which let the orphan socket count be higher than
 net.ipv4.tcp_max_orphans.
 Change tcp_max_orphans to a smaller value on client::
@@ -1152,7 +1213,7 @@ FIN_WAIT_1 state finally. So we wait for a few minutes, we could find
   TcpExtTCPAbortOnTimeout 10 0.0
 
 TcpExtTCPAbortOnLinger
---------------------
+----------------------
 The server side code::
 
   nstatuser@nstat-b:~$ cat server_linger.py
@@ -1197,7 +1258,7 @@ After run client_linger.py, check the output of nstat::
   TcpExtTCPAbortOnLinger 1 0.0
 
 TcpExtTCPRcvCoalesce
-------------------
+--------------------
 On the server, we run a program which listen on TCP port 9000, but
 doesn't read any data::
 
@@ -1257,7 +1318,7 @@ the receiving queue. So the TCP layer merged the two packets, and we
 could find the TcpExtTCPRcvCoalesce increased 1.
 
 TcpExtListenOverflows and TcpExtListenDrops
------------------------------------------
+-------------------------------------------
 On server, run the nc command, listen on port 9000::
 
   nstatuser@nstat-b:~$ nc -lkv 0.0.0.0 9000
@@ -1305,7 +1366,7 @@ TcpExtListenOverflows and TcpExtListenDrops would be larger, because
 the SYN of the 4th nc was dropped, the client was retrying.
 
 IpInAddrErrors, IpExtInNoRoutes and IpOutNoRoutes
------------------------------------------------
+-------------------------------------------------
 server A IP address: 192.168.122.250
 server B IP address: 192.168.122.251
 Prepare on server A, add a route to server B::
@@ -1400,7 +1461,7 @@ a route for the 8.8.8.8 IP address, so server B increased
 IpOutNoRoutes.
 
 TcpExtTCPACKSkippedSynRecv
-------------------------
+--------------------------
 In this test, we send 3 same SYN packets from client to server. The
 first SYN will let server create a socket, set it to Syn-Recv status,
 and reply a SYN/ACK. The second SYN will let server reply the SYN/ACK
@@ -1448,7 +1509,7 @@ Check snmp counter on nstat-b::
 As we expected, TcpExtTCPACKSkippedSynRecv is 1.
 
 TcpExtTCPACKSkippedPAWS
----------------------
+-----------------------
 To trigger PAWS, we could send an old SYN.
 
 On nstat-b, let nc listen on port 9000::
@@ -1485,7 +1546,7 @@ failed, the nstat-b replied an ACK for the first SYN, skipped the ACK
 for the second SYN, and updated TcpExtTCPACKSkippedPAWS.
 
 TcpExtTCPACKSkippedSeq
---------------------
+----------------------
 To trigger TcpExtTCPACKSkippedSeq, we send packets which have valid
 timestamp (to pass PAWS check) but the sequence number is out of
 window. The linux TCP stack would avoid to skip if the packet has
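
The TcpExtTCPAbortOnClose behaviour documented above is also easy to
reproduce from C; the following self-contained loopback demo (illustrative
only, not part of the patch) closes a socket while unread data is queued,
which makes the kernel reply with a RST instead of a FIN. Compare
"nstat -az TcpExtTCPAbortOnClose" before and after running it:

	/* Illustrative loopback demo that bumps TcpExtTCPAbortOnClose. */
	#include <arpa/inet.h>
	#include <netinet/in.h>
	#include <sys/socket.h>
	#include <unistd.h>

	int main(void)
	{
		struct sockaddr_in addr = { .sin_family = AF_INET };
		socklen_t len = sizeof(addr);
		int lsn, cli, srv;

		addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

		lsn = socket(AF_INET, SOCK_STREAM, 0);	/* listener, ephemeral port */
		bind(lsn, (struct sockaddr *)&addr, sizeof(addr));
		listen(lsn, 1);
		getsockname(lsn, (struct sockaddr *)&addr, &len);

		cli = socket(AF_INET, SOCK_STREAM, 0);
		connect(cli, (struct sockaddr *)&addr, sizeof(addr));
		srv = accept(lsn, NULL, NULL);

		write(cli, "unread", 6);	/* queue data the server never reads */
		sleep(1);			/* let the data land in srv's queue */
		close(srv);			/* unread data -> RST, counter +1 */

		close(cli);
		close(lsn);
		return 0;
	}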
diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
index 82236a17b5e6..f3244d87512a 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -196,7 +196,7 @@ The switch device will learn/forget source MAC address/VLAN on ingress packets
196and notify the switch driver of the mac/vlan/port tuples. The switch driver, 196and notify the switch driver of the mac/vlan/port tuples. The switch driver,
197in turn, will notify the bridge driver using the switchdev notifier call: 197in turn, will notify the bridge driver using the switchdev notifier call:
198 198
199 err = call_switchdev_notifiers(val, dev, info); 199 err = call_switchdev_notifiers(val, dev, info, extack);
200 200
201Where val is SWITCHDEV_FDB_ADD when learning and SWITCHDEV_FDB_DEL when 201Where val is SWITCHDEV_FDB_ADD when learning and SWITCHDEV_FDB_DEL when
202forgetting, and info points to a struct switchdev_notifier_fdb_info. On 202forgetting, and info points to a struct switchdev_notifier_fdb_info. On
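
A hedged sketch of the driver side of this call; the helper, port_dev, mac and vid are placeholders, and SWITCHDEV_FDB_ADD follows the constant named in the text above:

	#include <net/switchdev.h>

	static void example_fdb_learned(struct net_device *port_dev,
					const unsigned char *mac, u16 vid)
	{
		struct switchdev_notifier_fdb_info fdb_info = {
			.addr = mac,	/* MAC/VLAN tuple learned on the port */
			.vid = vid,
		};

		/* Learning: report the tuple to the bridge. extack may be
		 * NULL when no netlink request is being served.
		 */
		call_switchdev_notifiers(SWITCHDEV_FDB_ADD, port_dev,
					 &fdb_info.info, NULL);
	}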
diff --git a/MAINTAINERS b/MAINTAINERS
index 51029a425dbe..8e2c82f4c72f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10575,6 +10575,7 @@ F: Documentation/devicetree/bindings/net/dsa/
10575F: net/dsa/ 10575F: net/dsa/
10576F: include/net/dsa.h 10576F: include/net/dsa.h
10577F: include/linux/dsa/ 10577F: include/linux/dsa/
10578F: include/linux/platform_data/dsa.h
10578F: drivers/net/dsa/ 10579F: drivers/net/dsa/
10579 10580
10580NETWORKING [GENERAL] 10581NETWORKING [GENERAL]
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 065fb372e355..b1c9b542c021 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -115,4 +115,6 @@
115#define SO_TXTIME 61 115#define SO_TXTIME 61
116#define SCM_TXTIME SO_TXTIME 116#define SCM_TXTIME SO_TXTIME
117 117
118#define SO_BINDTOIFINDEX 62
119
118#endif /* _UAPI_ASM_SOCKET_H */ 120#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
index 83a7ec4c16d0..c67f92bfa30e 100644
--- a/arch/arm/mach-orion5x/common.c
+++ b/arch/arm/mach-orion5x/common.c
@@ -20,7 +20,7 @@
20#include <linux/delay.h> 20#include <linux/delay.h>
21#include <linux/clk-provider.h> 21#include <linux/clk-provider.h>
22#include <linux/cpu.h> 22#include <linux/cpu.h>
23#include <net/dsa.h> 23#include <linux/platform_data/dsa.h>
24#include <asm/page.h> 24#include <asm/page.h>
25#include <asm/setup.h> 25#include <asm/setup.h>
26#include <asm/system_misc.h> 26#include <asm/system_misc.h>
diff --git a/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c b/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c
index a3c1336d30c9..c65ab7db36ad 100644
--- a/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c
+++ b/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c
@@ -16,7 +16,7 @@
16#include <linux/mtd/physmap.h> 16#include <linux/mtd/physmap.h>
17#include <linux/mv643xx_eth.h> 17#include <linux/mv643xx_eth.h>
18#include <linux/ethtool.h> 18#include <linux/ethtool.h>
19#include <net/dsa.h> 19#include <linux/platform_data/dsa.h>
20#include <asm/mach-types.h> 20#include <asm/mach-types.h>
21#include <asm/mach/arch.h> 21#include <asm/mach/arch.h>
22#include <asm/mach/pci.h> 22#include <asm/mach/pci.h>
diff --git a/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c b/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c
index 252efe29bd1a..76b8138d9d79 100644
--- a/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c
+++ b/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c
@@ -17,7 +17,7 @@
17#include <linux/mv643xx_eth.h> 17#include <linux/mv643xx_eth.h>
18#include <linux/ethtool.h> 18#include <linux/ethtool.h>
19#include <linux/i2c.h> 19#include <linux/i2c.h>
20#include <net/dsa.h> 20#include <linux/platform_data/dsa.h>
21#include <asm/mach-types.h> 21#include <asm/mach-types.h>
22#include <asm/mach/arch.h> 22#include <asm/mach/arch.h>
23#include <asm/mach/pci.h> 23#include <asm/mach/pci.h>
diff --git a/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c b/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c
index f4f1dbe1d91d..5f388a1ed1e4 100644
--- a/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c
+++ b/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c
@@ -18,7 +18,7 @@
18#include <linux/spi/spi.h> 18#include <linux/spi/spi.h>
19#include <linux/spi/flash.h> 19#include <linux/spi/flash.h>
20#include <linux/ethtool.h> 20#include <linux/ethtool.h>
21#include <net/dsa.h> 21#include <linux/platform_data/dsa.h>
22#include <asm/mach-types.h> 22#include <asm/mach-types.h>
23#include <asm/mach/arch.h> 23#include <asm/mach/arch.h>
24#include <asm/mach/pci.h> 24#include <asm/mach/pci.h>
diff --git a/arch/arm/mach-orion5x/wnr854t-setup.c b/arch/arm/mach-orion5x/wnr854t-setup.c
index d162d4c7f85d..83589a28a491 100644
--- a/arch/arm/mach-orion5x/wnr854t-setup.c
+++ b/arch/arm/mach-orion5x/wnr854t-setup.c
@@ -15,7 +15,7 @@
15#include <linux/mtd/physmap.h> 15#include <linux/mtd/physmap.h>
16#include <linux/mv643xx_eth.h> 16#include <linux/mv643xx_eth.h>
17#include <linux/ethtool.h> 17#include <linux/ethtool.h>
18#include <net/dsa.h> 18#include <linux/platform_data/dsa.h>
19#include <asm/mach-types.h> 19#include <asm/mach-types.h>
20#include <asm/mach/arch.h> 20#include <asm/mach/arch.h>
21#include <asm/mach/pci.h> 21#include <asm/mach/pci.h>
diff --git a/arch/arm/mach-orion5x/wrt350n-v2-setup.c b/arch/arm/mach-orion5x/wrt350n-v2-setup.c
index 9250bb2e429c..cea08d4a2597 100644
--- a/arch/arm/mach-orion5x/wrt350n-v2-setup.c
+++ b/arch/arm/mach-orion5x/wrt350n-v2-setup.c
@@ -18,7 +18,7 @@
18#include <linux/leds.h> 18#include <linux/leds.h>
19#include <linux/gpio_keys.h> 19#include <linux/gpio_keys.h>
20#include <linux/input.h> 20#include <linux/input.h>
21#include <net/dsa.h> 21#include <linux/platform_data/dsa.h>
22#include <asm/mach-types.h> 22#include <asm/mach-types.h>
23#include <asm/mach/arch.h> 23#include <asm/mach/arch.h>
24#include <asm/mach/pci.h> 24#include <asm/mach/pci.h>
diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index a2399fd66e97..a6c81ce00f52 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -18,7 +18,7 @@
18#include <linux/clkdev.h> 18#include <linux/clkdev.h>
19#include <linux/mv643xx_eth.h> 19#include <linux/mv643xx_eth.h>
20#include <linux/mv643xx_i2c.h> 20#include <linux/mv643xx_i2c.h>
21#include <net/dsa.h> 21#include <linux/platform_data/dsa.h>
22#include <linux/platform_data/dma-mv_xor.h> 22#include <linux/platform_data/dma-mv_xor.h>
23#include <linux/platform_data/usb-ehci-orion.h> 23#include <linux/platform_data/usb-ehci-orion.h>
24#include <plat/common.h> 24#include <plat/common.h>
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index c872c4e6bafb..ba0d245f9576 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -117,4 +117,6 @@
117#define SO_TXTIME 61 117#define SO_TXTIME 61
118#define SCM_TXTIME SO_TXTIME 118#define SCM_TXTIME SO_TXTIME
119 119
120#define SO_BINDTOIFINDEX 62
121
120#endif /* _ASM_IA64_SOCKET_H */ 122#endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 71370fb3ceef..73e25e35d803 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -126,4 +126,6 @@
126#define SO_TXTIME 61 126#define SO_TXTIME 61
127#define SCM_TXTIME SO_TXTIME 127#define SCM_TXTIME SO_TXTIME
128 128
129#define SO_BINDTOIFINDEX 62
130
129#endif /* _UAPI_ASM_SOCKET_H */ 131#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 061b9cf2a779..52bed5976cbe 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -107,4 +107,6 @@
107#define SO_TXTIME 0x4036 107#define SO_TXTIME 0x4036
108#define SCM_TXTIME SO_TXTIME 108#define SCM_TXTIME SO_TXTIME
109 109
110#define SO_BINDTOIFINDEX 0x4037
111
110#endif /* _UAPI_ASM_SOCKET_H */ 112#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 39d901476ee5..49c971587087 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -114,4 +114,6 @@
114#define SO_TXTIME 61 114#define SO_TXTIME 61
115#define SCM_TXTIME SO_TXTIME 115#define SCM_TXTIME SO_TXTIME
116 116
117#define SO_BINDTOIFINDEX 62
118
117#endif /* _ASM_SOCKET_H */ 119#endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 7ea35e5601b6..bbdb81594dd4 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -104,6 +104,8 @@
104#define SO_TXTIME 0x003f 104#define SO_TXTIME 0x003f
105#define SCM_TXTIME SO_TXTIME 105#define SCM_TXTIME SO_TXTIME
106 106
107#define SO_BINDTOIFINDEX 0x0041
108
107/* Security levels - as per NRL IPv6 - don't actually do anything */ 109/* Security levels - as per NRL IPv6 - don't actually do anything */
108#define SO_SECURITY_AUTHENTICATION 0x5001 110#define SO_SECURITY_AUTHENTICATION 0x5001
109#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 111#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index 1de07a7f7680..b434217783d0 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -119,4 +119,6 @@
119#define SO_TXTIME 61 119#define SO_TXTIME 61
120#define SCM_TXTIME SO_TXTIME 120#define SCM_TXTIME SO_TXTIME
121 121
122#define SO_BINDTOIFINDEX 62
123
122#endif /* _XTENSA_SOCKET_H */ 124#endif /* _XTENSA_SOCKET_H */
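
Each architecture above gains the same option (with arch-specific numbering on parisc and sparc). From userspace it behaves like SO_BINDTODEVICE but takes an int interface index instead of a name; a sketch, assuming the libc headers already carry the constant ("eth0" is only an example):

	#include <net/if.h>
	#include <stdio.h>
	#include <sys/socket.h>

	int main(void)
	{
		int fd = socket(AF_INET, SOCK_DGRAM, 0);
		int ifindex = if_nametoindex("eth0");

		/* Bind the socket to the interface by index, not by name */
		if (setsockopt(fd, SOL_SOCKET, SO_BINDTOIFINDEX,
			       &ifindex, sizeof(ifindex)) < 0)
			perror("setsockopt(SO_BINDTOIFINDEX)");
		return 0;
	}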
diff --git a/drivers/isdn/hisax/netjet.c b/drivers/isdn/hisax/netjet.c
index e932a152c405..d7b011c8d692 100644
--- a/drivers/isdn/hisax/netjet.c
+++ b/drivers/isdn/hisax/netjet.c
@@ -332,7 +332,7 @@ static int make_raw_data_56k(struct BCState *bcs) {
332 bitcnt = 0; 332 bitcnt = 0;
333 } 333 }
334 val >>= 1; 334 val >>= 1;
335 }; 335 }
336 fcs = PPP_INITFCS; 336 fcs = PPP_INITFCS;
337 for (i = 0; i < bcs->tx_skb->len; i++) { 337 for (i = 0; i < bcs->tx_skb->len; i++) {
338 val = bcs->tx_skb->data[i]; 338 val = bcs->tx_skb->data[i];
@@ -415,7 +415,7 @@ static void read_raw(struct BCState *bcs, u_int *buf, int cnt) {
415 else { // it's 56K 415 else { // it's 56K
416 mask = 0x7f; 416 mask = 0x7f;
417 bits = 7; 417 bits = 7;
418 }; 418 }
419 for (i = 0; i < cnt; i++) { 419 for (i = 0; i < cnt; i++) {
420 val = bcs->channel ? ((*p >> 8) & 0xff) : (*p & 0xff); 420 val = bcs->channel ? ((*p >> 8) & 0xff) : (*p & 0xff);
421 p++; 421 p++;
@@ -623,7 +623,7 @@ void netjet_fill_dma(struct BCState *bcs)
623 else { // it's 56k 623 else { // it's 56k
624 if (make_raw_data_56k(bcs)) 624 if (make_raw_data_56k(bcs))
625 return; 625 return;
626 }; 626 }
627 if (bcs->cs->debug & L1_DEB_HSCX) 627 if (bcs->cs->debug & L1_DEB_HSCX)
628 debugl1(bcs->cs, "tiger fill_dma2: c%d %4lx", bcs->channel, 628 debugl1(bcs->cs, "tiger fill_dma2: c%d %4lx", bcs->channel,
629 bcs->Flag); 629 bcs->Flag);
diff --git a/drivers/isdn/hisax/q931.c b/drivers/isdn/hisax/q931.c
index 298c8dba0321..6b8c3fbe3965 100644
--- a/drivers/isdn/hisax/q931.c
+++ b/drivers/isdn/hisax/q931.c
@@ -598,7 +598,7 @@ prcalling(char *dest, u_char *p)
598 dp += prbits(dp, *++p, 8, 8); 598 dp += prbits(dp, *++p, 8, 8);
599 *dp++ = '\n'; 599 *dp++ = '\n';
600 l--; 600 l--;
601 }; 601 }
602 p++; 602 p++;
603 603
604 dp += sprintf(dp, " number digits "); 604 dp += sprintf(dp, " number digits ");
diff --git a/drivers/isdn/hisax/st5481.h b/drivers/isdn/hisax/st5481.h
index 8cd2d8277426..b421b86ca7da 100644
--- a/drivers/isdn/hisax/st5481.h
+++ b/drivers/isdn/hisax/st5481.h
@@ -512,7 +512,7 @@ static inline const char *ST5481_CMD_string(int evt)
512 case ST5481_CMD_AR10: return "AR10"; 512 case ST5481_CMD_AR10: return "AR10";
513 case ST5481_CMD_ARL: return "ARL"; 513 case ST5481_CMD_ARL: return "ARL";
514 case ST5481_CMD_PDN: return "PDN"; 514 case ST5481_CMD_PDN: return "PDN";
515 }; 515 }
516 516
517 sprintf(s, "0x%x", evt); 517 sprintf(s, "0x%x", evt);
518 return s; 518 return s;
diff --git a/drivers/isdn/isdnloop/isdnloop.c b/drivers/isdn/isdnloop/isdnloop.c
index a4597e96c916..f4253d468ae1 100644
--- a/drivers/isdn/isdnloop/isdnloop.c
+++ b/drivers/isdn/isdnloop/isdnloop.c
@@ -72,7 +72,7 @@ isdnloop_bchan_send(isdnloop_card *card, int ch)
72 printk(KERN_WARNING "isdnloop: no rcard, skb dropped\n"); 72 printk(KERN_WARNING "isdnloop: no rcard, skb dropped\n");
73 dev_kfree_skb(skb); 73 dev_kfree_skb(skb);
74 74
75 }; 75 }
76 cmd.command = ISDN_STAT_BSENT; 76 cmd.command = ISDN_STAT_BSENT;
77 cmd.parm.length = len; 77 cmd.parm.length = len;
78 card->interface.statcallb(&cmd); 78 card->interface.statcallb(&cmd);
diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index 693a67f45bef..27d092cab40e 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -1069,10 +1069,10 @@ static int gswip_probe(struct platform_device *pdev)
1069 version = gswip_switch_r(priv, GSWIP_VERSION); 1069 version = gswip_switch_r(priv, GSWIP_VERSION);
1070 1070
1071 /* bring up the mdio bus */ 1071 /* bring up the mdio bus */
1072 gphy_fw_np = of_find_compatible_node(pdev->dev.of_node, NULL, 1072 gphy_fw_np = of_get_compatible_child(dev->of_node, "lantiq,gphy-fw");
1073 "lantiq,gphy-fw");
1074 if (gphy_fw_np) { 1073 if (gphy_fw_np) {
1075 err = gswip_gphy_fw_list(priv, gphy_fw_np, version); 1074 err = gswip_gphy_fw_list(priv, gphy_fw_np, version);
1075 of_node_put(gphy_fw_np);
1076 if (err) { 1076 if (err) {
1077 dev_err(dev, "gphy fw probe failed\n"); 1077 dev_err(dev, "gphy fw probe failed\n");
1078 return err; 1078 return err;
@@ -1080,13 +1080,12 @@ static int gswip_probe(struct platform_device *pdev)
1080 } 1080 }
1081 1081
1082 /* bring up the mdio bus */ 1082 /* bring up the mdio bus */
1083 mdio_np = of_find_compatible_node(pdev->dev.of_node, NULL, 1083 mdio_np = of_get_compatible_child(dev->of_node, "lantiq,xrx200-mdio");
1084 "lantiq,xrx200-mdio");
1085 if (mdio_np) { 1084 if (mdio_np) {
1086 err = gswip_mdio(priv, mdio_np); 1085 err = gswip_mdio(priv, mdio_np);
1087 if (err) { 1086 if (err) {
1088 dev_err(dev, "mdio probe failed\n"); 1087 dev_err(dev, "mdio probe failed\n");
1089 goto gphy_fw; 1088 goto put_mdio_node;
1090 } 1089 }
1091 } 1090 }
1092 1091
@@ -1099,7 +1098,7 @@ static int gswip_probe(struct platform_device *pdev)
1099 dev_err(dev, "wrong CPU port defined, HW only supports port: %i", 1098 dev_err(dev, "wrong CPU port defined, HW only supports port: %i",
1100 priv->hw_info->cpu_port); 1099 priv->hw_info->cpu_port);
1101 err = -EINVAL; 1100 err = -EINVAL;
1102 goto mdio_bus; 1101 goto disable_switch;
1103 } 1102 }
1104 1103
1105 platform_set_drvdata(pdev, priv); 1104 platform_set_drvdata(pdev, priv);
@@ -1109,10 +1108,14 @@ static int gswip_probe(struct platform_device *pdev)
1109 (version & GSWIP_VERSION_MOD_MASK) >> GSWIP_VERSION_MOD_SHIFT); 1108 (version & GSWIP_VERSION_MOD_MASK) >> GSWIP_VERSION_MOD_SHIFT);
1110 return 0; 1109 return 0;
1111 1110
1111disable_switch:
1112 gswip_mdio_mask(priv, GSWIP_MDIO_GLOB_ENABLE, 0, GSWIP_MDIO_GLOB);
1113 dsa_unregister_switch(priv->ds);
1112mdio_bus: 1114mdio_bus:
1113 if (mdio_np) 1115 if (mdio_np)
1114 mdiobus_unregister(priv->ds->slave_mii_bus); 1116 mdiobus_unregister(priv->ds->slave_mii_bus);
1115gphy_fw: 1117put_mdio_node:
1118 of_node_put(mdio_np);
1116 for (i = 0; i < priv->num_gphy_fw; i++) 1119 for (i = 0; i < priv->num_gphy_fw; i++)
1117 gswip_gphy_fw_remove(priv, &priv->gphy_fw[i]); 1120 gswip_gphy_fw_remove(priv, &priv->gphy_fw[i]);
1118 return err; 1121 return err;
@@ -1123,16 +1126,15 @@ static int gswip_remove(struct platform_device *pdev)
1123 struct gswip_priv *priv = platform_get_drvdata(pdev); 1126 struct gswip_priv *priv = platform_get_drvdata(pdev);
1124 int i; 1127 int i;
1125 1128
1126 if (!priv)
1127 return 0;
1128
1129 /* disable the switch */ 1129 /* disable the switch */
1130 gswip_mdio_mask(priv, GSWIP_MDIO_GLOB_ENABLE, 0, GSWIP_MDIO_GLOB); 1130 gswip_mdio_mask(priv, GSWIP_MDIO_GLOB_ENABLE, 0, GSWIP_MDIO_GLOB);
1131 1131
1132 dsa_unregister_switch(priv->ds); 1132 dsa_unregister_switch(priv->ds);
1133 1133
1134 if (priv->ds->slave_mii_bus) 1134 if (priv->ds->slave_mii_bus) {
1135 mdiobus_unregister(priv->ds->slave_mii_bus); 1135 mdiobus_unregister(priv->ds->slave_mii_bus);
1136 of_node_put(priv->ds->slave_mii_bus->dev.of_node);
1137 }
1136 1138
1137 for (i = 0; i < priv->num_gphy_fw; i++) 1139 for (i = 0; i < priv->num_gphy_fw; i++)
1138 gswip_gphy_fw_remove(priv, &priv->gphy_fw[i]); 1140 gswip_gphy_fw_remove(priv, &priv->gphy_fw[i]);
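
The of_get_compatible_child() conversion above also plugs a reference leak: unlike of_find_compatible_node(), it searches only direct children and returns the node with its refcount raised, which the caller must drop. The pattern, sketched with hypothetical names ("vendor,example" and do_setup() are placeholders), as done for the gphy-fw node:

	struct device_node *child;
	int err;

	child = of_get_compatible_child(dev->of_node, "vendor,example");
	if (child) {
		err = do_setup(child);		/* placeholder consumer */
		of_node_put(child);		/* drop the reference we took */
		if (err)
			return err;
	}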
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h
index 46ee2c01f4c5..066765fbef06 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h
@@ -449,7 +449,7 @@ static inline void bnx2x_init_fw_wrr(const struct cmng_init_input *input_data,
449 ccd[cos] = 449 ccd[cos] =
450 (u32)input_data->cos_min_rate[cos] * 100 * 450 (u32)input_data->cos_min_rate[cos] * 100 *
451 (T_FAIR_COEF / (8 * 100 * cosWeightSum)); 451 (T_FAIR_COEF / (8 * 100 * cosWeightSum));
452 if (ccd[cos] < pdata->fair_vars.fair_threshold 452 if (ccd[cos] < pdata->fair_vars.fair_threshold
453 + MIN_ABOVE_THRESH) { 453 + MIN_ABOVE_THRESH) {
454 ccd[cos] = 454 ccd[cos] =
455 pdata->fair_vars.fair_threshold + 455 pdata->fair_vars.fair_threshold +
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index 98d4c5a3ff21..29738dfa878c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
@@ -837,49 +837,45 @@ static int bnx2x_ets_e3b0_set_cos_bw(struct bnx2x *bp,
837 837
838 switch (cos_entry) { 838 switch (cos_entry) {
839 case 0: 839 case 0:
840 nig_reg_adress_crd_weight = 840 nig_reg_adress_crd_weight =
841 (port) ? NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_0 : 841 (port) ? NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_0 :
842 NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_0; 842 NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_0;
843 pbf_reg_adress_crd_weight = (port) ? 843 pbf_reg_adress_crd_weight = (port) ?
844 PBF_REG_COS0_WEIGHT_P1 : PBF_REG_COS0_WEIGHT_P0; 844 PBF_REG_COS0_WEIGHT_P1 : PBF_REG_COS0_WEIGHT_P0;
845 break; 845 break;
846 case 1: 846 case 1:
847 nig_reg_adress_crd_weight = (port) ? 847 nig_reg_adress_crd_weight = (port) ?
848 NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_1 : 848 NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_1 :
849 NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_1; 849 NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_1;
850 pbf_reg_adress_crd_weight = (port) ? 850 pbf_reg_adress_crd_weight = (port) ?
851 PBF_REG_COS1_WEIGHT_P1 : PBF_REG_COS1_WEIGHT_P0; 851 PBF_REG_COS1_WEIGHT_P1 : PBF_REG_COS1_WEIGHT_P0;
852 break; 852 break;
853 case 2: 853 case 2:
854 nig_reg_adress_crd_weight = (port) ? 854 nig_reg_adress_crd_weight = (port) ?
855 NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_2 : 855 NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_2 :
856 NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_2; 856 NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_2;
857 857
858 pbf_reg_adress_crd_weight = (port) ? 858 pbf_reg_adress_crd_weight = (port) ?
859 PBF_REG_COS2_WEIGHT_P1 : PBF_REG_COS2_WEIGHT_P0; 859 PBF_REG_COS2_WEIGHT_P1 : PBF_REG_COS2_WEIGHT_P0;
860 break; 860 break;
861 case 3: 861 case 3:
862 if (port) 862 if (port)
863 return -EINVAL; 863 return -EINVAL;
864 nig_reg_adress_crd_weight = 864 nig_reg_adress_crd_weight = NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_3;
865 NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_3; 865 pbf_reg_adress_crd_weight = PBF_REG_COS3_WEIGHT_P0;
866 pbf_reg_adress_crd_weight = 866 break;
867 PBF_REG_COS3_WEIGHT_P0;
868 break;
869 case 4: 867 case 4:
870 if (port) 868 if (port)
871 return -EINVAL; 869 return -EINVAL;
872 nig_reg_adress_crd_weight = 870 nig_reg_adress_crd_weight = NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_4;
873 NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_4; 871 pbf_reg_adress_crd_weight = PBF_REG_COS4_WEIGHT_P0;
874 pbf_reg_adress_crd_weight = PBF_REG_COS4_WEIGHT_P0; 872 break;
875 break;
876 case 5: 873 case 5:
877 if (port) 874 if (port)
878 return -EINVAL; 875 return -EINVAL;
879 nig_reg_adress_crd_weight = 876 nig_reg_adress_crd_weight = NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_5;
880 NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_5; 877 pbf_reg_adress_crd_weight = PBF_REG_COS5_WEIGHT_P0;
881 pbf_reg_adress_crd_weight = PBF_REG_COS5_WEIGHT_P0; 878 break;
882 break;
883 } 879 }
884 880
885 REG_WR(bp, nig_reg_adress_crd_weight, cos_bw_nig); 881 REG_WR(bp, nig_reg_adress_crd_weight, cos_bw_nig);
@@ -966,7 +962,7 @@ static int bnx2x_ets_e3b0_sp_pri_to_cos_set(const struct link_params *params,
966 if (pri >= max_num_of_cos) { 962 if (pri >= max_num_of_cos) {
967 DP(NETIF_MSG_LINK, "bnx2x_ets_e3b0_sp_pri_to_cos_set invalid " 963 DP(NETIF_MSG_LINK, "bnx2x_ets_e3b0_sp_pri_to_cos_set invalid "
968 "parameter Illegal strict priority\n"); 964 "parameter Illegal strict priority\n");
969 return -EINVAL; 965 return -EINVAL;
970 } 966 }
971 967
972 if (sp_pri_to_cos[pri] != DCBX_INVALID_COS) { 968 if (sp_pri_to_cos[pri] != DCBX_INVALID_COS) {
@@ -1845,28 +1841,28 @@ static int bnx2x_emac_enable(struct link_params *params,
1845 bnx2x_bits_en(bp, emac_base + EMAC_REG_EMAC_TX_MODE, 1841 bnx2x_bits_en(bp, emac_base + EMAC_REG_EMAC_TX_MODE,
1846 EMAC_TX_MODE_RESET); 1842 EMAC_TX_MODE_RESET);
1847 1843
1848 /* pause enable/disable */ 1844 /* pause enable/disable */
1849 bnx2x_bits_dis(bp, emac_base + EMAC_REG_EMAC_RX_MODE, 1845 bnx2x_bits_dis(bp, emac_base + EMAC_REG_EMAC_RX_MODE,
1850 EMAC_RX_MODE_FLOW_EN); 1846 EMAC_RX_MODE_FLOW_EN);
1851 1847
1852 bnx2x_bits_dis(bp, emac_base + EMAC_REG_EMAC_TX_MODE, 1848 bnx2x_bits_dis(bp, emac_base + EMAC_REG_EMAC_TX_MODE,
1853 (EMAC_TX_MODE_EXT_PAUSE_EN | 1849 (EMAC_TX_MODE_EXT_PAUSE_EN |
1854 EMAC_TX_MODE_FLOW_EN)); 1850 EMAC_TX_MODE_FLOW_EN));
1855 if (!(params->feature_config_flags & 1851 if (!(params->feature_config_flags &
1856 FEATURE_CONFIG_PFC_ENABLED)) { 1852 FEATURE_CONFIG_PFC_ENABLED)) {
1857 if (vars->flow_ctrl & BNX2X_FLOW_CTRL_RX) 1853 if (vars->flow_ctrl & BNX2X_FLOW_CTRL_RX)
1858 bnx2x_bits_en(bp, emac_base + 1854 bnx2x_bits_en(bp, emac_base +
1859 EMAC_REG_EMAC_RX_MODE, 1855 EMAC_REG_EMAC_RX_MODE,
1860 EMAC_RX_MODE_FLOW_EN); 1856 EMAC_RX_MODE_FLOW_EN);
1861 1857
1862 if (vars->flow_ctrl & BNX2X_FLOW_CTRL_TX) 1858 if (vars->flow_ctrl & BNX2X_FLOW_CTRL_TX)
1863 bnx2x_bits_en(bp, emac_base + 1859 bnx2x_bits_en(bp, emac_base +
1864 EMAC_REG_EMAC_TX_MODE, 1860 EMAC_REG_EMAC_TX_MODE,
1865 (EMAC_TX_MODE_EXT_PAUSE_EN | 1861 (EMAC_TX_MODE_EXT_PAUSE_EN |
1866 EMAC_TX_MODE_FLOW_EN)); 1862 EMAC_TX_MODE_FLOW_EN));
1867 } else 1863 } else
1868 bnx2x_bits_en(bp, emac_base + EMAC_REG_EMAC_TX_MODE, 1864 bnx2x_bits_en(bp, emac_base + EMAC_REG_EMAC_TX_MODE,
1869 EMAC_TX_MODE_FLOW_EN); 1865 EMAC_TX_MODE_FLOW_EN);
1870 1866
1871 /* KEEP_VLAN_TAG, promiscuous */ 1867 /* KEEP_VLAN_TAG, promiscuous */
1872 val = REG_RD(bp, emac_base + EMAC_REG_EMAC_RX_MODE); 1868 val = REG_RD(bp, emac_base + EMAC_REG_EMAC_RX_MODE);
@@ -6478,9 +6474,9 @@ int bnx2x_test_link(struct link_params *params, struct link_vars *vars,
6478 MDIO_REG_BANK_GP_STATUS, 6474 MDIO_REG_BANK_GP_STATUS,
6479 MDIO_GP_STATUS_TOP_AN_STATUS1, 6475 MDIO_GP_STATUS_TOP_AN_STATUS1,
6480 &gp_status); 6476 &gp_status);
6481 /* Link is up only if both local phy and external phy are up */ 6477 /* Link is up only if both local phy and external phy are up */
6482 if (!(gp_status & MDIO_GP_STATUS_TOP_AN_STATUS1_LINK_STATUS)) 6478 if (!(gp_status & MDIO_GP_STATUS_TOP_AN_STATUS1_LINK_STATUS))
6483 return -ESRCH; 6479 return -ESRCH;
6484 } 6480 }
6485 /* In XGXS loopback mode, do not check external PHY */ 6481 /* In XGXS loopback mode, do not check external PHY */
6486 if (params->loopback_mode == LOOPBACK_XGXS) 6482 if (params->loopback_mode == LOOPBACK_XGXS)
@@ -7293,8 +7289,8 @@ static int bnx2x_8073_xaui_wa(struct bnx2x *bp, struct bnx2x_phy *phy)
7293 DP(NETIF_MSG_LINK, 7289 DP(NETIF_MSG_LINK,
7294 "XAUI workaround has completed\n"); 7290 "XAUI workaround has completed\n");
7295 return 0; 7291 return 0;
7296 } 7292 }
7297 usleep_range(3000, 6000); 7293 usleep_range(3000, 6000);
7298 } 7294 }
7299 break; 7295 break;
7300 } 7296 }
@@ -12675,39 +12671,39 @@ static void bnx2x_init_bmac_loopback(struct link_params *params,
12675 struct link_vars *vars) 12671 struct link_vars *vars)
12676{ 12672{
12677 struct bnx2x *bp = params->bp; 12673 struct bnx2x *bp = params->bp;
12678 vars->link_up = 1; 12674 vars->link_up = 1;
12679 vars->line_speed = SPEED_10000; 12675 vars->line_speed = SPEED_10000;
12680 vars->duplex = DUPLEX_FULL; 12676 vars->duplex = DUPLEX_FULL;
12681 vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE; 12677 vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE;
12682 vars->mac_type = MAC_TYPE_BMAC; 12678 vars->mac_type = MAC_TYPE_BMAC;
12683 12679
12684 vars->phy_flags = PHY_XGXS_FLAG; 12680 vars->phy_flags = PHY_XGXS_FLAG;
12685 12681
12686 bnx2x_xgxs_deassert(params); 12682 bnx2x_xgxs_deassert(params);
12687 12683
12688 /* Set bmac loopback */ 12684 /* Set bmac loopback */
12689 bnx2x_bmac_enable(params, vars, 1, 1); 12685 bnx2x_bmac_enable(params, vars, 1, 1);
12690 12686
12691 REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port*4, 0); 12687 REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port * 4, 0);
12692} 12688}
12693 12689
12694static void bnx2x_init_emac_loopback(struct link_params *params, 12690static void bnx2x_init_emac_loopback(struct link_params *params,
12695 struct link_vars *vars) 12691 struct link_vars *vars)
12696{ 12692{
12697 struct bnx2x *bp = params->bp; 12693 struct bnx2x *bp = params->bp;
12698 vars->link_up = 1; 12694 vars->link_up = 1;
12699 vars->line_speed = SPEED_1000; 12695 vars->line_speed = SPEED_1000;
12700 vars->duplex = DUPLEX_FULL; 12696 vars->duplex = DUPLEX_FULL;
12701 vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE; 12697 vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE;
12702 vars->mac_type = MAC_TYPE_EMAC; 12698 vars->mac_type = MAC_TYPE_EMAC;
12703 12699
12704 vars->phy_flags = PHY_XGXS_FLAG; 12700 vars->phy_flags = PHY_XGXS_FLAG;
12705 12701
12706 bnx2x_xgxs_deassert(params); 12702 bnx2x_xgxs_deassert(params);
12707 /* Set bmac loopback */ 12703 /* Set bmac loopback */
12708 bnx2x_emac_enable(params, vars, 1); 12704 bnx2x_emac_enable(params, vars, 1);
12709 bnx2x_emac_program(params, vars); 12705 bnx2x_emac_program(params, vars);
12710 REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port*4, 0); 12706 REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port * 4, 0);
12711} 12707}
12712 12708
12713static void bnx2x_init_xmac_loopback(struct link_params *params, 12709static void bnx2x_init_xmac_loopback(struct link_params *params,
@@ -13073,12 +13069,12 @@ int bnx2x_link_reset(struct link_params *params, struct link_vars *vars,
13073 REG_WR(bp, NIG_REG_EGRESS_EMAC0_OUT_EN + port*4, 0); 13069 REG_WR(bp, NIG_REG_EGRESS_EMAC0_OUT_EN + port*4, 0);
13074 } 13070 }
13075 13071
13076 if (!CHIP_IS_E3(bp)) { 13072 if (!CHIP_IS_E3(bp)) {
13077 bnx2x_set_bmac_rx(bp, params->chip_id, port, 0); 13073 bnx2x_set_bmac_rx(bp, params->chip_id, port, 0);
13078 } else { 13074 } else {
13079 bnx2x_set_xmac_rxtx(params, 0); 13075 bnx2x_set_xmac_rxtx(params, 0);
13080 bnx2x_set_umac_rxtx(params, 0); 13076 bnx2x_set_umac_rxtx(params, 0);
13081 } 13077 }
13082 /* Disable emac */ 13078 /* Disable emac */
13083 if (!CHIP_IS_E3(bp)) 13079 if (!CHIP_IS_E3(bp))
13084 REG_WR(bp, NIG_REG_NIG_EMAC0_EN + port*4, 0); 13080 REG_WR(bp, NIG_REG_NIG_EMAC0_EN + port*4, 0);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 3b5b47e98c73..0cec82450e19 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -11298,7 +11298,7 @@ static void bnx2x_link_settings_supported(struct bnx2x *bp, u32 switch_cfg)
11298 dev_info.port_hw_config[port].external_phy_config), 11298 dev_info.port_hw_config[port].external_phy_config),
11299 SHMEM_RD(bp, 11299 SHMEM_RD(bp,
11300 dev_info.port_hw_config[port].external_phy_config2)); 11300 dev_info.port_hw_config[port].external_phy_config2));
11301 return; 11301 return;
11302 } 11302 }
11303 11303
11304 if (CHIP_IS_E3(bp)) 11304 if (CHIP_IS_E3(bp))
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index a9eaaf3e73a4..7b22a6d8514c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
@@ -2977,8 +2977,8 @@ static inline void bnx2x_mcast_hdl_pending_del_e2(struct bnx2x *bp,
2977 2977
2978 cmd_pos->data.macs_num--; 2978 cmd_pos->data.macs_num--;
2979 2979
2980 DP(BNX2X_MSG_SP, "Deleting MAC. %d left,cnt is %d\n", 2980 DP(BNX2X_MSG_SP, "Deleting MAC. %d left,cnt is %d\n",
2981 cmd_pos->data.macs_num, cnt); 2981 cmd_pos->data.macs_num, cnt);
2982 2982
2983 /* Break if we reached the maximum 2983 /* Break if we reached the maximum
2984 * number of rules. 2984 * number of rules.
@@ -3597,8 +3597,8 @@ static int bnx2x_mcast_validate_e1(struct bnx2x *bp,
3597 /* RESTORE command will restore the entire multicast configuration */ 3597 /* RESTORE command will restore the entire multicast configuration */
3598 case BNX2X_MCAST_CMD_RESTORE: 3598 case BNX2X_MCAST_CMD_RESTORE:
3599 p->mcast_list_len = reg_sz; 3599 p->mcast_list_len = reg_sz;
3600 DP(BNX2X_MSG_SP, "Command %d, p->mcast_list_len=%d\n", 3600 DP(BNX2X_MSG_SP, "Command %d, p->mcast_list_len=%d\n",
3601 cmd, p->mcast_list_len); 3601 cmd, p->mcast_list_len);
3602 break; 3602 break;
3603 3603
3604 case BNX2X_MCAST_CMD_ADD: 3604 case BNX2X_MCAST_CMD_ADD:
@@ -3735,8 +3735,8 @@ static inline int bnx2x_mcast_handle_restore_cmd_e1(
3735 3735
3736 i++; 3736 i++;
3737 3737
3738 DP(BNX2X_MSG_SP, "About to configure %pM mcast MAC\n", 3738 DP(BNX2X_MSG_SP, "About to configure %pM mcast MAC\n",
3739 cfg_data.mac); 3739 cfg_data.mac);
3740 } 3740 }
3741 3741
3742 *rdata_idx = i; 3742 *rdata_idx = i;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
index 4852febbfec3..1a407d3c1d67 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
@@ -646,7 +646,7 @@ struct l2t_data *t4_init_l2t(unsigned int l2t_start, unsigned int l2t_end)
646 if (l2t_size < L2T_MIN_HASH_BUCKETS) 646 if (l2t_size < L2T_MIN_HASH_BUCKETS)
647 return NULL; 647 return NULL;
648 648
649 d = kvzalloc(sizeof(*d) + l2t_size * sizeof(struct l2t_entry), GFP_KERNEL); 649 d = kvzalloc(struct_size(d, l2tab, l2t_size), GFP_KERNEL);
650 if (!d) 650 if (!d)
651 return NULL; 651 return NULL;
652 652
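
struct_size() (from <linux/overflow.h>) computes sizeof(*d) plus n trailing flexible-array elements with saturating overflow checks, so a huge count makes the allocation fail instead of coming up short. A sketch with a simplified, hypothetical layout:

	#include <linux/overflow.h>

	struct entry {
		u32 key;
	};

	struct table {
		unsigned int nentries;
		struct entry tab[];	/* flexible array member */
	};

	struct table *d;

	/* Equivalent to sizeof(*d) + n * sizeof(d->tab[0]), saturating
	 * to SIZE_MAX on overflow so kvzalloc() returns NULL.
	 */
	d = kvzalloc(struct_size(d, tab, n), GFP_KERNEL);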
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index 0a82fcf16d35..c2586f44c29d 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -395,6 +395,7 @@ static void dm9000_set_io(struct board_info *db, int byte_width)
395 395
396 case 3: 396 case 3:
397 dev_dbg(db->dev, ": 3 byte IO, falling back to 16bit\n"); 397 dev_dbg(db->dev, ": 3 byte IO, falling back to 16bit\n");
398 /* fall through */
398 case 2: 399 case 2:
399 db->dumpblk = dm9000_dumpblk_16bit; 400 db->dumpblk = dm9000_dumpblk_16bit;
400 db->outblk = dm9000_outblk_16bit; 401 db->outblk = dm9000_outblk_16bit;
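
The comment is functional rather than cosmetic: gcc's -Wimplicit-fallthrough treats a /* fall through */ marker as an annotation that the missing break is intentional, so the warning stays useful elsewhere. In miniature (the helpers are placeholders):

	switch (byte_width) {
	case 3:
		pr_debug("3 byte IO, falling back to 16bit\n");
		/* fall through */
	case 2:
		setup_16bit_io();
		break;
	default:
		setup_8bit_io();
		break;
	}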
diff --git a/drivers/net/ethernet/freescale/dpaa2/Makefile b/drivers/net/ethernet/freescale/dpaa2/Makefile
index 2f424e0a8225..d1e78cdd512f 100644
--- a/drivers/net/ethernet/freescale/dpaa2/Makefile
+++ b/drivers/net/ethernet/freescale/dpaa2/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_FSL_DPAA2_ETH) += fsl-dpaa2-eth.o
7obj-$(CONFIG_FSL_DPAA2_PTP_CLOCK) += fsl-dpaa2-ptp.o 7obj-$(CONFIG_FSL_DPAA2_PTP_CLOCK) += fsl-dpaa2-ptp.o
8 8
9fsl-dpaa2-eth-objs := dpaa2-eth.o dpaa2-ethtool.o dpni.o 9fsl-dpaa2-eth-objs := dpaa2-eth.o dpaa2-ethtool.o dpni.o
10fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o
10fsl-dpaa2-ptp-objs := dpaa2-ptp.o dprtc.o 11fsl-dpaa2-ptp-objs := dpaa2-ptp.o dprtc.o
11 12
12# Needed by the tracing framework 13# Needed by the tracing framework
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c
new file mode 100644
index 000000000000..a027f4a9d0cc
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c
@@ -0,0 +1,237 @@
1// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
2/* Copyright 2015 Freescale Semiconductor Inc.
3 * Copyright 2018-2019 NXP
4 */
5#include <linux/module.h>
6#include <linux/debugfs.h>
7#include "dpaa2-eth.h"
8#include "dpaa2-eth-debugfs.h"
9
10#define DPAA2_ETH_DBG_ROOT "dpaa2-eth"
11
12static struct dentry *dpaa2_dbg_root;
13
14static int dpaa2_dbg_cpu_show(struct seq_file *file, void *offset)
15{
16 struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)file->private;
17 struct rtnl_link_stats64 *stats;
18 struct dpaa2_eth_drv_stats *extras;
19 int i;
20
21 seq_printf(file, "Per-CPU stats for %s\n", priv->net_dev->name);
22 seq_printf(file, "%s%16s%16s%16s%16s%16s%16s%16s%16s%16s\n",
23 "CPU", "Rx", "Rx Err", "Rx SG", "Tx", "Tx Err", "Tx conf",
24 "Tx SG", "Tx realloc", "Enq busy");
25
26 for_each_online_cpu(i) {
27 stats = per_cpu_ptr(priv->percpu_stats, i);
28 extras = per_cpu_ptr(priv->percpu_extras, i);
29 seq_printf(file, "%3d%16llu%16llu%16llu%16llu%16llu%16llu%16llu%16llu%16llu\n",
30 i,
31 stats->rx_packets,
32 stats->rx_errors,
33 extras->rx_sg_frames,
34 stats->tx_packets,
35 stats->tx_errors,
36 extras->tx_conf_frames,
37 extras->tx_sg_frames,
38 extras->tx_reallocs,
39 extras->tx_portal_busy);
40 }
41
42 return 0;
43}
44
45static int dpaa2_dbg_cpu_open(struct inode *inode, struct file *file)
46{
47 int err;
48 struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)inode->i_private;
49
50 err = single_open(file, dpaa2_dbg_cpu_show, priv);
51 if (err < 0)
52 netdev_err(priv->net_dev, "single_open() failed\n");
53
54 return err;
55}
56
57static const struct file_operations dpaa2_dbg_cpu_ops = {
58 .open = dpaa2_dbg_cpu_open,
59 .read = seq_read,
60 .llseek = seq_lseek,
61 .release = single_release,
62};
63
64static char *fq_type_to_str(struct dpaa2_eth_fq *fq)
65{
66 switch (fq->type) {
67 case DPAA2_RX_FQ:
68 return "Rx";
69 case DPAA2_TX_CONF_FQ:
70 return "Tx conf";
71 default:
72 return "N/A";
73 }
74}
75
76static int dpaa2_dbg_fqs_show(struct seq_file *file, void *offset)
77{
78 struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)file->private;
79 struct dpaa2_eth_fq *fq;
80 u32 fcnt, bcnt;
81 int i, err;
82
83 seq_printf(file, "FQ stats for %s:\n", priv->net_dev->name);
84 seq_printf(file, "%s%16s%16s%16s%16s\n",
85 "VFQID", "CPU", "Type", "Frames", "Pending frames");
86
87 for (i = 0; i < priv->num_fqs; i++) {
88 fq = &priv->fq[i];
89 err = dpaa2_io_query_fq_count(NULL, fq->fqid, &fcnt, &bcnt);
90 if (err)
91 fcnt = 0;
92
93 seq_printf(file, "%5d%16d%16s%16llu%16u\n",
94 fq->fqid,
95 fq->target_cpu,
96 fq_type_to_str(fq),
97 fq->stats.frames,
98 fcnt);
99 }
100
101 return 0;
102}
103
104static int dpaa2_dbg_fqs_open(struct inode *inode, struct file *file)
105{
106 int err;
107 struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)inode->i_private;
108
109 err = single_open(file, dpaa2_dbg_fqs_show, priv);
110 if (err < 0)
111 netdev_err(priv->net_dev, "single_open() failed\n");
112
113 return err;
114}
115
116static const struct file_operations dpaa2_dbg_fq_ops = {
117 .open = dpaa2_dbg_fqs_open,
118 .read = seq_read,
119 .llseek = seq_lseek,
120 .release = single_release,
121};
122
123static int dpaa2_dbg_ch_show(struct seq_file *file, void *offset)
124{
125 struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)file->private;
126 struct dpaa2_eth_channel *ch;
127 int i;
128
129 seq_printf(file, "Channel stats for %s:\n", priv->net_dev->name);
130 seq_printf(file, "%s%16s%16s%16s%16s\n",
131 "CHID", "CPU", "Deq busy", "CDANs", "Buf count");
132
133 for (i = 0; i < priv->num_channels; i++) {
134 ch = priv->channel[i];
135 seq_printf(file, "%4d%16d%16llu%16llu%16d\n",
136 ch->ch_id,
137 ch->nctx.desired_cpu,
138 ch->stats.dequeue_portal_busy,
139 ch->stats.cdan,
140 ch->buf_count);
141 }
142
143 return 0;
144}
145
146static int dpaa2_dbg_ch_open(struct inode *inode, struct file *file)
147{
148 int err;
149 struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)inode->i_private;
150
151 err = single_open(file, dpaa2_dbg_ch_show, priv);
152 if (err < 0)
153 netdev_err(priv->net_dev, "single_open() failed\n");
154
155 return err;
156}
157
158static const struct file_operations dpaa2_dbg_ch_ops = {
159 .open = dpaa2_dbg_ch_open,
160 .read = seq_read,
161 .llseek = seq_lseek,
162 .release = single_release,
163};
164
165void dpaa2_dbg_add(struct dpaa2_eth_priv *priv)
166{
167 if (!dpaa2_dbg_root)
168 return;
169
170 /* Create a directory for the interface */
171 priv->dbg.dir = debugfs_create_dir(priv->net_dev->name,
172 dpaa2_dbg_root);
173 if (!priv->dbg.dir) {
174 netdev_err(priv->net_dev, "debugfs_create_dir() failed\n");
175 return;
176 }
177
178 /* per-cpu stats file */
179 priv->dbg.cpu_stats = debugfs_create_file("cpu_stats", 0444,
180 priv->dbg.dir, priv,
181 &dpaa2_dbg_cpu_ops);
182 if (!priv->dbg.cpu_stats) {
183 netdev_err(priv->net_dev, "debugfs_create_file() failed\n");
184 goto err_cpu_stats;
185 }
186
187 /* per-fq stats file */
188 priv->dbg.fq_stats = debugfs_create_file("fq_stats", 0444,
189 priv->dbg.dir, priv,
190 &dpaa2_dbg_fq_ops);
191 if (!priv->dbg.fq_stats) {
192 netdev_err(priv->net_dev, "debugfs_create_file() failed\n");
193 goto err_fq_stats;
194 }
195
196	/* per-channel stats file */
197 priv->dbg.ch_stats = debugfs_create_file("ch_stats", 0444,
198 priv->dbg.dir, priv,
199 &dpaa2_dbg_ch_ops);
200	if (!priv->dbg.ch_stats) {
201 netdev_err(priv->net_dev, "debugfs_create_file() failed\n");
202 goto err_ch_stats;
203 }
204
205 return;
206
207err_ch_stats:
208 debugfs_remove(priv->dbg.fq_stats);
209err_fq_stats:
210 debugfs_remove(priv->dbg.cpu_stats);
211err_cpu_stats:
212 debugfs_remove(priv->dbg.dir);
213}
214
215void dpaa2_dbg_remove(struct dpaa2_eth_priv *priv)
216{
217 debugfs_remove(priv->dbg.fq_stats);
218 debugfs_remove(priv->dbg.ch_stats);
219 debugfs_remove(priv->dbg.cpu_stats);
220 debugfs_remove(priv->dbg.dir);
221}
222
223void dpaa2_eth_dbg_init(void)
224{
225 dpaa2_dbg_root = debugfs_create_dir(DPAA2_ETH_DBG_ROOT, NULL);
226 if (!dpaa2_dbg_root) {
227 pr_err("DPAA2-ETH: debugfs create failed\n");
228 return;
229 }
230
231 pr_debug("DPAA2-ETH: debugfs created\n");
232}
233
234void dpaa2_eth_dbg_exit(void)
235{
236 debugfs_remove(dpaa2_dbg_root);
237}
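
With debugfs mounted at the conventional /sys/kernel/debug, the files created above appear as /sys/kernel/debug/dpaa2-eth/<ifname>/{cpu_stats,fq_stats,ch_stats}, each readable with a plain cat.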
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.h
new file mode 100644
index 000000000000..4f63de997a26
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.h
@@ -0,0 +1,31 @@
1/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
2/* Copyright 2015 Freescale Semiconductor Inc.
3 * Copyright 2018-2019 NXP
4 */
5#ifndef DPAA2_ETH_DEBUGFS_H
6#define DPAA2_ETH_DEBUGFS_H
7
8#include <linux/dcache.h>
9
10struct dpaa2_eth_priv;
11
12struct dpaa2_debugfs {
13 struct dentry *dir;
14 struct dentry *fq_stats;
15 struct dentry *ch_stats;
16 struct dentry *cpu_stats;
17};
18
19#ifdef CONFIG_DEBUG_FS
20void dpaa2_eth_dbg_init(void);
21void dpaa2_eth_dbg_exit(void);
22void dpaa2_dbg_add(struct dpaa2_eth_priv *priv);
23void dpaa2_dbg_remove(struct dpaa2_eth_priv *priv);
24#else
25static inline void dpaa2_eth_dbg_init(void) {}
26static inline void dpaa2_eth_dbg_exit(void) {}
27static inline void dpaa2_dbg_add(struct dpaa2_eth_priv *priv) {}
28static inline void dpaa2_dbg_remove(struct dpaa2_eth_priv *priv) {}
29#endif /* CONFIG_DEBUG_FS */
30
31#endif /* DPAA2_ETH_DEBUGFS_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 1ca9a18139ec..04925c731f0b 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -1243,34 +1243,36 @@ enable_err:
1243 return err; 1243 return err;
1244} 1244}
1245 1245
1246/* The DPIO store must be empty when we call this, 1246/* Total number of in-flight frames on ingress queues */
1247 * at the end of every NAPI cycle. 1247static u32 ingress_fq_count(struct dpaa2_eth_priv *priv)
1248 */
1249static u32 drain_channel(struct dpaa2_eth_channel *ch)
1250{ 1248{
1251 u32 drained = 0, total = 0; 1249 struct dpaa2_eth_fq *fq;
1250 u32 fcnt = 0, bcnt = 0, total = 0;
1251 int i, err;
1252 1252
1253 do { 1253 for (i = 0; i < priv->num_fqs; i++) {
1254 pull_channel(ch); 1254 fq = &priv->fq[i];
1255 drained = consume_frames(ch, NULL); 1255 err = dpaa2_io_query_fq_count(NULL, fq->fqid, &fcnt, &bcnt);
1256 total += drained; 1256 if (err) {
1257 } while (drained); 1257 netdev_warn(priv->net_dev, "query_fq_count failed\n");
1258 break;
1259 }
1260 total += fcnt;
1261 }
1258 1262
1259 return total; 1263 return total;
1260} 1264}
1261 1265
1262static u32 drain_ingress_frames(struct dpaa2_eth_priv *priv) 1266static void wait_for_fq_empty(struct dpaa2_eth_priv *priv)
1263{ 1267{
1264 struct dpaa2_eth_channel *ch; 1268 int retries = 10;
1265 int i; 1269 u32 pending;
1266 u32 drained = 0;
1267
1268 for (i = 0; i < priv->num_channels; i++) {
1269 ch = priv->channel[i];
1270 drained += drain_channel(ch);
1271 }
1272 1270
1273 return drained; 1271 do {
1272 pending = ingress_fq_count(priv);
1273 if (pending)
1274 msleep(100);
1275 } while (pending && --retries);
1274} 1276}
1275 1277
1276static int dpaa2_eth_stop(struct net_device *net_dev) 1278static int dpaa2_eth_stop(struct net_device *net_dev)
@@ -1278,14 +1280,22 @@ static int dpaa2_eth_stop(struct net_device *net_dev)
1278 struct dpaa2_eth_priv *priv = netdev_priv(net_dev); 1280 struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
1279 int dpni_enabled = 0; 1281 int dpni_enabled = 0;
1280 int retries = 10; 1282 int retries = 10;
1281 u32 drained;
1282 1283
1283 netif_tx_stop_all_queues(net_dev); 1284 netif_tx_stop_all_queues(net_dev);
1284 netif_carrier_off(net_dev); 1285 netif_carrier_off(net_dev);
1285 1286
1286 /* Loop while dpni_disable() attempts to drain the egress FQs 1287 /* On dpni_disable(), the MC firmware will:
1287 * and confirm them back to us. 1288 * - stop MAC Rx and wait for all Rx frames to be enqueued to software
1289 * - cut off WRIOP dequeues from egress FQs and wait until transmission
1290 * of all in flight Tx frames is finished (and corresponding Tx conf
1291 * frames are enqueued back to software)
1292 *
1293 * Before calling dpni_disable(), we wait for all Tx frames to arrive
1294 * on WRIOP. After it finishes, wait until all remaining frames on Rx
1295 * and Tx conf queues are consumed on NAPI poll.
1288 */ 1296 */
1297 msleep(500);
1298
1289 do { 1299 do {
1290 dpni_disable(priv->mc_io, 0, priv->mc_token); 1300 dpni_disable(priv->mc_io, 0, priv->mc_token);
1291 dpni_is_enabled(priv->mc_io, 0, priv->mc_token, &dpni_enabled); 1301 dpni_is_enabled(priv->mc_io, 0, priv->mc_token, &dpni_enabled);
@@ -1300,19 +1310,9 @@ static int dpaa2_eth_stop(struct net_device *net_dev)
1300 */ 1310 */
1301 } 1311 }
1302 1312
1303 /* Wait for NAPI to complete on every core and disable it. 1313 wait_for_fq_empty(priv);
1304 * In particular, this will also prevent NAPI from being rescheduled if
1305 * a new CDAN is serviced, effectively discarding the CDAN. We therefore
1306 * don't even need to disarm the channels, except perhaps for the case
1307 * of a huge coalescing value.
1308 */
1309 disable_ch_napi(priv); 1314 disable_ch_napi(priv);
1310 1315
1311 /* Manually drain the Rx and TxConf queues */
1312 drained = drain_ingress_frames(priv);
1313 if (drained)
1314 netdev_dbg(net_dev, "Drained %d frames.\n", drained);
1315
1316 /* Empty the buffer pool */ 1316 /* Empty the buffer pool */
1317 drain_pool(priv); 1317 drain_pool(priv);
1318 1318
@@ -3083,6 +3083,10 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
3083 goto err_netdev_reg; 3083 goto err_netdev_reg;
3084 } 3084 }
3085 3085
3086#ifdef CONFIG_DEBUG_FS
3087 dpaa2_dbg_add(priv);
3088#endif
3089
3086 dev_info(dev, "Probed interface %s\n", net_dev->name); 3090 dev_info(dev, "Probed interface %s\n", net_dev->name);
3087 return 0; 3091 return 0;
3088 3092
@@ -3126,6 +3130,9 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev)
3126 net_dev = dev_get_drvdata(dev); 3130 net_dev = dev_get_drvdata(dev);
3127 priv = netdev_priv(net_dev); 3131 priv = netdev_priv(net_dev);
3128 3132
3133#ifdef CONFIG_DEBUG_FS
3134 dpaa2_dbg_remove(priv);
3135#endif
3129 unregister_netdev(net_dev); 3136 unregister_netdev(net_dev);
3130 3137
3131 if (priv->do_link_poll) 3138 if (priv->do_link_poll)
@@ -3170,4 +3177,25 @@ static struct fsl_mc_driver dpaa2_eth_driver = {
3170 .match_id_table = dpaa2_eth_match_id_table 3177 .match_id_table = dpaa2_eth_match_id_table
3171}; 3178};
3172 3179
3173module_fsl_mc_driver(dpaa2_eth_driver); 3180static int __init dpaa2_eth_driver_init(void)
3181{
3182 int err;
3183
3184 dpaa2_eth_dbg_init();
3185 err = fsl_mc_driver_register(&dpaa2_eth_driver);
3186 if (err) {
3187 dpaa2_eth_dbg_exit();
3188 return err;
3189 }
3190
3191 return 0;
3192}
3193
3194static void __exit dpaa2_eth_driver_exit(void)
3195{
3196 dpaa2_eth_dbg_exit();
3197 fsl_mc_driver_unregister(&dpaa2_eth_driver);
3198}
3199
3200module_init(dpaa2_eth_driver_init);
3201module_exit(dpaa2_eth_driver_exit);
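
Expanding module_fsl_mc_driver() into explicit init/exit handlers lets the driver create the shared debugfs root exactly once per module load, before any device can probe, and remove it only after the driver has unregistered.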
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 69c965de192b..31fe486ec25f 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -16,6 +16,7 @@
16#include "dpni-cmd.h" 16#include "dpni-cmd.h"
17 17
18#include "dpaa2-eth-trace.h" 18#include "dpaa2-eth-trace.h"
19#include "dpaa2-eth-debugfs.h"
19 20
20#define DPAA2_WRIOP_VERSION(x, y, z) ((x) << 10 | (y) << 5 | (z) << 0) 21#define DPAA2_WRIOP_VERSION(x, y, z) ((x) << 10 | (y) << 5 | (z) << 0)
21 22
@@ -365,6 +366,9 @@ struct dpaa2_eth_priv {
365 struct dpaa2_eth_cls_rule *cls_rules; 366 struct dpaa2_eth_cls_rule *cls_rules;
366 u8 rx_cls_enabled; 367 u8 rx_cls_enabled;
367 struct bpf_prog *xdp_prog; 368 struct bpf_prog *xdp_prog;
369#ifdef CONFIG_DEBUG_FS
370 struct dpaa2_debugfs dbg;
371#endif
368}; 372};
369 373
370#define DPAA2_RXH_SUPPORTED (RXH_L2DA | RXH_VLAN | RXH_L3_PROTO \ 374#define DPAA2_RXH_SUPPORTED (RXH_L2DA | RXH_VLAN | RXH_L3_PROTO \
@@ -405,6 +409,10 @@ static inline int dpaa2_eth_cmp_dpni_ver(struct dpaa2_eth_priv *priv,
405#define dpaa2_eth_fs_count(priv) \ 409#define dpaa2_eth_fs_count(priv) \
406 ((priv)->dpni_attrs.fs_entries) 410 ((priv)->dpni_attrs.fs_entries)
407 411
412/* We have exactly one {Rx, Tx conf} queue per channel */
413#define dpaa2_eth_queue_count(priv) \
414 ((priv)->num_channels)
415
408enum dpaa2_eth_rx_dist { 416enum dpaa2_eth_rx_dist {
409 DPAA2_ETH_RX_DIST_HASH, 417 DPAA2_ETH_RX_DIST_HASH,
410 DPAA2_ETH_RX_DIST_CLS 418 DPAA2_ETH_RX_DIST_CLS
@@ -447,12 +455,6 @@ static inline unsigned int dpaa2_eth_rx_head_room(struct dpaa2_eth_priv *priv)
447 DPAA2_ETH_RX_HWA_SIZE; 455 DPAA2_ETH_RX_HWA_SIZE;
448} 456}
449 457
450/* We have exactly one {Rx, Tx conf} queue per channel */
451static int dpaa2_eth_queue_count(struct dpaa2_eth_priv *priv)
452{
453 return priv->num_channels;
454}
455
456int dpaa2_eth_set_hash(struct net_device *net_dev, u64 flags); 458int dpaa2_eth_set_hash(struct net_device *net_dev, u64 flags);
457int dpaa2_eth_cls_key_size(void); 459int dpaa2_eth_cls_key_size(void);
458int dpaa2_eth_cls_fld_off(int prot, int field); 460int dpaa2_eth_cls_fld_off(int prot, int field);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
index 5d64519b9b1d..6bf346c11b25 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
@@ -788,8 +788,9 @@ int hns_rcb_common_get_cfg(struct dsaf_device *dsaf_dev,
788 int ring_num = hns_rcb_get_ring_num(dsaf_dev); 788 int ring_num = hns_rcb_get_ring_num(dsaf_dev);
789 789
790 rcb_common = 790 rcb_common =
791 devm_kzalloc(dsaf_dev->dev, sizeof(*rcb_common) + 791 devm_kzalloc(dsaf_dev->dev,
792 ring_num * sizeof(struct ring_pair_cb), GFP_KERNEL); 792 struct_size(rcb_common, ring_pair_cb, ring_num),
793 GFP_KERNEL);
793 if (!rcb_common) { 794 if (!rcb_common) {
794 dev_err(dsaf_dev->dev, "rcb common devm_kzalloc fail!\n"); 795 dev_err(dsaf_dev->dev, "rcb common devm_kzalloc fail!\n");
795 return -ENOMEM; 796 return -ENOMEM;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 36eab37d8a40..d486748d5883 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -124,6 +124,7 @@ enum hnae3_reset_notify_type {
124 HNAE3_DOWN_CLIENT, 124 HNAE3_DOWN_CLIENT,
125 HNAE3_INIT_CLIENT, 125 HNAE3_INIT_CLIENT,
126 HNAE3_UNINIT_CLIENT, 126 HNAE3_UNINIT_CLIENT,
127 HNAE3_RESTORE_CLIENT,
127}; 128};
128 129
129enum hnae3_reset_type { 130enum hnae3_reset_type {
@@ -500,6 +501,7 @@ struct hnae3_tc_info {
500struct hnae3_knic_private_info { 501struct hnae3_knic_private_info {
501 struct net_device *netdev; /* Set by KNIC client when init instance */ 502 struct net_device *netdev; /* Set by KNIC client when init instance */
502 u16 rss_size; /* Allocated RSS queues */ 503 u16 rss_size; /* Allocated RSS queues */
504 u16 req_rss_size;
503 u16 rx_buf_len; 505 u16 rx_buf_len;
504 u16 num_desc; 506 u16 num_desc;
505 507
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 1bf7a5f116a0..9dd8949381bc 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -3185,6 +3185,9 @@ static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
3185 for (i = 0; i < priv->vector_num; i++) { 3185 for (i = 0; i < priv->vector_num; i++) {
3186 tqp_vector = &priv->tqp_vector[i]; 3186 tqp_vector = &priv->tqp_vector[i];
3187 3187
3188 if (!tqp_vector->rx_group.ring && !tqp_vector->tx_group.ring)
3189 continue;
3190
3188 ret = hns3_get_vector_ring_chain(tqp_vector, 3191 ret = hns3_get_vector_ring_chain(tqp_vector,
3189 &vector_ring_chain); 3192 &vector_ring_chain);
3190 if (ret) 3193 if (ret)
@@ -3205,7 +3208,6 @@ static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
3205 tqp_vector->irq_init_flag = HNS3_VECTOR_NOT_INITED; 3208 tqp_vector->irq_init_flag = HNS3_VECTOR_NOT_INITED;
3206 } 3209 }
3207 3210
3208 priv->ring_data[i].ring->irq_init_flag = HNS3_VECTOR_NOT_INITED;
3209 hns3_clear_ring_group(&tqp_vector->rx_group); 3211 hns3_clear_ring_group(&tqp_vector->rx_group);
3210 hns3_clear_ring_group(&tqp_vector->tx_group); 3212 hns3_clear_ring_group(&tqp_vector->tx_group);
3211 netif_napi_del(&priv->tqp_vector[i].napi); 3213 netif_napi_del(&priv->tqp_vector[i].napi);
@@ -3238,6 +3240,7 @@ static int hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
3238{ 3240{
3239 struct hns3_nic_ring_data *ring_data = priv->ring_data; 3241 struct hns3_nic_ring_data *ring_data = priv->ring_data;
3240 int queue_num = priv->ae_handle->kinfo.num_tqps; 3242 int queue_num = priv->ae_handle->kinfo.num_tqps;
3243 int desc_num = priv->ae_handle->kinfo.num_desc;
3241 struct pci_dev *pdev = priv->ae_handle->pdev; 3244 struct pci_dev *pdev = priv->ae_handle->pdev;
3242 struct hns3_enet_ring *ring; 3245 struct hns3_enet_ring *ring;
3243 3246
@@ -3263,7 +3266,7 @@ static int hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
3263 ring->dev = priv->dev; 3266 ring->dev = priv->dev;
3264 ring->desc_dma_addr = 0; 3267 ring->desc_dma_addr = 0;
3265 ring->buf_size = q->buf_size; 3268 ring->buf_size = q->buf_size;
3266 ring->desc_num = q->desc_num; 3269 ring->desc_num = desc_num;
3267 ring->next_to_use = 0; 3270 ring->next_to_use = 0;
3268 ring->next_to_clean = 0; 3271 ring->next_to_clean = 0;
3269 3272
@@ -3725,7 +3728,6 @@ static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc)
3725{ 3728{
3726 struct hnae3_knic_private_info *kinfo = &handle->kinfo; 3729 struct hnae3_knic_private_info *kinfo = &handle->kinfo;
3727 struct net_device *ndev = kinfo->netdev; 3730 struct net_device *ndev = kinfo->netdev;
3728 bool if_running;
3729 int ret; 3731 int ret;
3730 3732
3731 if (tc > HNAE3_MAX_TC) 3733 if (tc > HNAE3_MAX_TC)
@@ -3734,24 +3736,13 @@ static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc)
3734 if (!ndev) 3736 if (!ndev)
3735 return -ENODEV; 3737 return -ENODEV;
3736 3738
3737 if_running = netif_running(ndev);
3738
3739 if (if_running) {
3740 (void)hns3_nic_net_stop(ndev);
3741 msleep(100);
3742 }
3743
3744 ret = (kinfo->dcb_ops && kinfo->dcb_ops->map_update) ? 3739 ret = (kinfo->dcb_ops && kinfo->dcb_ops->map_update) ?
3745 kinfo->dcb_ops->map_update(handle) : -EOPNOTSUPP; 3740 kinfo->dcb_ops->map_update(handle) : -EOPNOTSUPP;
3746 if (ret) 3741 if (ret)
3747 goto err_out; 3742 return ret;
3748 3743
3749 ret = hns3_nic_set_real_num_queue(ndev); 3744 ret = hns3_nic_set_real_num_queue(ndev);
3750 3745
3751err_out:
3752 if (if_running)
3753 (void)hns3_nic_net_open(ndev);
3754
3755 return ret; 3746 return ret;
3756} 3747}
3757 3748
@@ -4013,41 +4004,18 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle)
4013{ 4004{
4014 struct net_device *netdev = handle->kinfo.netdev; 4005 struct net_device *netdev = handle->kinfo.netdev;
4015 struct hns3_nic_priv *priv = netdev_priv(netdev); 4006 struct hns3_nic_priv *priv = netdev_priv(netdev);
4016 bool vlan_filter_enable;
4017 int ret; 4007 int ret;
4018 4008
4019 ret = hns3_init_mac_addr(netdev, false); 4009 /* Carrier off reporting is important to ethtool even BEFORE open */
4020 if (ret) 4010 netif_carrier_off(netdev);
4021 return ret;
4022
4023 ret = hns3_recover_hw_addr(netdev);
4024 if (ret)
4025 return ret;
4026
4027 ret = hns3_update_promisc_mode(netdev, handle->netdev_flags);
4028 if (ret)
4029 return ret;
4030
4031 vlan_filter_enable = netdev->flags & IFF_PROMISC ? false : true;
4032 hns3_enable_vlan_filter(netdev, vlan_filter_enable);
4033
4034 /* Hardware table is only clear when pf resets */
4035 if (!(handle->flags & HNAE3_SUPPORT_VF)) {
4036 ret = hns3_restore_vlan(netdev);
4037 if (ret)
4038 return ret;
4039 }
4040 4011
4041 ret = hns3_restore_fd_rules(netdev); 4012 ret = hns3_get_ring_config(priv);
4042 if (ret) 4013 if (ret)
4043 return ret; 4014 return ret;
4044 4015
4045 /* Carrier off reporting is important to ethtool even BEFORE open */
4046 netif_carrier_off(netdev);
4047
4048 ret = hns3_nic_alloc_vector_data(priv); 4016 ret = hns3_nic_alloc_vector_data(priv);
4049 if (ret) 4017 if (ret)
4050 return ret; 4018 goto err_put_ring;
4051 4019
4052 hns3_restore_coal(priv); 4020 hns3_restore_coal(priv);
4053 4021
@@ -4068,10 +4036,44 @@ err_uninit_vector:
4068 priv->ring_data = NULL; 4036 priv->ring_data = NULL;
4069err_dealloc_vector: 4037err_dealloc_vector:
4070 hns3_nic_dealloc_vector_data(priv); 4038 hns3_nic_dealloc_vector_data(priv);
4039err_put_ring:
4040 hns3_put_ring_config(priv);
4041 priv->ring_data = NULL;
4071 4042
4072 return ret; 4043 return ret;
4073} 4044}
4074 4045
4046static int hns3_reset_notify_restore_enet(struct hnae3_handle *handle)
4047{
4048 struct net_device *netdev = handle->kinfo.netdev;
4049 bool vlan_filter_enable;
4050 int ret;
4051
4052 ret = hns3_init_mac_addr(netdev, false);
4053 if (ret)
4054 return ret;
4055
4056 ret = hns3_recover_hw_addr(netdev);
4057 if (ret)
4058 return ret;
4059
4060 ret = hns3_update_promisc_mode(netdev, handle->netdev_flags);
4061 if (ret)
4062 return ret;
4063
4064 vlan_filter_enable = netdev->flags & IFF_PROMISC ? false : true;
4065 hns3_enable_vlan_filter(netdev, vlan_filter_enable);
4066
4067 /* Hardware table is only clear when pf resets */
4068 if (!(handle->flags & HNAE3_SUPPORT_VF)) {
4069 ret = hns3_restore_vlan(netdev);
4070 if (ret)
4071 return ret;
4072 }
4073
4074 return hns3_restore_fd_rules(netdev);
4075}
4076
4075static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle) 4077static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle)
4076{ 4078{
4077 struct net_device *netdev = handle->kinfo.netdev; 4079 struct net_device *netdev = handle->kinfo.netdev;
@@ -4101,6 +4103,9 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle)
4101 if (ret) 4103 if (ret)
4102 netdev_err(netdev, "uninit ring error\n"); 4104 netdev_err(netdev, "uninit ring error\n");
4103 4105
4106 hns3_put_ring_config(priv);
4107 priv->ring_data = NULL;
4108
4104 clear_bit(HNS3_NIC_STATE_INITED, &priv->state); 4109 clear_bit(HNS3_NIC_STATE_INITED, &priv->state);
4105 4110
4106 return ret; 4111 return ret;
@@ -4124,6 +4129,9 @@ static int hns3_reset_notify(struct hnae3_handle *handle,
4124 case HNAE3_UNINIT_CLIENT: 4129 case HNAE3_UNINIT_CLIENT:
4125 ret = hns3_reset_notify_uninit_enet(handle); 4130 ret = hns3_reset_notify_uninit_enet(handle);
4126 break; 4131 break;
4132 case HNAE3_RESTORE_CLIENT:
4133 ret = hns3_reset_notify_restore_enet(handle);
4134 break;
4127 default: 4135 default:
4128 break; 4136 break;
4129 } 4137 }
@@ -4131,57 +4139,11 @@ static int hns3_reset_notify(struct hnae3_handle *handle,
4131 return ret; 4139 return ret;
4132} 4140}
4133 4141
4134static int hns3_modify_tqp_num(struct net_device *netdev, u16 new_tqp_num)
4135{
4136 struct hns3_nic_priv *priv = netdev_priv(netdev);
4137 struct hnae3_handle *h = hns3_get_handle(netdev);
4138 int ret;
4139
4140 ret = h->ae_algo->ops->set_channels(h, new_tqp_num);
4141 if (ret)
4142 return ret;
4143
4144 ret = hns3_get_ring_config(priv);
4145 if (ret)
4146 return ret;
4147
4148 ret = hns3_nic_alloc_vector_data(priv);
4149 if (ret)
4150 goto err_alloc_vector;
4151
4152 hns3_restore_coal(priv);
4153
4154 ret = hns3_nic_init_vector_data(priv);
4155 if (ret)
4156 goto err_uninit_vector;
4157
4158 ret = hns3_init_all_ring(priv);
4159 if (ret)
4160 goto err_put_ring;
4161
4162 return 0;
4163
4164err_put_ring:
4165 hns3_put_ring_config(priv);
4166err_uninit_vector:
4167 hns3_nic_uninit_vector_data(priv);
4168err_alloc_vector:
4169 hns3_nic_dealloc_vector_data(priv);
4170 return ret;
4171}
4172
4173static int hns3_adjust_tqps_num(u8 num_tc, u32 new_tqp_num)
4174{
4175 return (new_tqp_num / num_tc) * num_tc;
4176}
4177
4178int hns3_set_channels(struct net_device *netdev, 4142int hns3_set_channels(struct net_device *netdev,
4179 struct ethtool_channels *ch) 4143 struct ethtool_channels *ch)
4180{ 4144{
4181 struct hns3_nic_priv *priv = netdev_priv(netdev);
4182 struct hnae3_handle *h = hns3_get_handle(netdev); 4145 struct hnae3_handle *h = hns3_get_handle(netdev);
4183 struct hnae3_knic_private_info *kinfo = &h->kinfo; 4146 struct hnae3_knic_private_info *kinfo = &h->kinfo;
4184 bool if_running = netif_running(netdev);
4185 u32 new_tqp_num = ch->combined_count; 4147 u32 new_tqp_num = ch->combined_count;
4186 u16 org_tqp_num; 4148 u16 org_tqp_num;
4187 int ret; 4149 int ret;
@@ -4190,39 +4152,28 @@ int hns3_set_channels(struct net_device *netdev,
4190 return -EINVAL; 4152 return -EINVAL;
4191 4153
4192 if (new_tqp_num > hns3_get_max_available_channels(h) || 4154 if (new_tqp_num > hns3_get_max_available_channels(h) ||
4193 new_tqp_num < kinfo->num_tc) { 4155 new_tqp_num < 1) {
4194 dev_err(&netdev->dev, 4156 dev_err(&netdev->dev,
4195 "Change tqps fail, the tqp range is from %d to %d", 4157 "Change tqps fail, the tqp range is from 1 to %d",
4196 kinfo->num_tc,
4197 hns3_get_max_available_channels(h)); 4158 hns3_get_max_available_channels(h));
4198 return -EINVAL; 4159 return -EINVAL;
4199 } 4160 }
4200 4161
4201 new_tqp_num = hns3_adjust_tqps_num(kinfo->num_tc, new_tqp_num); 4162 if (kinfo->rss_size == new_tqp_num)
4202 if (kinfo->num_tqps == new_tqp_num)
4203 return 0; 4163 return 0;
4204 4164
4205 if (if_running) 4165 ret = hns3_reset_notify(h, HNAE3_DOWN_CLIENT);
4206 hns3_nic_net_stop(netdev); 4166 if (ret)
4207 4167 return ret;
4208 ret = hns3_nic_uninit_vector_data(priv);
4209 if (ret) {
4210 dev_err(&netdev->dev,
4211 "Unbind vector with tqp fail, nothing is changed");
4212 goto open_netdev;
4213 }
4214
4215 hns3_store_coal(priv);
4216
4217 hns3_nic_dealloc_vector_data(priv);
4218 4168
4219 hns3_uninit_all_ring(priv); 4169 ret = hns3_reset_notify(h, HNAE3_UNINIT_CLIENT);
4220 hns3_put_ring_config(priv); 4170 if (ret)
4171 return ret;
4221 4172
4222 org_tqp_num = h->kinfo.num_tqps; 4173 org_tqp_num = h->kinfo.num_tqps;
4223 ret = hns3_modify_tqp_num(netdev, new_tqp_num); 4174 ret = h->ae_algo->ops->set_channels(h, new_tqp_num);
4224 if (ret) { 4175 if (ret) {
4225 ret = hns3_modify_tqp_num(netdev, org_tqp_num); 4176 ret = h->ae_algo->ops->set_channels(h, org_tqp_num);
4226 if (ret) { 4177 if (ret) {
4227 /* If revert to old tqp failed, fatal error occurred */ 4178 /* If revert to old tqp failed, fatal error occurred */
4228 dev_err(&netdev->dev, 4179 dev_err(&netdev->dev,
@@ -4232,12 +4183,11 @@ int hns3_set_channels(struct net_device *netdev,
4232 dev_info(&netdev->dev, 4183 dev_info(&netdev->dev,
4233 "Change tqp num fail, Revert to old tqp num"); 4184 "Change tqp num fail, Revert to old tqp num");
4234 } 4185 }
4186 ret = hns3_reset_notify(h, HNAE3_INIT_CLIENT);
4187 if (ret)
4188 return ret;
4235 4189
4236open_netdev: 4190 return hns3_reset_notify(h, HNAE3_UP_CLIENT);
4237 if (if_running)
4238 hns3_nic_net_open(netdev);
4239
4240 return ret;
4241} 4191}
4242 4192
4243static const struct hnae3_client_ops client_ops = { 4193static const struct hnae3_client_ops client_ops = {
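With hns3_modify_tqp_num() gone, changing the channel count reuses the same client notification stages as a full reset. A minimal sketch of the sequence hns3_set_channels() now drives (error unwinding elided; the revert path on set_channels() failure is shown in the hunk above):

	/* Sketch only: condensed from the hunk above, not a new function
	 * in the patch. */
	ret = hns3_reset_notify(h, HNAE3_DOWN_CLIENT);	/* stop traffic */
	if (!ret)
		ret = hns3_reset_notify(h, HNAE3_UNINIT_CLIENT); /* free rings, vectors */
	if (!ret)
		ret = h->ae_algo->ops->set_channels(h, new_tqp_num); /* reprogram hw */
	if (!ret)
		ret = hns3_reset_notify(h, HNAE3_INIT_CLIENT);	/* realloc per new count */
	if (!ret)
		ret = hns3_reset_notify(h, HNAE3_UP_CLIENT);	/* restart traffic */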
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index e55995e93bb0..f59ab7387b1f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -412,7 +412,6 @@ struct hns3_enet_ring {
412 unsigned char *va; /* first buffer address for current packet */ 412 unsigned char *va; /* first buffer address for current packet */
413 413
414 u32 flag; /* ring attribute */ 414 u32 flag; /* ring attribute */
415 int irq_init_flag;
416 415
417 int numa_node; 416 int numa_node;
418 cpumask_t affinity_mask; 417 cpumask_t affinity_mask;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
index f6323b2501dc..4ec0b9cd15ae 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
@@ -222,6 +222,16 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
222 if (ret) 222 if (ret)
223 return ret; 223 return ret;
224 224
225 if (map_changed) {
226 ret = hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
227 if (ret)
228 return ret;
229
230 ret = hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT);
231 if (ret)
232 return ret;
233 }
234
225 hclge_tm_schd_info_update(hdev, num_tc); 235 hclge_tm_schd_info_update(hdev, num_tc);
226 236
227 ret = hclge_ieee_ets_to_tm_info(hdev, ets); 237 ret = hclge_ieee_ets_to_tm_info(hdev, ets);
@@ -232,6 +242,13 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
232 ret = hclge_client_setup_tc(hdev); 242 ret = hclge_client_setup_tc(hdev);
233 if (ret) 243 if (ret)
234 return ret; 244 return ret;
245 ret = hclge_notify_client(hdev, HNAE3_INIT_CLIENT);
246 if (ret)
247 return ret;
248
249 ret = hclge_notify_client(hdev, HNAE3_UP_CLIENT);
250 if (ret)
251 return ret;
235 } 252 }
236 253
237 return hclge_tm_dwrr_cfg(hdev); 254 return hclge_tm_dwrr_cfg(hdev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index f7637c08bb3a..00d7acb4d45a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1068,14 +1068,14 @@ static int hclge_map_tqps_to_func(struct hclge_dev *hdev, u16 func_id,
1068 return ret; 1068 return ret;
1069} 1069}
1070 1070
1071static int hclge_assign_tqp(struct hclge_vport *vport) 1071static int hclge_assign_tqp(struct hclge_vport *vport, u16 num_tqps)
1072{ 1072{
1073 struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; 1073 struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
1074 struct hclge_dev *hdev = vport->back; 1074 struct hclge_dev *hdev = vport->back;
1075 int i, alloced; 1075 int i, alloced;
1076 1076
1077 for (i = 0, alloced = 0; i < hdev->num_tqps && 1077 for (i = 0, alloced = 0; i < hdev->num_tqps &&
1078 alloced < kinfo->num_tqps; i++) { 1078 alloced < num_tqps; i++) {
1079 if (!hdev->htqp[i].alloced) { 1079 if (!hdev->htqp[i].alloced) {
1080 hdev->htqp[i].q.handle = &vport->nic; 1080 hdev->htqp[i].q.handle = &vport->nic;
1081 hdev->htqp[i].q.tqp_index = alloced; 1081 hdev->htqp[i].q.tqp_index = alloced;
@@ -1085,7 +1085,9 @@ static int hclge_assign_tqp(struct hclge_vport *vport)
1085 alloced++; 1085 alloced++;
1086 } 1086 }
1087 } 1087 }
1088 vport->alloc_tqps = kinfo->num_tqps; 1088 vport->alloc_tqps = alloced;
1089 kinfo->rss_size = min_t(u16, hdev->rss_size_max,
1090 vport->alloc_tqps / hdev->tm_info.num_tc);
1089 1091
1090 return 0; 1092 return 0;
1091} 1093}
@@ -1096,36 +1098,17 @@ static int hclge_knic_setup(struct hclge_vport *vport,
1096 struct hnae3_handle *nic = &vport->nic; 1098 struct hnae3_handle *nic = &vport->nic;
1097 struct hnae3_knic_private_info *kinfo = &nic->kinfo; 1099 struct hnae3_knic_private_info *kinfo = &nic->kinfo;
1098 struct hclge_dev *hdev = vport->back; 1100 struct hclge_dev *hdev = vport->back;
1099 int i, ret; 1101 int ret;
1100 1102
1101 kinfo->num_desc = num_desc; 1103 kinfo->num_desc = num_desc;
1102 kinfo->rx_buf_len = hdev->rx_buf_len; 1104 kinfo->rx_buf_len = hdev->rx_buf_len;
1103 kinfo->num_tc = min_t(u16, num_tqps, hdev->tm_info.num_tc);
1104 kinfo->rss_size
1105 = min_t(u16, hdev->rss_size_max, num_tqps / kinfo->num_tc);
1106 kinfo->num_tqps = kinfo->rss_size * kinfo->num_tc;
1107 1105
1108 for (i = 0; i < HNAE3_MAX_TC; i++) { 1106 kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, num_tqps,
1109 if (hdev->hw_tc_map & BIT(i)) {
1110 kinfo->tc_info[i].enable = true;
1111 kinfo->tc_info[i].tqp_offset = i * kinfo->rss_size;
1112 kinfo->tc_info[i].tqp_count = kinfo->rss_size;
1113 kinfo->tc_info[i].tc = i;
1114 } else {
1115 /* Set to default queue if TC is disable */
1116 kinfo->tc_info[i].enable = false;
1117 kinfo->tc_info[i].tqp_offset = 0;
1118 kinfo->tc_info[i].tqp_count = 1;
1119 kinfo->tc_info[i].tc = 0;
1120 }
1121 }
1122
1123 kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, kinfo->num_tqps,
1124 sizeof(struct hnae3_queue *), GFP_KERNEL); 1107 sizeof(struct hnae3_queue *), GFP_KERNEL);
1125 if (!kinfo->tqp) 1108 if (!kinfo->tqp)
1126 return -ENOMEM; 1109 return -ENOMEM;
1127 1110
1128 ret = hclge_assign_tqp(vport); 1111 ret = hclge_assign_tqp(vport, num_tqps);
1129 if (ret) 1112 if (ret)
1130 dev_err(&hdev->pdev->dev, "fail to assign TQPs %d.\n", ret); 1113 dev_err(&hdev->pdev->dev, "fail to assign TQPs %d.\n", ret);
1131 1114
@@ -1140,7 +1123,7 @@ static int hclge_map_tqp_to_vport(struct hclge_dev *hdev,
1140 u16 i; 1123 u16 i;
1141 1124
1142 kinfo = &nic->kinfo; 1125 kinfo = &nic->kinfo;
1143 for (i = 0; i < kinfo->num_tqps; i++) { 1126 for (i = 0; i < vport->alloc_tqps; i++) {
1144 struct hclge_tqp *q = 1127 struct hclge_tqp *q =
1145 container_of(kinfo->tqp[i], struct hclge_tqp, q); 1128 container_of(kinfo->tqp[i], struct hclge_tqp, q);
1146 bool is_pf; 1129 bool is_pf;
@@ -2418,8 +2401,8 @@ static void hclge_misc_irq_uninit(struct hclge_dev *hdev)
2418 hclge_free_vector(hdev, 0); 2401 hclge_free_vector(hdev, 0);
2419} 2402}
2420 2403
2421static int hclge_notify_client(struct hclge_dev *hdev, 2404int hclge_notify_client(struct hclge_dev *hdev,
2422 enum hnae3_reset_notify_type type) 2405 enum hnae3_reset_notify_type type)
2423{ 2406{
2424 struct hnae3_client *client = hdev->nic_client; 2407 struct hnae3_client *client = hdev->nic_client;
2425 u16 i; 2408 u16 i;
@@ -2883,6 +2866,10 @@ static void hclge_reset(struct hclge_dev *hdev)
2883 if (ret) 2866 if (ret)
2884 goto err_reset_lock; 2867 goto err_reset_lock;
2885 2868
2869 ret = hclge_notify_client(hdev, HNAE3_RESTORE_CLIENT);
2870 if (ret)
2871 goto err_reset_lock;
2872
2886 hclge_clear_reset_cause(hdev); 2873 hclge_clear_reset_cause(hdev);
2887 2874
2888 ret = hclge_reset_prepare_up(hdev); 2875 ret = hclge_reset_prepare_up(hdev);
@@ -5258,6 +5245,7 @@ static int hclge_set_loopback(struct hnae3_handle *handle,
5258 enum hnae3_loop loop_mode, bool en) 5245 enum hnae3_loop loop_mode, bool en)
5259{ 5246{
5260 struct hclge_vport *vport = hclge_get_vport(handle); 5247 struct hclge_vport *vport = hclge_get_vport(handle);
5248 struct hnae3_knic_private_info *kinfo;
5261 struct hclge_dev *hdev = vport->back; 5249 struct hclge_dev *hdev = vport->back;
5262 int i, ret; 5250 int i, ret;
5263 5251
@@ -5276,7 +5264,8 @@ static int hclge_set_loopback(struct hnae3_handle *handle,
5276 break; 5264 break;
5277 } 5265 }
5278 5266
5279 for (i = 0; i < vport->alloc_tqps; i++) { 5267 kinfo = &vport->nic.kinfo;
5268 for (i = 0; i < kinfo->num_tqps; i++) {
5280 ret = hclge_tqp_enable(hdev, i, 0, en); 5269 ret = hclge_tqp_enable(hdev, i, 0, en);
5281 if (ret) 5270 if (ret)
5282 return ret; 5271 return ret;
@@ -5288,11 +5277,13 @@ static int hclge_set_loopback(struct hnae3_handle *handle,
5288static void hclge_reset_tqp_stats(struct hnae3_handle *handle) 5277static void hclge_reset_tqp_stats(struct hnae3_handle *handle)
5289{ 5278{
5290 struct hclge_vport *vport = hclge_get_vport(handle); 5279 struct hclge_vport *vport = hclge_get_vport(handle);
5280 struct hnae3_knic_private_info *kinfo;
5291 struct hnae3_queue *queue; 5281 struct hnae3_queue *queue;
5292 struct hclge_tqp *tqp; 5282 struct hclge_tqp *tqp;
5293 int i; 5283 int i;
5294 5284
5295 for (i = 0; i < vport->alloc_tqps; i++) { 5285 kinfo = &vport->nic.kinfo;
5286 for (i = 0; i < kinfo->num_tqps; i++) {
5296 queue = handle->kinfo.tqp[i]; 5287 queue = handle->kinfo.tqp[i];
5297 tqp = container_of(queue, struct hclge_tqp, q); 5288 tqp = container_of(queue, struct hclge_tqp, q);
5298 memset(&tqp->tqp_stats, 0, sizeof(tqp->tqp_stats)); 5289 memset(&tqp->tqp_stats, 0, sizeof(tqp->tqp_stats));
@@ -7523,18 +7514,17 @@ static u32 hclge_get_max_channels(struct hnae3_handle *handle)
7523 struct hclge_vport *vport = hclge_get_vport(handle); 7514 struct hclge_vport *vport = hclge_get_vport(handle);
7524 struct hclge_dev *hdev = vport->back; 7515 struct hclge_dev *hdev = vport->back;
7525 7516
7526 return min_t(u32, hdev->rss_size_max * kinfo->num_tc, hdev->num_tqps); 7517 return min_t(u32, hdev->rss_size_max,
7518 vport->alloc_tqps / kinfo->num_tc);
7527} 7519}
7528 7520
7529static void hclge_get_channels(struct hnae3_handle *handle, 7521static void hclge_get_channels(struct hnae3_handle *handle,
7530 struct ethtool_channels *ch) 7522 struct ethtool_channels *ch)
7531{ 7523{
7532 struct hclge_vport *vport = hclge_get_vport(handle);
7533
7534 ch->max_combined = hclge_get_max_channels(handle); 7524 ch->max_combined = hclge_get_max_channels(handle);
7535 ch->other_count = 1; 7525 ch->other_count = 1;
7536 ch->max_other = 1; 7526 ch->max_other = 1;
7537 ch->combined_count = vport->alloc_tqps; 7527 ch->combined_count = handle->kinfo.rss_size;
7538} 7528}
7539 7529
7540static void hclge_get_tqps_and_rss_info(struct hnae3_handle *handle, 7530static void hclge_get_tqps_and_rss_info(struct hnae3_handle *handle,
@@ -7547,25 +7537,6 @@ static void hclge_get_tqps_and_rss_info(struct hnae3_handle *handle,
7547 *max_rss_size = hdev->rss_size_max; 7537 *max_rss_size = hdev->rss_size_max;
7548} 7538}
7549 7539
7550static void hclge_release_tqp(struct hclge_vport *vport)
7551{
7552 struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
7553 struct hclge_dev *hdev = vport->back;
7554 int i;
7555
7556 for (i = 0; i < kinfo->num_tqps; i++) {
7557 struct hclge_tqp *tqp =
7558 container_of(kinfo->tqp[i], struct hclge_tqp, q);
7559
7560 tqp->q.handle = NULL;
7561 tqp->q.tqp_index = 0;
7562 tqp->alloced = false;
7563 }
7564
7565 devm_kfree(&hdev->pdev->dev, kinfo->tqp);
7566 kinfo->tqp = NULL;
7567}
7568
7569static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num) 7540static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num)
7570{ 7541{
7571 struct hclge_vport *vport = hclge_get_vport(handle); 7542 struct hclge_vport *vport = hclge_get_vport(handle);
@@ -7580,24 +7551,11 @@ static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num)
7580 u32 *rss_indir; 7551 u32 *rss_indir;
7581 int ret, i; 7552 int ret, i;
7582 7553
7583 /* Free old tqps, and reallocate with new tqp number when nic setup */ 7554 kinfo->req_rss_size = new_tqps_num;
7584 hclge_release_tqp(vport);
7585
7586 ret = hclge_knic_setup(vport, new_tqps_num, kinfo->num_desc);
7587 if (ret) {
7588 dev_err(&hdev->pdev->dev, "setup nic fail, ret =%d\n", ret);
7589 return ret;
7590 }
7591
7592 ret = hclge_map_tqp_to_vport(hdev, vport);
7593 if (ret) {
7594 dev_err(&hdev->pdev->dev, "map vport tqp fail, ret =%d\n", ret);
7595 return ret;
7596 }
7597 7555
7598 ret = hclge_tm_schd_init(hdev); 7556 ret = hclge_tm_vport_map_update(hdev);
7599 if (ret) { 7557 if (ret) {
7600 dev_err(&hdev->pdev->dev, "tm schd init fail, ret =%d\n", ret); 7558 dev_err(&hdev->pdev->dev, "tm vport map fail, ret =%d\n", ret);
7601 return ret; 7559 return ret;
7602 } 7560 }
7603 7561
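The queue accounting above now flows one way: hclge_assign_tqp() records how many TQPs were actually allocated and derives the RSS size from that, while hclge_set_channels() only records the user's request and lets the TM code recompute the vport mapping. A worked example of the new sizing rule (the numbers are illustrative, not from the patch):

	/* From hclge_assign_tqp(): rss_size is bounded by hardware and by
	 * an even split of the allocated TQPs across TCs. */
	kinfo->rss_size = min_t(u16, hdev->rss_size_max,
				vport->alloc_tqps / hdev->tm_info.num_tc);
	/* e.g. alloc_tqps = 16, num_tc = 4, rss_size_max = 8
	 *   -> rss_size = min(8, 16 / 4) = 4, so num_tqps = 4 * 4 = 16 */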
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 6615b85a1c52..279ed2d83cb8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -878,4 +878,6 @@ void hclge_vport_stop(struct hclge_vport *vport);
878int hclge_set_vport_mtu(struct hclge_vport *vport, int new_mtu); 878int hclge_set_vport_mtu(struct hclge_vport *vport, int new_mtu);
879int hclge_dbg_run_cmd(struct hnae3_handle *handle, char *cmd_buf); 879int hclge_dbg_run_cmd(struct hnae3_handle *handle, char *cmd_buf);
880u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle, u16 queue_id); 880u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle, u16 queue_id);
881int hclge_notify_client(struct hclge_dev *hdev,
882 enum hnae3_reset_notify_type type);
881#endif 883#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 00458da67503..fb8596a3e5e4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -517,19 +517,32 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
517{ 517{
518 struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; 518 struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
519 struct hclge_dev *hdev = vport->back; 519 struct hclge_dev *hdev = vport->back;
520 u16 max_rss_size;
520 u8 i; 521 u8 i;
521 522
522 vport->bw_limit = hdev->tm_info.pg_info[0].bw_limit; 523 vport->bw_limit = hdev->tm_info.pg_info[0].bw_limit;
523 kinfo->num_tc = 524 kinfo->num_tc = min_t(u16, vport->alloc_tqps, hdev->tm_info.num_tc);
524 min_t(u16, kinfo->num_tqps, hdev->tm_info.num_tc); 525 max_rss_size = min_t(u16, hdev->rss_size_max,
525 kinfo->rss_size 526 vport->alloc_tqps / kinfo->num_tc);
526 = min_t(u16, hdev->rss_size_max, 527
527 kinfo->num_tqps / kinfo->num_tc); 528 if (kinfo->req_rss_size != kinfo->rss_size && kinfo->req_rss_size &&
529 kinfo->req_rss_size <= max_rss_size) {
530 dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n",
531 kinfo->rss_size, kinfo->req_rss_size);
532 kinfo->rss_size = kinfo->req_rss_size;
533 } else if (kinfo->rss_size > max_rss_size ||
534 (!kinfo->req_rss_size && kinfo->rss_size < max_rss_size)) {
535 dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n",
536 kinfo->rss_size, max_rss_size);
537 kinfo->rss_size = max_rss_size;
538 }
539
540 kinfo->num_tqps = kinfo->num_tc * kinfo->rss_size;
528 vport->qs_offset = hdev->tm_info.num_tc * vport->vport_id; 541 vport->qs_offset = hdev->tm_info.num_tc * vport->vport_id;
529 vport->dwrr = 100; /* 100 percent as init */ 542 vport->dwrr = 100; /* 100 percent as init */
530 vport->alloc_rss_size = kinfo->rss_size; 543 vport->alloc_rss_size = kinfo->rss_size;
531 544
532 for (i = 0; i < kinfo->num_tc; i++) { 545 for (i = 0; i < HNAE3_MAX_TC; i++) {
533 if (hdev->hw_tc_map & BIT(i)) { 546 if (hdev->hw_tc_map & BIT(i)) {
534 kinfo->tc_info[i].enable = true; 547 kinfo->tc_info[i].enable = true;
535 kinfo->tc_info[i].tqp_offset = i * kinfo->rss_size; 548 kinfo->tc_info[i].tqp_offset = i * kinfo->rss_size;
@@ -1228,10 +1241,23 @@ static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
1228 return hclge_mac_pause_en_cfg(hdev, tx_en, rx_en); 1241 return hclge_mac_pause_en_cfg(hdev, tx_en, rx_en);
1229} 1242}
1230 1243
1244static int hclge_tm_bp_setup(struct hclge_dev *hdev)
1245{
1246 int ret = 0;
1247 int i;
1248
1249 for (i = 0; i < hdev->tm_info.num_tc; i++) {
1250 ret = hclge_bp_setup_hw(hdev, i);
1251 if (ret)
1252 return ret;
1253 }
1254
1255 return ret;
1256}
1257
1231int hclge_pause_setup_hw(struct hclge_dev *hdev) 1258int hclge_pause_setup_hw(struct hclge_dev *hdev)
1232{ 1259{
1233 int ret; 1260 int ret;
1234 u8 i;
1235 1261
1236 ret = hclge_pause_param_setup_hw(hdev); 1262 ret = hclge_pause_param_setup_hw(hdev);
1237 if (ret) 1263 if (ret)
@@ -1250,13 +1276,7 @@ int hclge_pause_setup_hw(struct hclge_dev *hdev)
1250 if (ret) 1276 if (ret)
1251 dev_warn(&hdev->pdev->dev, "set pfc pause failed:%d\n", ret); 1277 dev_warn(&hdev->pdev->dev, "set pfc pause failed:%d\n", ret);
1252 1278
1253 for (i = 0; i < hdev->tm_info.num_tc; i++) { 1279 return hclge_tm_bp_setup(hdev);
1254 ret = hclge_bp_setup_hw(hdev, i);
1255 if (ret)
1256 return ret;
1257 }
1258
1259 return 0;
1260} 1280}
1261 1281
1262void hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc) 1282void hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc)
@@ -1327,3 +1347,20 @@ int hclge_tm_schd_init(struct hclge_dev *hdev)
1327 1347
1328 return hclge_tm_init_hw(hdev); 1348 return hclge_tm_init_hw(hdev);
1329} 1349}
1350
1351int hclge_tm_vport_map_update(struct hclge_dev *hdev)
1352{
1353 struct hclge_vport *vport = hdev->vport;
1354 int ret;
1355
1356 hclge_tm_vport_tc_info_update(vport);
1357
1358 ret = hclge_vport_q_to_qs_map(hdev, vport);
1359 if (ret)
1360 return ret;
1361
1362 if (!(hdev->flag & HCLGE_FLAG_DCB_ENABLE))
1363 return 0;
1364
1365 return hclge_tm_bp_setup(hdev);
1366}
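hclge_tm_vport_tc_info_update() now reconciles three values: the current rss_size, the user-requested req_rss_size recorded by hclge_set_channels(), and the maximum the allocated TQPs allow. A compressed sketch of the decision, equivalent to the hunk above (variable names shortened for clarity):

	/* Sketch: cur = kinfo->rss_size, req = kinfo->req_rss_size,
	 * max = min(rss_size_max, alloc_tqps / num_tc). */
	if (req && req <= max && req != cur)
		cur = req;		/* honour a valid user request */
	else if (cur > max || (!req && cur < max))
		cur = max;		/* clamp, or auto-grow when no request */
	num_tqps = num_tc * cur;	/* kinfo->num_tqps follows rss_size */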
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index b6496a439304..898163c4d400 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -142,6 +142,7 @@ struct hclge_port_shapping_cmd {
142 (HCLGE_TM_SHAP_##string##_LSH)) 142 (HCLGE_TM_SHAP_##string##_LSH))
143 143
144int hclge_tm_schd_init(struct hclge_dev *hdev); 144int hclge_tm_schd_init(struct hclge_dev *hdev);
145int hclge_tm_vport_map_update(struct hclge_dev *hdev);
145int hclge_pause_setup_hw(struct hclge_dev *hdev); 146int hclge_pause_setup_hw(struct hclge_dev *hdev);
146int hclge_tm_schd_mode_hw(struct hclge_dev *hdev); 147int hclge_tm_schd_mode_hw(struct hclge_dev *hdev);
147void hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc); 148void hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 82103d5fa815..bb9f45200ef5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1264,7 +1264,7 @@ static int hclgevf_reset_stack(struct hclgevf_dev *hdev)
1264 if (ret) 1264 if (ret)
1265 return ret; 1265 return ret;
1266 1266
1267 return 0; 1267 return hclgevf_notify_client(hdev, HNAE3_RESTORE_CLIENT);
1268} 1268}
1269 1269
1270static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev) 1270static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
index 6b19607a4caa..3875f39f43bb 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -1008,3 +1008,16 @@ int hinic_hwdev_hw_ci_addr_set(struct hinic_hwdev *hwdev, struct hinic_sq *sq,
1008 &hw_ci, sizeof(hw_ci), NULL, 1008 &hw_ci, sizeof(hw_ci), NULL,
1009 NULL, HINIC_MGMT_MSG_SYNC); 1009 NULL, HINIC_MGMT_MSG_SYNC);
1010} 1010}
1011
1012/**
 1013 * hinic_hwdev_set_msix_state - set MSI-X state
 1014 * @hwdev: the NIC HW device
 1015 * @msix_index: index of the IRQ's MSI-X entry
 1016 * @flag: MSI-X state to set (enable or disable)
1017 *
1018 **/
1019void hinic_hwdev_set_msix_state(struct hinic_hwdev *hwdev, u16 msix_index,
1020 enum hinic_msix_state flag)
1021{
1022 hinic_set_msix_state(hwdev->hwif, msix_index, flag);
1023}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
index d1a7d2522d82..c9e621e19dd0 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
@@ -240,4 +240,7 @@ int hinic_hwdev_msix_set(struct hinic_hwdev *hwdev, u16 msix_index,
240int hinic_hwdev_hw_ci_addr_set(struct hinic_hwdev *hwdev, struct hinic_sq *sq, 240int hinic_hwdev_hw_ci_addr_set(struct hinic_hwdev *hwdev, struct hinic_sq *sq,
241 u8 pending_limit, u8 coalesc_timer); 241 u8 pending_limit, u8 coalesc_timer);
242 242
243void hinic_hwdev_set_msix_state(struct hinic_hwdev *hwdev, u16 msix_index,
244 enum hinic_msix_state flag);
245
243#endif 246#endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
index 823a17061a97..9b160f076904 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
@@ -168,6 +168,22 @@ void hinic_db_state_set(struct hinic_hwif *hwif,
168 hinic_hwif_write_reg(hwif, HINIC_CSR_FUNC_ATTR4_ADDR, attr4); 168 hinic_hwif_write_reg(hwif, HINIC_CSR_FUNC_ATTR4_ADDR, attr4);
169} 169}
170 170
171void hinic_set_msix_state(struct hinic_hwif *hwif, u16 msix_idx,
172 enum hinic_msix_state flag)
173{
174 u32 offset = msix_idx * HINIC_PCI_MSIX_ENTRY_SIZE +
175 HINIC_PCI_MSIX_ENTRY_VECTOR_CTRL;
176 u32 mask_bits;
177
178 mask_bits = readl(hwif->intr_regs_base + offset);
179 mask_bits &= ~HINIC_PCI_MSIX_ENTRY_CTRL_MASKBIT;
180
181 if (flag)
182 mask_bits |= HINIC_PCI_MSIX_ENTRY_CTRL_MASKBIT;
183
184 writel(mask_bits, hwif->intr_regs_base + offset);
185}
186
171/** 187/**
172 * hwif_ready - test if the HW is ready for use 188 * hwif_ready - test if the HW is ready for use
173 * @hwif: the HW interface of a pci function device 189 * @hwif: the HW interface of a pci function device
@@ -321,6 +337,13 @@ int hinic_init_hwif(struct hinic_hwif *hwif, struct pci_dev *pdev)
321 return -ENOMEM; 337 return -ENOMEM;
322 } 338 }
323 339
340 hwif->intr_regs_base = pci_ioremap_bar(pdev, HINIC_PCI_INTR_REGS_BAR);
341 if (!hwif->intr_regs_base) {
 342 dev_err(&pdev->dev, "Failed to map interrupt regs\n");
343 err = -ENOMEM;
344 goto err_map_intr_bar;
345 }
346
324 err = hwif_ready(hwif); 347 err = hwif_ready(hwif);
325 if (err) { 348 if (err) {
326 dev_err(&pdev->dev, "HW interface is not ready\n"); 349 dev_err(&pdev->dev, "HW interface is not ready\n");
@@ -337,7 +360,11 @@ int hinic_init_hwif(struct hinic_hwif *hwif, struct pci_dev *pdev)
337 return 0; 360 return 0;
338 361
339err_hwif_ready: 362err_hwif_ready:
363 iounmap(hwif->intr_regs_base);
364
365err_map_intr_bar:
340 iounmap(hwif->cfg_regs_bar); 366 iounmap(hwif->cfg_regs_bar);
367
341 return err; 368 return err;
342} 369}
343 370
@@ -347,5 +374,6 @@ err_hwif_ready:
347 **/ 374 **/
348void hinic_free_hwif(struct hinic_hwif *hwif) 375void hinic_free_hwif(struct hinic_hwif *hwif)
349{ 376{
377 iounmap(hwif->intr_regs_base);
350 iounmap(hwif->cfg_regs_bar); 378 iounmap(hwif->cfg_regs_bar);
351} 379}
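hinic_set_msix_state() works because the MSI-X table has a fixed per-entry layout defined by the PCI specification: 16 bytes per vector, with the Vector Control dword (whose bit 0 is the per-vector mask) at offset 12. The new defines encode exactly that, so the address arithmetic is:

	/* MSI-X table entry layout (PCI spec, 16 bytes per entry):
	 *   +0  Message Address Low
	 *   +4  Message Address High
	 *   +8  Message Data
	 *   +12 Vector Control (bit 0 = per-vector mask)
	 * e.g. vector 5: offset = 5 * 16 + 12 = 92 into the BAR 2 mapping.
	 */
	u32 offset = msix_idx * HINIC_PCI_MSIX_ENTRY_SIZE +
		     HINIC_PCI_MSIX_ENTRY_VECTOR_CTRL;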
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
index 5b4760c0e9f5..22ec7f73e0a6 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
@@ -152,6 +152,7 @@
152#define HINIC_IS_PPF(hwif) (HINIC_FUNC_TYPE(hwif) == HINIC_PPF) 152#define HINIC_IS_PPF(hwif) (HINIC_FUNC_TYPE(hwif) == HINIC_PPF)
153 153
154#define HINIC_PCI_CFG_REGS_BAR 0 154#define HINIC_PCI_CFG_REGS_BAR 0
155#define HINIC_PCI_INTR_REGS_BAR 2
155#define HINIC_PCI_DB_BAR 4 156#define HINIC_PCI_DB_BAR 4
156 157
157#define HINIC_PCIE_ST_DISABLE 0 158#define HINIC_PCIE_ST_DISABLE 0
@@ -164,6 +165,10 @@
164#define HINIC_EQ_MSIX_LLI_CREDIT_LIMIT_DEFAULT 0 /* Disabled */ 165#define HINIC_EQ_MSIX_LLI_CREDIT_LIMIT_DEFAULT 0 /* Disabled */
165#define HINIC_EQ_MSIX_RESEND_TIMER_DEFAULT 7 /* max */ 166#define HINIC_EQ_MSIX_RESEND_TIMER_DEFAULT 7 /* max */
166 167
168#define HINIC_PCI_MSIX_ENTRY_SIZE 16
169#define HINIC_PCI_MSIX_ENTRY_VECTOR_CTRL 12
170#define HINIC_PCI_MSIX_ENTRY_CTRL_MASKBIT 1
171
167enum hinic_pcie_nosnoop { 172enum hinic_pcie_nosnoop {
168 HINIC_PCIE_SNOOP = 0, 173 HINIC_PCIE_SNOOP = 0,
169 HINIC_PCIE_NO_SNOOP = 1, 174 HINIC_PCIE_NO_SNOOP = 1,
@@ -207,6 +212,11 @@ enum hinic_db_state {
207 HINIC_DB_DISABLE = 1, 212 HINIC_DB_DISABLE = 1,
208}; 213};
209 214
215enum hinic_msix_state {
216 HINIC_MSIX_ENABLE,
217 HINIC_MSIX_DISABLE,
218};
219
210struct hinic_func_attr { 220struct hinic_func_attr {
211 u16 func_idx; 221 u16 func_idx;
212 u8 pf_idx; 222 u8 pf_idx;
@@ -226,6 +236,7 @@ struct hinic_func_attr {
226struct hinic_hwif { 236struct hinic_hwif {
227 struct pci_dev *pdev; 237 struct pci_dev *pdev;
228 void __iomem *cfg_regs_bar; 238 void __iomem *cfg_regs_bar;
239 void __iomem *intr_regs_base;
229 240
230 struct hinic_func_attr attr; 241 struct hinic_func_attr attr;
231}; 242};
@@ -251,6 +262,9 @@ int hinic_msix_attr_get(struct hinic_hwif *hwif, u16 msix_index,
251 u8 *lli_timer, u8 *lli_credit_limit, 262 u8 *lli_timer, u8 *lli_credit_limit,
252 u8 *resend_timer); 263 u8 *resend_timer);
253 264
265void hinic_set_msix_state(struct hinic_hwif *hwif, u16 msix_idx,
266 enum hinic_msix_state flag);
267
254int hinic_msix_attr_cnt_clear(struct hinic_hwif *hwif, u16 msix_index); 268int hinic_msix_attr_cnt_clear(struct hinic_hwif *hwif, u16 msix_index);
255 269
256void hinic_set_pf_action(struct hinic_hwif *hwif, enum hinic_pf_action action); 270void hinic_set_pf_action(struct hinic_hwif *hwif, enum hinic_pf_action action);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
index 0098b206e7e9..b6d218768ec1 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
@@ -381,6 +381,7 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget)
381static int rx_poll(struct napi_struct *napi, int budget) 381static int rx_poll(struct napi_struct *napi, int budget)
382{ 382{
383 struct hinic_rxq *rxq = container_of(napi, struct hinic_rxq, napi); 383 struct hinic_rxq *rxq = container_of(napi, struct hinic_rxq, napi);
384 struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
384 struct hinic_rq *rq = rxq->rq; 385 struct hinic_rq *rq = rxq->rq;
385 int pkts; 386 int pkts;
386 387
@@ -389,7 +390,10 @@ static int rx_poll(struct napi_struct *napi, int budget)
389 return budget; 390 return budget;
390 391
391 napi_complete(napi); 392 napi_complete(napi);
392 enable_irq(rq->irq); 393 hinic_hwdev_set_msix_state(nic_dev->hwdev,
394 rq->msix_entry,
395 HINIC_MSIX_ENABLE);
396
393 return pkts; 397 return pkts;
394} 398}
395 399
@@ -414,7 +418,10 @@ static irqreturn_t rx_irq(int irq, void *data)
414 struct hinic_dev *nic_dev; 418 struct hinic_dev *nic_dev;
415 419
416 /* Disable the interrupt until napi will be completed */ 420 /* Disable the interrupt until napi will be completed */
417 disable_irq_nosync(rq->irq); 421 nic_dev = netdev_priv(rxq->netdev);
422 hinic_hwdev_set_msix_state(nic_dev->hwdev,
423 rq->msix_entry,
424 HINIC_MSIX_DISABLE);
418 425
419 nic_dev = netdev_priv(rxq->netdev); 426 nic_dev = netdev_priv(rxq->netdev);
420 hinic_hwdev_msix_cnt_set(nic_dev->hwdev, rq->msix_entry); 427 hinic_hwdev_msix_cnt_set(nic_dev->hwdev, rq->msix_entry);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
index 11e73e67358d..e17bf33eba0c 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
@@ -655,7 +655,9 @@ static int free_tx_poll(struct napi_struct *napi, int budget)
655 655
656 if (pkts < budget) { 656 if (pkts < budget) {
657 napi_complete(napi); 657 napi_complete(napi);
658 enable_irq(sq->irq); 658 hinic_hwdev_set_msix_state(nic_dev->hwdev,
659 sq->msix_entry,
660 HINIC_MSIX_ENABLE);
659 return pkts; 661 return pkts;
660 } 662 }
661 663
@@ -682,7 +684,9 @@ static irqreturn_t tx_irq(int irq, void *data)
682 nic_dev = netdev_priv(txq->netdev); 684 nic_dev = netdev_priv(txq->netdev);
683 685
684 /* Disable the interrupt until napi will be completed */ 686 /* Disable the interrupt until napi will be completed */
685 disable_irq_nosync(txq->sq->irq); 687 hinic_hwdev_set_msix_state(nic_dev->hwdev,
688 txq->sq->msix_entry,
689 HINIC_MSIX_DISABLE);
686 690
687 hinic_hwdev_msix_cnt_set(nic_dev->hwdev, txq->sq->msix_entry); 691 hinic_hwdev_msix_cnt_set(nic_dev->hwdev, txq->sq->msix_entry);
688 692
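The RX and TX hunks above apply the same pattern: the hard IRQ handler masks its own vector at the device and schedules NAPI, and the poll function unmasks only after completing under budget. Writing the device's MSI-X mask bit is a lighter-weight substitute for enable_irq()/disable_irq_nosync() here. A condensed sketch of the pattern (sketch_irq, sketch_poll, q and process_ring are placeholders, not names from the patch):

	static irqreturn_t sketch_irq(int irq, void *data)
	{
		/* mask this vector at the device instead of disable_irq_nosync() */
		hinic_hwdev_set_msix_state(nic_dev->hwdev, q->msix_entry,
					   HINIC_MSIX_DISABLE);
		napi_schedule(&q->napi);
		return IRQ_HANDLED;
	}

	static int sketch_poll(struct napi_struct *napi, int budget)
	{
		int done = process_ring(q, budget); /* the rxq_recv()/free_tx_poll() work */

		if (done < budget) {
			napi_complete(napi);
			hinic_hwdev_set_msix_state(nic_dev->hwdev, q->msix_entry,
						   HINIC_MSIX_ENABLE);
		}
		return done;
	}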
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index f52e2c46e6a7..0ee641c41be4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -11644,7 +11644,8 @@ static int i40e_get_phys_port_id(struct net_device *netdev,
11644static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], 11644static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
11645 struct net_device *dev, 11645 struct net_device *dev,
11646 const unsigned char *addr, u16 vid, 11646 const unsigned char *addr, u16 vid,
11647 u16 flags) 11647 u16 flags,
11648 struct netlink_ext_ack *extack)
11648{ 11649{
11649 struct i40e_netdev_priv *np = netdev_priv(dev); 11650 struct i40e_netdev_priv *np = netdev_priv(dev);
11650 struct i40e_pf *pf = np->vsi->back; 11651 struct i40e_pf *pf = np->vsi->back;
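The new netlink_ext_ack parameter threads through from the RTM_NEWNEIGH request, letting an .ndo_fdb_add implementation attach a human-readable reason to a failure instead of returning a bare errno. i40e's body is unchanged by this hunk; a hedged example of what the parameter enables:

	/* Hypothetical use, not part of this patch: */
	if (vid) {
		NL_SET_ERR_MSG_MOD(extack, "FDB add with VLAN is not supported");
		return -EINVAL;
	}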
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index a385575600f6..55944e089558 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -26,6 +26,7 @@
26#include <linux/bitmap.h> 26#include <linux/bitmap.h>
27#include <linux/log2.h> 27#include <linux/log2.h>
28#include <linux/ip.h> 28#include <linux/ip.h>
29#include <linux/sctp.h>
29#include <linux/ipv6.h> 30#include <linux/ipv6.h>
30#include <linux/if_bridge.h> 31#include <linux/if_bridge.h>
31#include <linux/avf/virtchnl.h> 32#include <linux/avf/virtchnl.h>
@@ -110,6 +111,9 @@ extern const char ice_drv_ver[];
110#define ice_for_each_alloc_rxq(vsi, i) \ 111#define ice_for_each_alloc_rxq(vsi, i) \
111 for ((i) = 0; (i) < (vsi)->alloc_rxq; (i)++) 112 for ((i) = 0; (i) < (vsi)->alloc_rxq; (i)++)
112 113
114#define ice_for_each_q_vector(vsi, i) \
115 for ((i) = 0; (i) < (vsi)->num_q_vectors; (i)++)
116
113struct ice_tc_info { 117struct ice_tc_info {
114 u16 qoffset; 118 u16 qoffset;
115 u16 qcount_tx; 119 u16 qcount_tx;
@@ -129,6 +133,17 @@ struct ice_res_tracker {
129 u16 list[1]; 133 u16 list[1];
130}; 134};
131 135
136struct ice_qs_cfg {
 137 struct mutex *qs_mutex; /* will be assigned to &pf->avail_q_mutex */
138 unsigned long *pf_map;
139 unsigned long pf_map_size;
140 unsigned int q_count;
141 unsigned int scatter_count;
142 u16 *vsi_map;
143 u16 vsi_map_offset;
144 u8 mapping_mode;
145};
146
132struct ice_sw { 147struct ice_sw {
133 struct ice_pf *pf; 148 struct ice_pf *pf;
134 u16 sw_id; /* switch ID for this switch */ 149 u16 sw_id; /* switch ID for this switch */
@@ -270,6 +285,7 @@ enum ice_pf_flags {
270 ICE_FLAG_RSS_ENA, 285 ICE_FLAG_RSS_ENA,
271 ICE_FLAG_SRIOV_ENA, 286 ICE_FLAG_SRIOV_ENA,
272 ICE_FLAG_SRIOV_CAPABLE, 287 ICE_FLAG_SRIOV_CAPABLE,
288 ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA,
273 ICE_PF_FLAGS_NBITS /* must be last */ 289 ICE_PF_FLAGS_NBITS /* must be last */
274}; 290};
275 291
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index fcdcd80b18e7..242c78469181 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -657,8 +657,13 @@ struct ice_aqc_get_topo {
657 657
658/* Update TSE (indirect 0x0403) 658/* Update TSE (indirect 0x0403)
659 * Get TSE (indirect 0x0404) 659 * Get TSE (indirect 0x0404)
660 * Add TSE (indirect 0x0401)
661 * Delete TSE (indirect 0x040F)
662 * Move TSE (indirect 0x0408)
663 * Suspend Nodes (indirect 0x0409)
664 * Resume Nodes (indirect 0x040A)
660 */ 665 */
661struct ice_aqc_get_cfg_elem { 666struct ice_aqc_sched_elem_cmd {
662 __le16 num_elem_req; /* Used by commands */ 667 __le16 num_elem_req; /* Used by commands */
663 __le16 num_elem_resp; /* Used by responses */ 668 __le16 num_elem_resp; /* Used by responses */
664 __le32 reserved; 669 __le32 reserved;
@@ -674,18 +679,6 @@ struct ice_aqc_suspend_resume_elem {
674 __le32 teid[1]; 679 __le32 teid[1];
675}; 680};
676 681
677/* Add TSE (indirect 0x0401)
678 * Delete TSE (indirect 0x040F)
679 * Move TSE (indirect 0x0408)
680 */
681struct ice_aqc_add_move_delete_elem {
682 __le16 num_grps_req;
683 __le16 num_grps_updated;
684 __le32 reserved;
685 __le32 addr_high;
686 __le32 addr_low;
687};
688
689struct ice_aqc_elem_info_bw { 682struct ice_aqc_elem_info_bw {
690 __le16 bw_profile_idx; 683 __le16 bw_profile_idx;
691 __le16 bw_alloc; 684 __le16 bw_alloc;
@@ -854,11 +847,46 @@ struct ice_aqc_get_phy_caps {
854#define ICE_PHY_TYPE_LOW_40GBASE_KR4 BIT_ULL(33) 847#define ICE_PHY_TYPE_LOW_40GBASE_KR4 BIT_ULL(33)
855#define ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC BIT_ULL(34) 848#define ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC BIT_ULL(34)
856#define ICE_PHY_TYPE_LOW_40G_XLAUI BIT_ULL(35) 849#define ICE_PHY_TYPE_LOW_40G_XLAUI BIT_ULL(35)
850#define ICE_PHY_TYPE_LOW_50GBASE_CR2 BIT_ULL(36)
851#define ICE_PHY_TYPE_LOW_50GBASE_SR2 BIT_ULL(37)
852#define ICE_PHY_TYPE_LOW_50GBASE_LR2 BIT_ULL(38)
853#define ICE_PHY_TYPE_LOW_50GBASE_KR2 BIT_ULL(39)
854#define ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC BIT_ULL(40)
855#define ICE_PHY_TYPE_LOW_50G_LAUI2 BIT_ULL(41)
856#define ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC BIT_ULL(42)
857#define ICE_PHY_TYPE_LOW_50G_AUI2 BIT_ULL(43)
858#define ICE_PHY_TYPE_LOW_50GBASE_CP BIT_ULL(44)
859#define ICE_PHY_TYPE_LOW_50GBASE_SR BIT_ULL(45)
860#define ICE_PHY_TYPE_LOW_50GBASE_FR BIT_ULL(46)
861#define ICE_PHY_TYPE_LOW_50GBASE_LR BIT_ULL(47)
862#define ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4 BIT_ULL(48)
863#define ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC BIT_ULL(49)
864#define ICE_PHY_TYPE_LOW_50G_AUI1 BIT_ULL(50)
865#define ICE_PHY_TYPE_LOW_100GBASE_CR4 BIT_ULL(51)
866#define ICE_PHY_TYPE_LOW_100GBASE_SR4 BIT_ULL(52)
867#define ICE_PHY_TYPE_LOW_100GBASE_LR4 BIT_ULL(53)
868#define ICE_PHY_TYPE_LOW_100GBASE_KR4 BIT_ULL(54)
869#define ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC BIT_ULL(55)
870#define ICE_PHY_TYPE_LOW_100G_CAUI4 BIT_ULL(56)
871#define ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC BIT_ULL(57)
872#define ICE_PHY_TYPE_LOW_100G_AUI4 BIT_ULL(58)
873#define ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 BIT_ULL(59)
874#define ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 BIT_ULL(60)
875#define ICE_PHY_TYPE_LOW_100GBASE_CP2 BIT_ULL(61)
876#define ICE_PHY_TYPE_LOW_100GBASE_SR2 BIT_ULL(62)
877#define ICE_PHY_TYPE_LOW_100GBASE_DR BIT_ULL(63)
857#define ICE_PHY_TYPE_LOW_MAX_INDEX 63 878#define ICE_PHY_TYPE_LOW_MAX_INDEX 63
879/* The second set of defines is for phy_type_high. */
880#define ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4 BIT_ULL(0)
881#define ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC BIT_ULL(1)
882#define ICE_PHY_TYPE_HIGH_100G_CAUI2 BIT_ULL(2)
883#define ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC BIT_ULL(3)
884#define ICE_PHY_TYPE_HIGH_100G_AUI2 BIT_ULL(4)
885#define ICE_PHY_TYPE_HIGH_MAX_INDEX 19
858 886
859struct ice_aqc_get_phy_caps_data { 887struct ice_aqc_get_phy_caps_data {
860 __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */ 888 __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
861 __le64 reserved; 889 __le64 phy_type_high; /* Use values from ICE_PHY_TYPE_HIGH_* */
862 u8 caps; 890 u8 caps;
863#define ICE_AQC_PHY_EN_TX_LINK_PAUSE BIT(0) 891#define ICE_AQC_PHY_EN_TX_LINK_PAUSE BIT(0)
864#define ICE_AQC_PHY_EN_RX_LINK_PAUSE BIT(1) 892#define ICE_AQC_PHY_EN_RX_LINK_PAUSE BIT(1)
@@ -923,7 +951,7 @@ struct ice_aqc_set_phy_cfg {
923/* Set PHY config command data structure */ 951/* Set PHY config command data structure */
924struct ice_aqc_set_phy_cfg_data { 952struct ice_aqc_set_phy_cfg_data {
925 __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */ 953 __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
926 __le64 rsvd0; 954 __le64 phy_type_high; /* Use values from ICE_PHY_TYPE_HIGH_* */
927 u8 caps; 955 u8 caps;
928#define ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY BIT(0) 956#define ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY BIT(0)
929#define ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY BIT(1) 957#define ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY BIT(1)
@@ -1032,10 +1060,12 @@ struct ice_aqc_get_link_status_data {
1032#define ICE_AQ_LINK_SPEED_20GB BIT(6) 1060#define ICE_AQ_LINK_SPEED_20GB BIT(6)
1033#define ICE_AQ_LINK_SPEED_25GB BIT(7) 1061#define ICE_AQ_LINK_SPEED_25GB BIT(7)
1034#define ICE_AQ_LINK_SPEED_40GB BIT(8) 1062#define ICE_AQ_LINK_SPEED_40GB BIT(8)
1063#define ICE_AQ_LINK_SPEED_50GB BIT(9)
1064#define ICE_AQ_LINK_SPEED_100GB BIT(10)
1035#define ICE_AQ_LINK_SPEED_UNKNOWN BIT(15) 1065#define ICE_AQ_LINK_SPEED_UNKNOWN BIT(15)
1036 __le32 reserved3; /* Aligns next field to 8-byte boundary */ 1066 __le32 reserved3; /* Aligns next field to 8-byte boundary */
1037 __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */ 1067 __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
1038 __le64 reserved4; 1068 __le64 phy_type_high; /* Use values from ICE_PHY_TYPE_HIGH_* */
1039}; 1069};
1040 1070
1041/* Set event mask command (direct 0x0613) */ 1071/* Set event mask command (direct 0x0613) */
@@ -1055,6 +1085,16 @@ struct ice_aqc_set_event_mask {
1055 u8 reserved1[6]; 1085 u8 reserved1[6];
1056}; 1086};
1057 1087
1088/* Set Port Identification LED (direct, 0x06E9) */
1089struct ice_aqc_set_port_id_led {
1090 u8 lport_num;
1091 u8 lport_num_valid;
1092 u8 ident_mode;
1093#define ICE_AQC_PORT_IDENT_LED_BLINK BIT(0)
1094#define ICE_AQC_PORT_IDENT_LED_ORIG 0
1095 u8 rsvd[13];
1096};
1097
1058/* NVM Read command (indirect 0x0701) 1098/* NVM Read command (indirect 0x0701)
1059 * NVM Erase commands (direct 0x0702) 1099 * NVM Erase commands (direct 0x0702)
1060 * NVM Update commands (indirect 0x0703) 1100 * NVM Update commands (indirect 0x0703)
@@ -1341,12 +1381,12 @@ struct ice_aq_desc {
1341 struct ice_aqc_get_phy_caps get_phy; 1381 struct ice_aqc_get_phy_caps get_phy;
1342 struct ice_aqc_set_phy_cfg set_phy; 1382 struct ice_aqc_set_phy_cfg set_phy;
1343 struct ice_aqc_restart_an restart_an; 1383 struct ice_aqc_restart_an restart_an;
1384 struct ice_aqc_set_port_id_led set_port_id_led;
1344 struct ice_aqc_get_sw_cfg get_sw_conf; 1385 struct ice_aqc_get_sw_cfg get_sw_conf;
1345 struct ice_aqc_sw_rules sw_rules; 1386 struct ice_aqc_sw_rules sw_rules;
1346 struct ice_aqc_get_topo get_topo; 1387 struct ice_aqc_get_topo get_topo;
1347 struct ice_aqc_get_cfg_elem get_update_elem; 1388 struct ice_aqc_sched_elem_cmd sched_elem_cmd;
1348 struct ice_aqc_query_txsched_res query_sched_res; 1389 struct ice_aqc_query_txsched_res query_sched_res;
1349 struct ice_aqc_add_move_delete_elem add_move_delete_elem;
1350 struct ice_aqc_nvm nvm; 1390 struct ice_aqc_nvm nvm;
1351 struct ice_aqc_pf_vf_msg virt; 1391 struct ice_aqc_pf_vf_msg virt;
1352 struct ice_aqc_get_set_rss_lut get_set_rss_lut; 1392 struct ice_aqc_get_set_rss_lut get_set_rss_lut;
@@ -1442,6 +1482,7 @@ enum ice_adminq_opc {
1442 ice_aqc_opc_restart_an = 0x0605, 1482 ice_aqc_opc_restart_an = 0x0605,
1443 ice_aqc_opc_get_link_status = 0x0607, 1483 ice_aqc_opc_get_link_status = 0x0607,
1444 ice_aqc_opc_set_event_mask = 0x0613, 1484 ice_aqc_opc_set_event_mask = 0x0613,
1485 ice_aqc_opc_set_port_id_led = 0x06E9,
1445 1486
1446 /* NVM commands */ 1487 /* NVM commands */
1447 ice_aqc_opc_nvm_read = 0x0701, 1488 ice_aqc_opc_nvm_read = 0x0701,
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 4c1d35da940d..b17ade424423 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -165,8 +165,10 @@ ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode,
165 cmd->param0 |= cpu_to_le16(report_mode); 165 cmd->param0 |= cpu_to_le16(report_mode);
166 status = ice_aq_send_cmd(pi->hw, &desc, pcaps, pcaps_size, cd); 166 status = ice_aq_send_cmd(pi->hw, &desc, pcaps, pcaps_size, cd);
167 167
168 if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP) 168 if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP) {
169 pi->phy.phy_type_low = le64_to_cpu(pcaps->phy_type_low); 169 pi->phy.phy_type_low = le64_to_cpu(pcaps->phy_type_low);
170 pi->phy.phy_type_high = le64_to_cpu(pcaps->phy_type_high);
171 }
170 172
171 return status; 173 return status;
172} 174}
@@ -183,6 +185,9 @@ static enum ice_media_type ice_get_media_type(struct ice_port_info *pi)
183 return ICE_MEDIA_UNKNOWN; 185 return ICE_MEDIA_UNKNOWN;
184 186
185 hw_link_info = &pi->phy.link_info; 187 hw_link_info = &pi->phy.link_info;
188 if (hw_link_info->phy_type_low && hw_link_info->phy_type_high)
189 /* If more than one media type is selected, report unknown */
190 return ICE_MEDIA_UNKNOWN;
186 191
187 if (hw_link_info->phy_type_low) { 192 if (hw_link_info->phy_type_low) {
188 switch (hw_link_info->phy_type_low) { 193 switch (hw_link_info->phy_type_low) {
@@ -196,6 +201,15 @@ static enum ice_media_type ice_get_media_type(struct ice_port_info *pi)
196 case ICE_PHY_TYPE_LOW_25G_AUI_C2C: 201 case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
197 case ICE_PHY_TYPE_LOW_40GBASE_SR4: 202 case ICE_PHY_TYPE_LOW_40GBASE_SR4:
198 case ICE_PHY_TYPE_LOW_40GBASE_LR4: 203 case ICE_PHY_TYPE_LOW_40GBASE_LR4:
204 case ICE_PHY_TYPE_LOW_50GBASE_SR2:
205 case ICE_PHY_TYPE_LOW_50GBASE_LR2:
206 case ICE_PHY_TYPE_LOW_50GBASE_SR:
207 case ICE_PHY_TYPE_LOW_50GBASE_FR:
208 case ICE_PHY_TYPE_LOW_50GBASE_LR:
209 case ICE_PHY_TYPE_LOW_100GBASE_SR4:
210 case ICE_PHY_TYPE_LOW_100GBASE_LR4:
211 case ICE_PHY_TYPE_LOW_100GBASE_SR2:
212 case ICE_PHY_TYPE_LOW_100GBASE_DR:
199 return ICE_MEDIA_FIBER; 213 return ICE_MEDIA_FIBER;
200 case ICE_PHY_TYPE_LOW_100BASE_TX: 214 case ICE_PHY_TYPE_LOW_100BASE_TX:
201 case ICE_PHY_TYPE_LOW_1000BASE_T: 215 case ICE_PHY_TYPE_LOW_1000BASE_T:
@@ -209,6 +223,11 @@ static enum ice_media_type ice_get_media_type(struct ice_port_info *pi)
209 case ICE_PHY_TYPE_LOW_25GBASE_CR_S: 223 case ICE_PHY_TYPE_LOW_25GBASE_CR_S:
210 case ICE_PHY_TYPE_LOW_25GBASE_CR1: 224 case ICE_PHY_TYPE_LOW_25GBASE_CR1:
211 case ICE_PHY_TYPE_LOW_40GBASE_CR4: 225 case ICE_PHY_TYPE_LOW_40GBASE_CR4:
226 case ICE_PHY_TYPE_LOW_50GBASE_CR2:
227 case ICE_PHY_TYPE_LOW_50GBASE_CP:
228 case ICE_PHY_TYPE_LOW_100GBASE_CR4:
229 case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4:
230 case ICE_PHY_TYPE_LOW_100GBASE_CP2:
212 return ICE_MEDIA_DA; 231 return ICE_MEDIA_DA;
213 case ICE_PHY_TYPE_LOW_1000BASE_KX: 232 case ICE_PHY_TYPE_LOW_1000BASE_KX:
214 case ICE_PHY_TYPE_LOW_2500BASE_KX: 233 case ICE_PHY_TYPE_LOW_2500BASE_KX:
@@ -219,10 +238,18 @@ static enum ice_media_type ice_get_media_type(struct ice_port_info *pi)
219 case ICE_PHY_TYPE_LOW_25GBASE_KR1: 238 case ICE_PHY_TYPE_LOW_25GBASE_KR1:
220 case ICE_PHY_TYPE_LOW_25GBASE_KR_S: 239 case ICE_PHY_TYPE_LOW_25GBASE_KR_S:
221 case ICE_PHY_TYPE_LOW_40GBASE_KR4: 240 case ICE_PHY_TYPE_LOW_40GBASE_KR4:
241 case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4:
242 case ICE_PHY_TYPE_LOW_50GBASE_KR2:
243 case ICE_PHY_TYPE_LOW_100GBASE_KR4:
244 case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4:
245 return ICE_MEDIA_BACKPLANE;
246 }
247 } else {
248 switch (hw_link_info->phy_type_high) {
249 case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4:
222 return ICE_MEDIA_BACKPLANE; 250 return ICE_MEDIA_BACKPLANE;
223 } 251 }
224 } 252 }
225
226 return ICE_MEDIA_UNKNOWN; 253 return ICE_MEDIA_UNKNOWN;
227} 254}
228 255
@@ -274,6 +301,7 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
274 /* update current link status information */ 301 /* update current link status information */
275 hw_link_info->link_speed = le16_to_cpu(link_data.link_speed); 302 hw_link_info->link_speed = le16_to_cpu(link_data.link_speed);
276 hw_link_info->phy_type_low = le64_to_cpu(link_data.phy_type_low); 303 hw_link_info->phy_type_low = le64_to_cpu(link_data.phy_type_low);
304 hw_link_info->phy_type_high = le64_to_cpu(link_data.phy_type_high);
277 *hw_media_type = ice_get_media_type(pi); 305 *hw_media_type = ice_get_media_type(pi);
278 hw_link_info->link_info = link_data.link_info; 306 hw_link_info->link_info = link_data.link_info;
279 hw_link_info->an_info = link_data.an_info; 307 hw_link_info->an_info = link_data.an_info;
@@ -750,6 +778,7 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
750 status = ICE_ERR_CFG; 778 status = ICE_ERR_CFG;
751 goto err_unroll_sched; 779 goto err_unroll_sched;
752 } 780 }
781 INIT_LIST_HEAD(&hw->agg_list);
753 782
754 status = ice_init_fltr_mgmt_struct(hw); 783 status = ice_init_fltr_mgmt_struct(hw);
755 if (status) 784 if (status)
@@ -800,6 +829,7 @@ void ice_deinit_hw(struct ice_hw *hw)
800 ice_cleanup_fltr_mgmt_struct(hw); 829 ice_cleanup_fltr_mgmt_struct(hw);
801 830
802 ice_sched_cleanup_all(hw); 831 ice_sched_cleanup_all(hw);
832 ice_sched_clear_agg(hw);
803 833
804 if (hw->port_info) { 834 if (hw->port_info) {
805 devm_kfree(ice_hw_to_dev(hw), hw->port_info); 835 devm_kfree(ice_hw_to_dev(hw), hw->port_info);
@@ -1655,7 +1685,7 @@ enum ice_status ice_get_caps(struct ice_hw *hw)
1655 * This function is used to write MAC address to the NVM (0x0108). 1685 * This function is used to write MAC address to the NVM (0x0108).
1656 */ 1686 */
1657enum ice_status 1687enum ice_status
1658ice_aq_manage_mac_write(struct ice_hw *hw, u8 *mac_addr, u8 flags, 1688ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags,
1659 struct ice_sq_cd *cd) 1689 struct ice_sq_cd *cd)
1660{ 1690{
1661 struct ice_aqc_manage_mac_write *cmd; 1691 struct ice_aqc_manage_mac_write *cmd;
@@ -1667,8 +1697,8 @@ ice_aq_manage_mac_write(struct ice_hw *hw, u8 *mac_addr, u8 flags,
1667 cmd->flags = flags; 1697 cmd->flags = flags;
1668 1698
1669 /* Prep values for flags, sah, sal */ 1699 /* Prep values for flags, sah, sal */
1670 cmd->sah = htons(*((u16 *)mac_addr)); 1700 cmd->sah = htons(*((const u16 *)mac_addr));
1671 cmd->sal = htonl(*((u32 *)(mac_addr + 2))); 1701 cmd->sal = htonl(*((const u32 *)(mac_addr + 2)));
1672 1702
1673 return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); 1703 return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
1674} 1704}
@@ -1705,16 +1735,20 @@ void ice_clear_pxe_mode(struct ice_hw *hw)
1705/** 1735/**
1706 * ice_get_link_speed_based_on_phy_type - returns link speed 1736 * ice_get_link_speed_based_on_phy_type - returns link speed
1707 * @phy_type_low: lower part of phy_type 1737 * @phy_type_low: lower part of phy_type
1738 * @phy_type_high: higher part of phy_type
1708 * 1739 *
1709 * This helper function will convert a phy_type_low to its corresponding link 1740 * This helper function will convert an entry in phy type structure
1741 * [phy_type_low, phy_type_high] to its corresponding link speed.
 1742 * Note: exactly one bit should be set across the pair, as this
 1743 * function converts a single phy type to its
1710 * speed. 1744 * speed.
1711 * Note: In the structure of phy_type_low, there should be one bit set, as
1712 * this function will convert one phy type to its speed.
1713 * If no bit gets set, ICE_LINK_SPEED_UNKNOWN will be returned 1745 * If no bit gets set, ICE_LINK_SPEED_UNKNOWN will be returned
1714 * If more than one bit gets set, ICE_LINK_SPEED_UNKNOWN will be returned 1746 * If more than one bit gets set, ICE_LINK_SPEED_UNKNOWN will be returned
1715 */ 1747 */
1716static u16 ice_get_link_speed_based_on_phy_type(u64 phy_type_low) 1748static u16
1749ice_get_link_speed_based_on_phy_type(u64 phy_type_low, u64 phy_type_high)
1717{ 1750{
1751 u16 speed_phy_type_high = ICE_AQ_LINK_SPEED_UNKNOWN;
1718 u16 speed_phy_type_low = ICE_AQ_LINK_SPEED_UNKNOWN; 1752 u16 speed_phy_type_low = ICE_AQ_LINK_SPEED_UNKNOWN;
1719 1753
1720 switch (phy_type_low) { 1754 switch (phy_type_low) {
@@ -1768,41 +1802,110 @@ static u16 ice_get_link_speed_based_on_phy_type(u64 phy_type_low)
1768 case ICE_PHY_TYPE_LOW_40G_XLAUI: 1802 case ICE_PHY_TYPE_LOW_40G_XLAUI:
1769 speed_phy_type_low = ICE_AQ_LINK_SPEED_40GB; 1803 speed_phy_type_low = ICE_AQ_LINK_SPEED_40GB;
1770 break; 1804 break;
1805 case ICE_PHY_TYPE_LOW_50GBASE_CR2:
1806 case ICE_PHY_TYPE_LOW_50GBASE_SR2:
1807 case ICE_PHY_TYPE_LOW_50GBASE_LR2:
1808 case ICE_PHY_TYPE_LOW_50GBASE_KR2:
1809 case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC:
1810 case ICE_PHY_TYPE_LOW_50G_LAUI2:
1811 case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC:
1812 case ICE_PHY_TYPE_LOW_50G_AUI2:
1813 case ICE_PHY_TYPE_LOW_50GBASE_CP:
1814 case ICE_PHY_TYPE_LOW_50GBASE_SR:
1815 case ICE_PHY_TYPE_LOW_50GBASE_FR:
1816 case ICE_PHY_TYPE_LOW_50GBASE_LR:
1817 case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4:
1818 case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC:
1819 case ICE_PHY_TYPE_LOW_50G_AUI1:
1820 speed_phy_type_low = ICE_AQ_LINK_SPEED_50GB;
1821 break;
1822 case ICE_PHY_TYPE_LOW_100GBASE_CR4:
1823 case ICE_PHY_TYPE_LOW_100GBASE_SR4:
1824 case ICE_PHY_TYPE_LOW_100GBASE_LR4:
1825 case ICE_PHY_TYPE_LOW_100GBASE_KR4:
1826 case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC:
1827 case ICE_PHY_TYPE_LOW_100G_CAUI4:
1828 case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC:
1829 case ICE_PHY_TYPE_LOW_100G_AUI4:
1830 case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4:
1831 case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4:
1832 case ICE_PHY_TYPE_LOW_100GBASE_CP2:
1833 case ICE_PHY_TYPE_LOW_100GBASE_SR2:
1834 case ICE_PHY_TYPE_LOW_100GBASE_DR:
1835 speed_phy_type_low = ICE_AQ_LINK_SPEED_100GB;
1836 break;
1771 default: 1837 default:
1772 speed_phy_type_low = ICE_AQ_LINK_SPEED_UNKNOWN; 1838 speed_phy_type_low = ICE_AQ_LINK_SPEED_UNKNOWN;
1773 break; 1839 break;
1774 } 1840 }
1775 1841
1776 return speed_phy_type_low; 1842 switch (phy_type_high) {
1843 case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4:
1844 case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC:
1845 case ICE_PHY_TYPE_HIGH_100G_CAUI2:
1846 case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC:
1847 case ICE_PHY_TYPE_HIGH_100G_AUI2:
1848 speed_phy_type_high = ICE_AQ_LINK_SPEED_100GB;
1849 break;
1850 default:
1851 speed_phy_type_high = ICE_AQ_LINK_SPEED_UNKNOWN;
1852 break;
1853 }
1854
1855 if (speed_phy_type_low == ICE_AQ_LINK_SPEED_UNKNOWN &&
1856 speed_phy_type_high == ICE_AQ_LINK_SPEED_UNKNOWN)
1857 return ICE_AQ_LINK_SPEED_UNKNOWN;
1858 else if (speed_phy_type_low != ICE_AQ_LINK_SPEED_UNKNOWN &&
1859 speed_phy_type_high != ICE_AQ_LINK_SPEED_UNKNOWN)
1860 return ICE_AQ_LINK_SPEED_UNKNOWN;
1861 else if (speed_phy_type_low != ICE_AQ_LINK_SPEED_UNKNOWN &&
1862 speed_phy_type_high == ICE_AQ_LINK_SPEED_UNKNOWN)
1863 return speed_phy_type_low;
1864 else
1865 return speed_phy_type_high;
1777} 1866}
1778 1867
1779/** 1868/**
1780 * ice_update_phy_type 1869 * ice_update_phy_type
1781 * @phy_type_low: pointer to the lower part of phy_type 1870 * @phy_type_low: pointer to the lower part of phy_type
1871 * @phy_type_high: pointer to the higher part of phy_type
1782 * @link_speeds_bitmap: targeted link speeds bitmap 1872 * @link_speeds_bitmap: targeted link speeds bitmap
1783 * 1873 *
1784 * Note: For the link_speeds_bitmap structure, you can check it at 1874 * Note: For the link_speeds_bitmap structure, you can check it at
1785 * [ice_aqc_get_link_status->link_speed]. Caller can pass in 1875 * [ice_aqc_get_link_status->link_speed]. Caller can pass in
 1786 * link_speeds_bitmap including multiple speeds. 1876 * link_speeds_bitmap including multiple speeds.
1787 * 1877 *
1788 * The value of phy_type_low will present a certain link speed. This helper 1878 * Each entry in this [phy_type_low, phy_type_high] structure will
1789 * function will turn on bits in the phy_type_low based on the value of 1879 * present a certain link speed. This helper function will turn on bits
1880 * in [phy_type_low, phy_type_high] structure based on the value of
1790 * link_speeds_bitmap input parameter. 1881 * link_speeds_bitmap input parameter.
1791 */ 1882 */
1792void ice_update_phy_type(u64 *phy_type_low, u16 link_speeds_bitmap) 1883void
1884ice_update_phy_type(u64 *phy_type_low, u64 *phy_type_high,
1885 u16 link_speeds_bitmap)
1793{ 1886{
1794 u16 speed = ICE_AQ_LINK_SPEED_UNKNOWN; 1887 u16 speed = ICE_AQ_LINK_SPEED_UNKNOWN;
1888 u64 pt_high;
1795 u64 pt_low; 1889 u64 pt_low;
1796 int index; 1890 int index;
1797 1891
1798 /* We first check with low part of phy_type */ 1892 /* We first check with low part of phy_type */
1799 for (index = 0; index <= ICE_PHY_TYPE_LOW_MAX_INDEX; index++) { 1893 for (index = 0; index <= ICE_PHY_TYPE_LOW_MAX_INDEX; index++) {
1800 pt_low = BIT_ULL(index); 1894 pt_low = BIT_ULL(index);
1801 speed = ice_get_link_speed_based_on_phy_type(pt_low); 1895 speed = ice_get_link_speed_based_on_phy_type(pt_low, 0);
1802 1896
1803 if (link_speeds_bitmap & speed) 1897 if (link_speeds_bitmap & speed)
1804 *phy_type_low |= BIT_ULL(index); 1898 *phy_type_low |= BIT_ULL(index);
1805 } 1899 }
1900
1901 /* We then check with high part of phy_type */
1902 for (index = 0; index <= ICE_PHY_TYPE_HIGH_MAX_INDEX; index++) {
1903 pt_high = BIT_ULL(index);
1904 speed = ice_get_link_speed_based_on_phy_type(0, pt_high);
1905
1906 if (link_speeds_bitmap & speed)
1907 *phy_type_high |= BIT_ULL(index);
1908 }
1806} 1909}
1807 1910
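With 100G support the PHY types no longer fit in a single u64, so the speed lookup and ice_update_phy_type() above now operate on a (phy_type_low, phy_type_high) pair, and the lookup expects exactly one of the two words to be non-zero (if both resolve to a speed it returns UNKNOWN). A minimal standalone sketch of the same round trip; the per-bit speed tables below are simplified stand-ins, not the driver's real ICE_PHY_TYPE_LOW_* and ICE_PHY_TYPE_HIGH_* layout:

#include <stdint.h>
#include <stdio.h>

#define SPEED_50GB  0x0040
#define SPEED_100GB 0x0080

/* simplified stand-in tables: the speed each phy_type bit represents */
static const uint16_t speed_of_low[64] = {
	[60] = SPEED_50GB, [61] = SPEED_100GB, [62] = SPEED_100GB,
};
static const uint16_t speed_of_high[8] = {
	[0] = SPEED_100GB, [1] = SPEED_100GB,
};

static void update_phy_type(uint64_t *low, uint64_t *high, uint16_t speeds)
{
	int i;

	/* first the low word, then the high word, as in the driver */
	for (i = 0; i < 64; i++)
		if (speed_of_low[i] & speeds)
			*low |= 1ULL << i;
	for (i = 0; i < 8; i++)
		if (speed_of_high[i] & speeds)
			*high |= 1ULL << i;
}

int main(void)
{
	uint64_t low = 0, high = 0;

	update_phy_type(&low, &high, SPEED_100GB);
	printf("phy_type_low=0x%016llx phy_type_high=0x%016llx\n",
	       (unsigned long long)low, (unsigned long long)high);
	return 0;
}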
1808/** 1911/**
@@ -1934,6 +2037,7 @@ ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update)
1934 if (ena_auto_link_update) 2037 if (ena_auto_link_update)
1935 cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT; 2038 cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
1936 /* Copy over all the old settings */ 2039 /* Copy over all the old settings */
2040 cfg.phy_type_high = pcaps->phy_type_high;
1937 cfg.phy_type_low = pcaps->phy_type_low; 2041 cfg.phy_type_low = pcaps->phy_type_low;
1938 cfg.low_power_ctrl = pcaps->low_power_ctrl; 2042 cfg.low_power_ctrl = pcaps->low_power_ctrl;
1939 cfg.eee_cap = pcaps->eee_cap; 2043 cfg.eee_cap = pcaps->eee_cap;
@@ -2032,6 +2136,34 @@ ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
2032} 2136}
2033 2137
2034/** 2138/**
2139 * ice_aq_set_port_id_led
2140 * @pi: pointer to the port information
2141 * @is_orig_mode: is this LED set to original mode (by the net-list)
2142 * @cd: pointer to command details structure or NULL
2143 *
2144 * Set LED value for the given port (0x06e9)
2145 */
2146enum ice_status
2147ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode,
2148 struct ice_sq_cd *cd)
2149{
2150 struct ice_aqc_set_port_id_led *cmd;
2151 struct ice_hw *hw = pi->hw;
2152 struct ice_aq_desc desc;
2153
2154 cmd = &desc.params.set_port_id_led;
2155
2156 ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_port_id_led);
2157
2158 if (is_orig_mode)
2159 cmd->ident_mode = ICE_AQC_PORT_IDENT_LED_ORIG;
2160 else
2161 cmd->ident_mode = ICE_AQC_PORT_IDENT_LED_BLINK;
2162
2163 return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
2164}
2165
2166/**
2035 * __ice_aq_get_set_rss_lut 2167 * __ice_aq_get_set_rss_lut
2036 * @hw: pointer to the hardware structure 2168 * @hw: pointer to the hardware structure
2037 * @vsi_id: VSI FW index 2169 * @vsi_id: VSI FW index
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index cf760c24a6aa..d7c7c2ed8823 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -28,6 +28,8 @@ ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res,
28 enum ice_aq_res_access_type access, u32 timeout); 28 enum ice_aq_res_access_type access, u32 timeout);
29void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res); 29void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res);
30enum ice_status ice_init_nvm(struct ice_hw *hw); 30enum ice_status ice_init_nvm(struct ice_hw *hw);
31enum ice_status ice_read_sr_buf(struct ice_hw *hw, u16 offset, u16 *words,
32 u16 *data);
31enum ice_status 33enum ice_status
32ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, 34ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
33 struct ice_aq_desc *desc, void *buf, u16 buf_size, 35 struct ice_aq_desc *desc, void *buf, u16 buf_size,
@@ -70,9 +72,10 @@ ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode,
70 struct ice_aqc_get_phy_caps_data *caps, 72 struct ice_aqc_get_phy_caps_data *caps,
71 struct ice_sq_cd *cd); 73 struct ice_sq_cd *cd);
72void 74void
73ice_update_phy_type(u64 *phy_type_low, u16 link_speeds_bitmap); 75ice_update_phy_type(u64 *phy_type_low, u64 *phy_type_high,
76 u16 link_speeds_bitmap);
74enum ice_status 77enum ice_status
75ice_aq_manage_mac_write(struct ice_hw *hw, u8 *mac_addr, u8 flags, 78ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags,
76 struct ice_sq_cd *cd); 79 struct ice_sq_cd *cd);
77enum ice_status ice_clear_pf_cfg(struct ice_hw *hw); 80enum ice_status ice_clear_pf_cfg(struct ice_hw *hw);
78enum ice_status 81enum ice_status
@@ -86,6 +89,10 @@ enum ice_status
86ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link, 89ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
87 struct ice_sq_cd *cd); 90 struct ice_sq_cd *cd);
88enum ice_status 91enum ice_status
92ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode,
93 struct ice_sq_cd *cd);
94
95enum ice_status
89ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids, 96ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids,
90 u32 *q_teids, enum ice_disq_rst_src rst_src, u16 vmvf_num, 97 u32 *q_teids, enum ice_disq_rst_src rst_src, u16 vmvf_num,
91 struct ice_sq_cd *cmd_details); 98 struct ice_sq_cd *cmd_details);
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 3b6e387f5440..a82f0202652d 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -114,6 +114,22 @@ static const u32 ice_regs_dump_list[] = {
114 QRX_ITR(0), 114 QRX_ITR(0),
115}; 115};
116 116
117struct ice_priv_flag {
118 char name[ETH_GSTRING_LEN];
119 u32 bitno; /* bit position in pf->flags */
120};
121
122#define ICE_PRIV_FLAG(_name, _bitno) { \
123 .name = _name, \
124 .bitno = _bitno, \
125}
126
127static const struct ice_priv_flag ice_gstrings_priv_flags[] = {
128 ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA),
129};
130
131#define ICE_PRIV_FLAG_ARRAY_SIZE ARRAY_SIZE(ice_gstrings_priv_flags)
132
117/** 133/**
118 * ice_nvm_version_str - format the NVM version strings 134 * ice_nvm_version_str - format the NVM version strings
119 * @hw: ptr to the hardware info 135 * @hw: ptr to the hardware info
@@ -152,6 +168,7 @@ ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
152 sizeof(drvinfo->fw_version)); 168 sizeof(drvinfo->fw_version));
153 strlcpy(drvinfo->bus_info, pci_name(pf->pdev), 169 strlcpy(drvinfo->bus_info, pci_name(pf->pdev),
154 sizeof(drvinfo->bus_info)); 170 sizeof(drvinfo->bus_info));
171 drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE;
155} 172}
156 173
157static int ice_get_regs_len(struct net_device __always_unused *netdev) 174static int ice_get_regs_len(struct net_device __always_unused *netdev)
@@ -203,6 +220,55 @@ static void ice_set_msglevel(struct net_device *netdev, u32 data)
203#endif /* !CONFIG_DYNAMIC_DEBUG */ 220#endif /* !CONFIG_DYNAMIC_DEBUG */
204} 221}
205 222
223static int ice_get_eeprom_len(struct net_device *netdev)
224{
225 struct ice_netdev_priv *np = netdev_priv(netdev);
226 struct ice_pf *pf = np->vsi->back;
227
228 return (int)(pf->hw.nvm.sr_words * sizeof(u16));
229}
230
231static int
232ice_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom,
233 u8 *bytes)
234{
235 struct ice_netdev_priv *np = netdev_priv(netdev);
236 u16 first_word, last_word, nwords;
237 struct ice_vsi *vsi = np->vsi;
238 struct ice_pf *pf = vsi->back;
239 struct ice_hw *hw = &pf->hw;
240 enum ice_status status;
241 struct device *dev;
242 int ret = 0;
243 u16 *buf;
244
245 dev = &pf->pdev->dev;
246
247 eeprom->magic = hw->vendor_id | (hw->device_id << 16);
248
249 first_word = eeprom->offset >> 1;
250 last_word = (eeprom->offset + eeprom->len - 1) >> 1;
251 nwords = last_word - first_word + 1;
252
253 buf = devm_kcalloc(dev, nwords, sizeof(u16), GFP_KERNEL);
254 if (!buf)
255 return -ENOMEM;
256
257 status = ice_read_sr_buf(hw, first_word, &nwords, buf);
258 if (status) {
259 dev_err(dev, "ice_read_sr_buf failed, err %d aq_err %d\n",
260 status, hw->adminq.sq_last_status);
261 eeprom->len = sizeof(u16) * nwords;
262 ret = -EIO;
263 goto out;
264 }
265
266 memcpy(bytes, (u8 *)buf + (eeprom->offset & 1), eeprom->len);
267out:
268 devm_kfree(dev, buf);
269 return ret;
270}
271
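The NVM Shadow RAM is addressed in 16-bit words while the ethtool request is in bytes, so ice_get_eeprom() widens the request to whole words and trims the odd leading byte when copying out. A worked example of the arithmetic for an unaligned request:

#include <stdio.h>

int main(void)
{
	/* ethtool asks for 6 bytes starting at byte offset 5 */
	unsigned int offset = 5, len = 6;
	unsigned int first_word = offset >> 1;            /* word 2 */
	unsigned int last_word = (offset + len - 1) >> 1; /* word 5 */
	unsigned int nwords = last_word - first_word + 1; /* 4 words = 8 bytes */

	printf("read words %u..%u (%u words), copy %u bytes from buf+%u\n",
	       first_word, last_word, nwords, len, offset & 1);
	return 0;
}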
206static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) 272static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
207{ 273{
208 struct ice_netdev_priv *np = netdev_priv(netdev); 274 struct ice_netdev_priv *np = netdev_priv(netdev);
@@ -244,11 +310,99 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
244 } 310 }
245 311
246 break; 312 break;
313 case ETH_SS_PRIV_FLAGS:
314 for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
315 snprintf(p, ETH_GSTRING_LEN, "%s",
316 ice_gstrings_priv_flags[i].name);
317 p += ETH_GSTRING_LEN;
318 }
319 break;
247 default: 320 default:
248 break; 321 break;
249 } 322 }
250} 323}
251 324
325static int
326ice_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state)
327{
328 struct ice_netdev_priv *np = netdev_priv(netdev);
329 bool led_active;
330
331 switch (state) {
332 case ETHTOOL_ID_ACTIVE:
333 led_active = true;
334 break;
335 case ETHTOOL_ID_INACTIVE:
336 led_active = false;
337 break;
338 default:
339 return -EINVAL;
340 }
341
342 if (ice_aq_set_port_id_led(np->vsi->port_info, !led_active, NULL))
343 return -EIO;
344
345 return 0;
346}
347
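ice_set_phys_id() is what backs ethtool's identify command: ETHTOOL_ID_ACTIVE overrides the port LED to blink (is_orig_mode false) and ETHTOOL_ID_INACTIVE hands it back to its netlist-defined behavior. A trivial model of that mapping:

#include <stdio.h>

enum phys_id_state { PHYS_ID_ACTIVE, PHYS_ID_INACTIVE };

/* active -> blink override, inactive -> restore original (netlist) mode */
static const char *ident_mode(enum phys_id_state state)
{
	return state == PHYS_ID_ACTIVE ? "BLINK" : "ORIG";
}

int main(void)
{
	printf("ETHTOOL_ID_ACTIVE   -> %s\n", ident_mode(PHYS_ID_ACTIVE));
	printf("ETHTOOL_ID_INACTIVE -> %s\n", ident_mode(PHYS_ID_INACTIVE));
	return 0;
}

From user space, ethtool -p eth0 10 would blink the LED for ten seconds and then restore it.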
348/**
349 * ice_get_priv_flags - report device private flags
350 * @netdev: network interface device structure
351 *
 352 * The string set count reported for ETH_SS_PRIV_FLAGS and the strings
 353 * themselves must stay in sync; add a string for each new flag to the
 354 * ice_gstrings_priv_flags array.
355 *
356 * Returns a u32 bitmap of flags.
357 */
358static u32 ice_get_priv_flags(struct net_device *netdev)
359{
360 struct ice_netdev_priv *np = netdev_priv(netdev);
361 struct ice_vsi *vsi = np->vsi;
362 struct ice_pf *pf = vsi->back;
363 u32 i, ret_flags = 0;
364
365 for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
366 const struct ice_priv_flag *priv_flag;
367
368 priv_flag = &ice_gstrings_priv_flags[i];
369
370 if (test_bit(priv_flag->bitno, pf->flags))
371 ret_flags |= BIT(i);
372 }
373
374 return ret_flags;
375}
376
377/**
378 * ice_set_priv_flags - set private flags
379 * @netdev: network interface device structure
380 * @flags: bit flags to be set
381 */
382static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
383{
384 struct ice_netdev_priv *np = netdev_priv(netdev);
385 struct ice_vsi *vsi = np->vsi;
386 struct ice_pf *pf = vsi->back;
387 u32 i;
388
 389 if (flags >= BIT(ICE_PRIV_FLAG_ARRAY_SIZE))
390 return -EINVAL;
391
392 for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
393 const struct ice_priv_flag *priv_flag;
394
395 priv_flag = &ice_gstrings_priv_flags[i];
396
397 if (flags & BIT(i))
398 set_bit(priv_flag->bitno, pf->flags);
399 else
400 clear_bit(priv_flag->bitno, pf->flags);
401 }
402
403 return 0;
404}
405
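The two callbacks above translate between ethtool's u32 flag word (one bit per string, in array order) and bit positions inside pf->flags. A standalone sketch of that translation, with the bounds check written as >= so the first undefined bit is rejected as well; in this sketch the ethtool bit index and the pf->flags bit number coincide, which the real driver does not require:

#include <stdint.h>
#include <stdio.h>

#define NPRIV 1 /* one flag, mirroring "link-down-on-close" above */

int main(void)
{
	uint32_t flags = 0x1;       /* request from ethtool --set-priv-flags */
	unsigned long pf_flags = 0; /* stand-in for the pf->flags bitmap */
	unsigned int i;

	/* bits 0..NPRIV-1 are defined, so anything >= BIT(NPRIV) is bogus */
	if (flags >= (1u << NPRIV)) {
		fprintf(stderr, "unknown private flag bits: 0x%x\n", flags);
		return 1;
	}

	for (i = 0; i < NPRIV; i++) {
		if (flags & (1u << i))
			pf_flags |= 1ul << i;    /* set_bit()   */
		else
			pf_flags &= ~(1ul << i); /* clear_bit() */
	}
	printf("pf->flags is now 0x%lx\n", pf_flags);
	return 0;
}

The flag is driven with ethtool --set-priv-flags eth0 link-down-on-close on and read back with ethtool --show-priv-flags eth0.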
252static int ice_get_sset_count(struct net_device *netdev, int sset) 406static int ice_get_sset_count(struct net_device *netdev, int sset)
253{ 407{
254 switch (sset) { 408 switch (sset) {
@@ -272,6 +426,8 @@ static int ice_get_sset_count(struct net_device *netdev, int sset)
272 * not safe. 426 * not safe.
273 */ 427 */
274 return ICE_ALL_STATS_LEN(netdev); 428 return ICE_ALL_STATS_LEN(netdev);
429 case ETH_SS_PRIV_FLAGS:
430 return ICE_PRIV_FLAG_ARRAY_SIZE;
275 default: 431 default:
276 return -EOPNOTSUPP; 432 return -EOPNOTSUPP;
277 } 433 }
@@ -337,16 +493,20 @@ ice_get_ethtool_stats(struct net_device *netdev,
337 * @netdev: network interface device structure 493 * @netdev: network interface device structure
338 * @ks: ethtool link ksettings struct to fill out 494 * @ks: ethtool link ksettings struct to fill out
339 */ 495 */
340static void ice_phy_type_to_ethtool(struct net_device *netdev, 496static void
341 struct ethtool_link_ksettings *ks) 497ice_phy_type_to_ethtool(struct net_device *netdev,
498 struct ethtool_link_ksettings *ks)
342{ 499{
343 struct ice_netdev_priv *np = netdev_priv(netdev); 500 struct ice_netdev_priv *np = netdev_priv(netdev);
344 struct ice_link_status *hw_link_info; 501 struct ice_link_status *hw_link_info;
502 bool need_add_adv_mode = false;
345 struct ice_vsi *vsi = np->vsi; 503 struct ice_vsi *vsi = np->vsi;
504 u64 phy_types_high;
346 u64 phy_types_low; 505 u64 phy_types_low;
347 506
348 hw_link_info = &vsi->port_info->phy.link_info; 507 hw_link_info = &vsi->port_info->phy.link_info;
349 phy_types_low = vsi->port_info->phy.phy_type_low; 508 phy_types_low = vsi->port_info->phy.phy_type_low;
509 phy_types_high = vsi->port_info->phy.phy_type_high;
350 510
351 ethtool_link_ksettings_zero_link_mode(ks, supported); 511 ethtool_link_ksettings_zero_link_mode(ks, supported);
352 ethtool_link_ksettings_zero_link_mode(ks, advertising); 512 ethtool_link_ksettings_zero_link_mode(ks, advertising);
@@ -495,6 +655,95 @@ static void ice_phy_type_to_ethtool(struct net_device *netdev,
495 ethtool_link_ksettings_add_link_mode(ks, advertising, 655 ethtool_link_ksettings_add_link_mode(ks, advertising,
496 40000baseLR4_Full); 656 40000baseLR4_Full);
497 } 657 }
658 if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CR2 ||
659 phy_types_low & ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC ||
660 phy_types_low & ICE_PHY_TYPE_LOW_50G_LAUI2 ||
661 phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC ||
662 phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI2 ||
663 phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CP ||
664 phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_SR ||
665 phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC ||
666 phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI1) {
667 ethtool_link_ksettings_add_link_mode(ks, supported,
668 50000baseCR2_Full);
669 if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
670 ethtool_link_ksettings_add_link_mode(ks, advertising,
671 50000baseCR2_Full);
672 }
673 if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR2 ||
674 phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4) {
675 ethtool_link_ksettings_add_link_mode(ks, supported,
676 50000baseKR2_Full);
677 if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
678 ethtool_link_ksettings_add_link_mode(ks, advertising,
679 50000baseKR2_Full);
680 }
681 if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_SR2 ||
682 phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_LR2 ||
683 phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_FR ||
684 phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_LR) {
685 ethtool_link_ksettings_add_link_mode(ks, supported,
686 50000baseSR2_Full);
687 if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
688 ethtool_link_ksettings_add_link_mode(ks, advertising,
689 50000baseSR2_Full);
690 }
691 if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CR4 ||
692 phy_types_low & ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC ||
693 phy_types_low & ICE_PHY_TYPE_LOW_100G_CAUI4 ||
694 phy_types_low & ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC ||
695 phy_types_low & ICE_PHY_TYPE_LOW_100G_AUI4 ||
696 phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 ||
697 phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CP2 ||
698 phy_types_high & ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC ||
699 phy_types_high & ICE_PHY_TYPE_HIGH_100G_CAUI2 ||
700 phy_types_high & ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC ||
701 phy_types_high & ICE_PHY_TYPE_HIGH_100G_AUI2) {
702 ethtool_link_ksettings_add_link_mode(ks, supported,
703 100000baseCR4_Full);
704 if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
705 need_add_adv_mode = true;
706 }
707 if (need_add_adv_mode) {
708 need_add_adv_mode = false;
709 ethtool_link_ksettings_add_link_mode(ks, advertising,
710 100000baseCR4_Full);
711 }
712 if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_SR4 ||
713 phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_SR2) {
714 ethtool_link_ksettings_add_link_mode(ks, supported,
715 100000baseSR4_Full);
716 if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
717 need_add_adv_mode = true;
718 }
719 if (need_add_adv_mode) {
720 need_add_adv_mode = false;
721 ethtool_link_ksettings_add_link_mode(ks, advertising,
722 100000baseSR4_Full);
723 }
724 if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_LR4 ||
725 phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_DR) {
726 ethtool_link_ksettings_add_link_mode(ks, supported,
727 100000baseLR4_ER4_Full);
728 if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
729 need_add_adv_mode = true;
730 }
731 if (need_add_adv_mode) {
732 need_add_adv_mode = false;
733 ethtool_link_ksettings_add_link_mode(ks, advertising,
734 100000baseLR4_ER4_Full);
735 }
736 if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR4 ||
737 phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 ||
738 phy_types_high & ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4) {
739 ethtool_link_ksettings_add_link_mode(ks, supported,
740 100000baseKR4_Full);
741 if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
742 need_add_adv_mode = true;
743 }
744 if (need_add_adv_mode)
745 ethtool_link_ksettings_add_link_mode(ks, advertising,
746 100000baseKR4_Full);
498 747
499 /* Autoneg PHY types */ 748 /* Autoneg PHY types */
500 if (phy_types_low & ICE_PHY_TYPE_LOW_100BASE_TX || 749 if (phy_types_low & ICE_PHY_TYPE_LOW_100BASE_TX ||
@@ -520,6 +769,24 @@ static void ice_phy_type_to_ethtool(struct net_device *netdev,
520 ethtool_link_ksettings_add_link_mode(ks, advertising, 769 ethtool_link_ksettings_add_link_mode(ks, advertising,
521 Autoneg); 770 Autoneg);
522 } 771 }
772 if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CR2 ||
773 phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR2 ||
774 phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CP ||
775 phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4) {
776 ethtool_link_ksettings_add_link_mode(ks, supported,
777 Autoneg);
778 ethtool_link_ksettings_add_link_mode(ks, advertising,
779 Autoneg);
780 }
781 if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CR4 ||
782 phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR4 ||
783 phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 ||
784 phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CP2) {
785 ethtool_link_ksettings_add_link_mode(ks, supported,
786 Autoneg);
787 ethtool_link_ksettings_add_link_mode(ks, advertising,
788 Autoneg);
789 }
523} 790}
524 791
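The supported/advertising mapping above grows by one if-block per PHY-type group, and the need_add_adv_mode flag merely hoists the advertising add out of each long multi-line condition. For illustration only, the same shape can be written table-driven; the condensed masks and names below are hypothetical, not the driver's code:

#include <stdint.h>
#include <stdio.h>

/* hypothetical condensed table: mask of phy_type_low bits -> link mode */
struct phy_mode_map {
	uint64_t low_mask;
	const char *mode;
};

static const struct phy_mode_map map[] = {
	{ 0x00FFull << 32, "50000baseCR2/Full"  },
	{ 0xFF00ull << 32, "100000baseCR4/Full" },
};

int main(void)
{
	uint64_t phy_types_low = 0x0001ull << 32; /* pretend capability word */
	size_t i;

	for (i = 0; i < sizeof(map) / sizeof(map[0]); i++)
		if (phy_types_low & map[i].low_mask)
			printf("supported: %s\n", map[i].mode);
	return 0;
}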
525#define TEST_SET_BITS_TIMEOUT 50 792#define TEST_SET_BITS_TIMEOUT 50
@@ -531,13 +798,15 @@ static void ice_phy_type_to_ethtool(struct net_device *netdev,
531 * @ks: ethtool ksettings to fill in 798 * @ks: ethtool ksettings to fill in
532 * @netdev: network interface device structure 799 * @netdev: network interface device structure
533 */ 800 */
534static void ice_get_settings_link_up(struct ethtool_link_ksettings *ks, 801static void
535 struct net_device *netdev) 802ice_get_settings_link_up(struct ethtool_link_ksettings *ks,
803 struct net_device *netdev)
536{ 804{
537 struct ice_netdev_priv *np = netdev_priv(netdev); 805 struct ice_netdev_priv *np = netdev_priv(netdev);
538 struct ethtool_link_ksettings cap_ksettings; 806 struct ethtool_link_ksettings cap_ksettings;
539 struct ice_link_status *link_info; 807 struct ice_link_status *link_info;
540 struct ice_vsi *vsi = np->vsi; 808 struct ice_vsi *vsi = np->vsi;
809 bool unrecog_phy_high = false;
541 bool unrecog_phy_low = false; 810 bool unrecog_phy_low = false;
542 811
543 link_info = &vsi->port_info->phy.link_info; 812 link_info = &vsi->port_info->phy.link_info;
@@ -699,14 +968,116 @@ static void ice_get_settings_link_up(struct ethtool_link_ksettings *ks,
699 ethtool_link_ksettings_add_link_mode(ks, advertising, 968 ethtool_link_ksettings_add_link_mode(ks, advertising,
700 40000baseKR4_Full); 969 40000baseKR4_Full);
701 break; 970 break;
971 case ICE_PHY_TYPE_LOW_50GBASE_CR2:
972 case ICE_PHY_TYPE_LOW_50GBASE_CP:
973 ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
974 ethtool_link_ksettings_add_link_mode(ks, supported,
975 50000baseCR2_Full);
976 ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
977 ethtool_link_ksettings_add_link_mode(ks, advertising,
978 50000baseCR2_Full);
979 break;
980 case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC:
981 case ICE_PHY_TYPE_LOW_50G_LAUI2:
982 case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC:
983 case ICE_PHY_TYPE_LOW_50G_AUI2:
984 case ICE_PHY_TYPE_LOW_50GBASE_SR:
985 case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC:
986 case ICE_PHY_TYPE_LOW_50G_AUI1:
987 ethtool_link_ksettings_add_link_mode(ks, supported,
988 50000baseCR2_Full);
989 break;
990 case ICE_PHY_TYPE_LOW_50GBASE_KR2:
991 case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4:
992 ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
993 ethtool_link_ksettings_add_link_mode(ks, supported,
994 50000baseKR2_Full);
995 ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
996 ethtool_link_ksettings_add_link_mode(ks, advertising,
997 50000baseKR2_Full);
998 break;
999 case ICE_PHY_TYPE_LOW_50GBASE_SR2:
1000 case ICE_PHY_TYPE_LOW_50GBASE_LR2:
1001 case ICE_PHY_TYPE_LOW_50GBASE_FR:
1002 case ICE_PHY_TYPE_LOW_50GBASE_LR:
1003 ethtool_link_ksettings_add_link_mode(ks, supported,
1004 50000baseSR2_Full);
1005 break;
1006 case ICE_PHY_TYPE_LOW_100GBASE_CR4:
1007 ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
1008 ethtool_link_ksettings_add_link_mode(ks, supported,
1009 100000baseCR4_Full);
1010 ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
1011 ethtool_link_ksettings_add_link_mode(ks, advertising,
1012 100000baseCR4_Full);
1013 break;
1014 case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC:
1015 case ICE_PHY_TYPE_LOW_100G_CAUI4:
1016 case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC:
1017 case ICE_PHY_TYPE_LOW_100G_AUI4:
1018 case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4:
1019 ethtool_link_ksettings_add_link_mode(ks, supported,
1020 100000baseCR4_Full);
1021 break;
1022 case ICE_PHY_TYPE_LOW_100GBASE_CP2:
1023 ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
1024 ethtool_link_ksettings_add_link_mode(ks, supported,
1025 100000baseCR4_Full);
1026 ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
1027 ethtool_link_ksettings_add_link_mode(ks, advertising,
1028 100000baseCR4_Full);
1029 break;
1030 case ICE_PHY_TYPE_LOW_100GBASE_SR4:
1031 case ICE_PHY_TYPE_LOW_100GBASE_SR2:
1032 ethtool_link_ksettings_add_link_mode(ks, supported,
1033 100000baseSR4_Full);
1034 break;
1035 case ICE_PHY_TYPE_LOW_100GBASE_LR4:
1036 case ICE_PHY_TYPE_LOW_100GBASE_DR:
1037 ethtool_link_ksettings_add_link_mode(ks, supported,
1038 100000baseLR4_ER4_Full);
1039 break;
1040 case ICE_PHY_TYPE_LOW_100GBASE_KR4:
1041 case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4:
1042 ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
1043 ethtool_link_ksettings_add_link_mode(ks, supported,
1044 100000baseKR4_Full);
1045 ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
1046 ethtool_link_ksettings_add_link_mode(ks, advertising,
1047 100000baseKR4_Full);
1048 break;
702 default: 1049 default:
703 unrecog_phy_low = true; 1050 unrecog_phy_low = true;
704 } 1051 }
705 1052
706 if (unrecog_phy_low) { 1053 switch (link_info->phy_type_high) {
1054 case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4:
1055 ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
1056 ethtool_link_ksettings_add_link_mode(ks, supported,
1057 100000baseKR4_Full);
1058 ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
1059 ethtool_link_ksettings_add_link_mode(ks, advertising,
1060 100000baseKR4_Full);
1061 break;
1062 case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC:
1063 case ICE_PHY_TYPE_HIGH_100G_CAUI2:
1064 case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC:
1065 case ICE_PHY_TYPE_HIGH_100G_AUI2:
1066 ethtool_link_ksettings_add_link_mode(ks, supported,
1067 100000baseCR4_Full);
1068 break;
1069 default:
1070 unrecog_phy_high = true;
1071 }
1072
1073 if (unrecog_phy_low && unrecog_phy_high) {
707 /* if we got here and link is up something bad is afoot */ 1074 /* if we got here and link is up something bad is afoot */
708 netdev_info(netdev, "WARNING: Unrecognized PHY_Low (0x%llx).\n", 1075 netdev_info(netdev,
1076 "WARNING: Unrecognized PHY_Low (0x%llx).\n",
709 (u64)link_info->phy_type_low); 1077 (u64)link_info->phy_type_low);
1078 netdev_info(netdev,
1079 "WARNING: Unrecognized PHY_High (0x%llx).\n",
1080 (u64)link_info->phy_type_high);
710 } 1081 }
711 1082
712 /* Now that we've worked out everything that could be supported by the 1083 /* Now that we've worked out everything that could be supported by the
@@ -718,6 +1089,12 @@ static void ice_get_settings_link_up(struct ethtool_link_ksettings *ks,
718 ethtool_intersect_link_masks(ks, &cap_ksettings); 1089 ethtool_intersect_link_masks(ks, &cap_ksettings);
719 1090
720 switch (link_info->link_speed) { 1091 switch (link_info->link_speed) {
1092 case ICE_AQ_LINK_SPEED_100GB:
1093 ks->base.speed = SPEED_100000;
1094 break;
1095 case ICE_AQ_LINK_SPEED_50GB:
1096 ks->base.speed = SPEED_50000;
1097 break;
721 case ICE_AQ_LINK_SPEED_40GB: 1098 case ICE_AQ_LINK_SPEED_40GB:
722 ks->base.speed = SPEED_40000; 1099 ks->base.speed = SPEED_40000;
723 break; 1100 break;
@@ -911,6 +1288,23 @@ ice_ksettings_find_adv_link_speed(const struct ethtool_link_ksettings *ks)
911 ethtool_link_ksettings_test_link_mode(ks, advertising, 1288 ethtool_link_ksettings_test_link_mode(ks, advertising,
912 40000baseKR4_Full)) 1289 40000baseKR4_Full))
913 adv_link_speed |= ICE_AQ_LINK_SPEED_40GB; 1290 adv_link_speed |= ICE_AQ_LINK_SPEED_40GB;
1291 if (ethtool_link_ksettings_test_link_mode(ks, advertising,
1292 50000baseCR2_Full) ||
1293 ethtool_link_ksettings_test_link_mode(ks, advertising,
1294 50000baseKR2_Full))
1295 adv_link_speed |= ICE_AQ_LINK_SPEED_50GB;
1296 if (ethtool_link_ksettings_test_link_mode(ks, advertising,
1297 50000baseSR2_Full))
1298 adv_link_speed |= ICE_AQ_LINK_SPEED_50GB;
1299 if (ethtool_link_ksettings_test_link_mode(ks, advertising,
1300 100000baseCR4_Full) ||
1301 ethtool_link_ksettings_test_link_mode(ks, advertising,
1302 100000baseSR4_Full) ||
1303 ethtool_link_ksettings_test_link_mode(ks, advertising,
1304 100000baseLR4_ER4_Full) ||
1305 ethtool_link_ksettings_test_link_mode(ks, advertising,
1306 100000baseKR4_Full))
1307 adv_link_speed |= ICE_AQ_LINK_SPEED_100GB;
914 1308
915 return adv_link_speed; 1309 return adv_link_speed;
916} 1310}
@@ -981,8 +1375,9 @@ ice_setup_autoneg(struct ice_port_info *p, struct ethtool_link_ksettings *ks,
981 * 1375 *
982 * Set speed/duplex per media_types advertised/forced 1376 * Set speed/duplex per media_types advertised/forced
983 */ 1377 */
984static int ice_set_link_ksettings(struct net_device *netdev, 1378static int
985 const struct ethtool_link_ksettings *ks) 1379ice_set_link_ksettings(struct net_device *netdev,
1380 const struct ethtool_link_ksettings *ks)
986{ 1381{
987 u8 autoneg, timeout = TEST_SET_BITS_TIMEOUT, lport = 0; 1382 u8 autoneg, timeout = TEST_SET_BITS_TIMEOUT, lport = 0;
988 struct ice_netdev_priv *np = netdev_priv(netdev); 1383 struct ice_netdev_priv *np = netdev_priv(netdev);
@@ -994,6 +1389,7 @@ static int ice_set_link_ksettings(struct net_device *netdev,
994 struct ice_port_info *p; 1389 struct ice_port_info *p;
995 u8 autoneg_changed = 0; 1390 u8 autoneg_changed = 0;
996 enum ice_status status; 1391 enum ice_status status;
1392 u64 phy_type_high;
997 u64 phy_type_low; 1393 u64 phy_type_low;
998 int err = 0; 1394 int err = 0;
999 bool linkup; 1395 bool linkup;
@@ -1109,7 +1505,7 @@ static int ice_set_link_ksettings(struct net_device *netdev,
1109 adv_link_speed = curr_link_speed; 1505 adv_link_speed = curr_link_speed;
1110 1506
1111 /* Convert the advertise link speeds to their corresponded PHY_TYPE */ 1507 /* Convert the advertise link speeds to their corresponded PHY_TYPE */
1112 ice_update_phy_type(&phy_type_low, adv_link_speed); 1508 ice_update_phy_type(&phy_type_low, &phy_type_high, adv_link_speed);
1113 1509
1114 if (!autoneg_changed && adv_link_speed == curr_link_speed) { 1510 if (!autoneg_changed && adv_link_speed == curr_link_speed) {
1115 netdev_info(netdev, "Nothing changed, exiting without setting anything.\n"); 1511 netdev_info(netdev, "Nothing changed, exiting without setting anything.\n");
@@ -1128,7 +1524,9 @@ static int ice_set_link_ksettings(struct net_device *netdev,
1128 /* set link and auto negotiation so changes take effect */ 1524 /* set link and auto negotiation so changes take effect */
1129 config.caps |= ICE_AQ_PHY_ENA_LINK; 1525 config.caps |= ICE_AQ_PHY_ENA_LINK;
1130 1526
1131 if (phy_type_low) { 1527 if (phy_type_low || phy_type_high) {
1528 config.phy_type_high = cpu_to_le64(phy_type_high) &
1529 abilities->phy_type_high;
1132 config.phy_type_low = cpu_to_le64(phy_type_low) & 1530 config.phy_type_low = cpu_to_le64(phy_type_low) &
1133 abilities->phy_type_low; 1531 abilities->phy_type_low;
1134 } else { 1532 } else {
@@ -1667,6 +2065,258 @@ static int ice_set_rxfh(struct net_device *netdev, const u32 *indir,
1667 return 0; 2065 return 0;
1668} 2066}
1669 2067
2068enum ice_container_type {
2069 ICE_RX_CONTAINER,
2070 ICE_TX_CONTAINER,
2071};
2072
2073/**
2074 * ice_get_rc_coalesce - get ITR values for specific ring container
2075 * @ec: ethtool structure to fill with driver's coalesce settings
2076 * @c_type: container type, RX or TX
2077 * @rc: ring container that the ITR values will come from
2078 *
 2079 * Query the device for ice_ring_container specific ITR values. This is
 2080 * done per ice_ring_container because each q_vector can have one or more
 2081 * rings, and all of its rings share the same ITR values.
2082 *
2083 * Returns 0 on success, negative otherwise.
2084 */
2085static int
2086ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type,
2087 struct ice_ring_container *rc)
2088{
2089 struct ice_pf *pf = rc->ring->vsi->back;
2090
2091 switch (c_type) {
2092 case ICE_RX_CONTAINER:
2093 ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc->itr_setting);
2094 ec->rx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC;
2095 break;
2096 case ICE_TX_CONTAINER:
2097 ec->use_adaptive_tx_coalesce = ITR_IS_DYNAMIC(rc->itr_setting);
2098 ec->tx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC;
2099 break;
2100 default:
2101 dev_dbg(&pf->pdev->dev, "Invalid c_type %d\n", c_type);
2102 return -EINVAL;
2103 }
2104
2105 return 0;
2106}
2107
2108/**
2109 * __ice_get_coalesce - get ITR/INTRL values for the device
2110 * @netdev: pointer to the netdev associated with this query
2111 * @ec: ethtool structure to fill with driver's coalesce settings
2112 * @q_num: queue number to get the coalesce settings for
2113 */
2114static int
2115__ice_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
2116 int q_num)
2117{
2118 struct ice_netdev_priv *np = netdev_priv(netdev);
2119 int tx = -EINVAL, rx = -EINVAL;
2120 struct ice_vsi *vsi = np->vsi;
2121
2122 if (q_num < 0) {
2123 rx = ice_get_rc_coalesce(ec, ICE_RX_CONTAINER,
2124 &vsi->rx_rings[0]->q_vector->rx);
2125 tx = ice_get_rc_coalesce(ec, ICE_TX_CONTAINER,
2126 &vsi->tx_rings[0]->q_vector->tx);
2127
2128 goto update_coalesced_frames;
2129 }
2130
2131 if (q_num < vsi->num_rxq && q_num < vsi->num_txq) {
2132 rx = ice_get_rc_coalesce(ec, ICE_RX_CONTAINER,
2133 &vsi->rx_rings[q_num]->q_vector->rx);
2134 tx = ice_get_rc_coalesce(ec, ICE_TX_CONTAINER,
2135 &vsi->tx_rings[q_num]->q_vector->tx);
2136 } else if (q_num < vsi->num_rxq) {
2137 rx = ice_get_rc_coalesce(ec, ICE_RX_CONTAINER,
2138 &vsi->rx_rings[q_num]->q_vector->rx);
2139 } else if (q_num < vsi->num_txq) {
2140 tx = ice_get_rc_coalesce(ec, ICE_TX_CONTAINER,
2141 &vsi->tx_rings[q_num]->q_vector->tx);
2142 } else {
2143 /* q_num is invalid for both Rx and Tx queues */
2144 return -EINVAL;
2145 }
2146
2147update_coalesced_frames:
 2148 /* either q_num is invalid for both Rx and Tx queues or getting coalesce
2149 * failed completely
2150 */
2151 if (tx && rx)
2152 return -EINVAL;
2153
2154 if (q_num < vsi->num_txq)
2155 ec->tx_max_coalesced_frames_irq = vsi->work_lmt;
2156
2157 if (q_num < vsi->num_rxq)
2158 ec->rx_max_coalesced_frames_irq = vsi->work_lmt;
2159
2160 return 0;
2161}
2162
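In the get path above, rc->itr_setting is a packed value: the low bits hold the interval in microseconds and a flag bit (ICE_ITR_DYNAMIC) marks adaptive mode, which is why the code reports ITR_IS_DYNAMIC(...) for the adaptive knob and itr_setting & ~ICE_ITR_DYNAMIC for the usecs. A minimal model of that encoding; the flag value here is an assumption, not the driver's actual constant:

#include <stdio.h>

#define ITR_DYNAMIC 0x8000u /* assumed flag bit, mirrors ICE_ITR_DYNAMIC */

int main(void)
{
	unsigned int itr_setting = 50 | ITR_DYNAMIC; /* adaptive, 50 usecs */

	printf("use_adaptive_rx_coalesce = %d\n", !!(itr_setting & ITR_DYNAMIC));
	printf("rx_coalesce_usecs        = %u\n", itr_setting & ~ITR_DYNAMIC);
	return 0;
}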
2163static int
2164ice_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec)
2165{
2166 return __ice_get_coalesce(netdev, ec, -1);
2167}
2168
2169static int ice_get_per_q_coalesce(struct net_device *netdev, u32 q_num,
2170 struct ethtool_coalesce *ec)
2171{
2172 return __ice_get_coalesce(netdev, ec, q_num);
2173}
2174
2175/**
2176 * ice_set_rc_coalesce - set ITR values for specific ring container
2177 * @c_type: container type, RX or TX
2178 * @ec: ethtool structure from user to update ITR settings
 2179 * @rc: ring container whose ITR values will be updated
2180 * @vsi: VSI associated to the ring container
2181 *
 2182 * Set specific ITR values. This is done per ice_ring_container because each
 2183 * q_vector can have one or more rings, and all of its rings share the same
 2184 * ITR values.
2185 *
2186 * Returns 0 on success, negative otherwise.
2187 */
2188static int
2189ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
2190 struct ice_ring_container *rc, struct ice_vsi *vsi)
2191{
2192 struct ice_pf *pf = vsi->back;
2193 u16 itr_setting;
2194
2195 if (!rc->ring)
2196 return -EINVAL;
2197
2198 itr_setting = rc->itr_setting & ~ICE_ITR_DYNAMIC;
2199
2200 switch (c_type) {
2201 case ICE_RX_CONTAINER:
2202 if (ec->rx_coalesce_usecs != itr_setting &&
2203 ec->use_adaptive_rx_coalesce) {
2204 netdev_info(vsi->netdev,
2205 "Rx interrupt throttling cannot be changed if adaptive-rx is enabled\n");
2206 return -EINVAL;
2207 }
2208
2209 if (ec->rx_coalesce_usecs > ICE_ITR_MAX) {
2210 netdev_info(vsi->netdev,
2211 "Invalid value, rx-usecs range is 0-%d\n",
2212 ICE_ITR_MAX);
2213 return -EINVAL;
2214 }
2215
2216 if (ec->use_adaptive_rx_coalesce) {
2217 rc->itr_setting |= ICE_ITR_DYNAMIC;
2218 } else {
2219 rc->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
2220 rc->target_itr = ITR_TO_REG(rc->itr_setting);
2221 }
2222 break;
2223 case ICE_TX_CONTAINER:
2224 if (ec->tx_coalesce_usecs != itr_setting &&
2225 ec->use_adaptive_tx_coalesce) {
2226 netdev_info(vsi->netdev,
2227 "Tx interrupt throttling cannot be changed if adaptive-tx is enabled\n");
2228 return -EINVAL;
2229 }
2230
2231 if (ec->tx_coalesce_usecs > ICE_ITR_MAX) {
2232 netdev_info(vsi->netdev,
2233 "Invalid value, tx-usecs range is 0-%d\n",
2234 ICE_ITR_MAX);
2235 return -EINVAL;
2236 }
2237
2238 if (ec->use_adaptive_tx_coalesce) {
2239 rc->itr_setting |= ICE_ITR_DYNAMIC;
2240 } else {
2241 rc->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
2242 rc->target_itr = ITR_TO_REG(rc->itr_setting);
2243 }
2244 break;
2245 default:
2246 dev_dbg(&pf->pdev->dev, "Invalid container type %d\n", c_type);
2247 return -EINVAL;
2248 }
2249
2250 return 0;
2251}
2252
2253static int
2254__ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
2255 int q_num)
2256{
2257 struct ice_netdev_priv *np = netdev_priv(netdev);
2258 int rx = -EINVAL, tx = -EINVAL;
2259 struct ice_vsi *vsi = np->vsi;
2260
2261 if (q_num < 0) {
2262 int i;
2263
2264 ice_for_each_q_vector(vsi, i) {
2265 struct ice_q_vector *q_vector = vsi->q_vectors[i];
2266
2267 if (ice_set_rc_coalesce(ICE_RX_CONTAINER, ec,
2268 &q_vector->rx, vsi) ||
2269 ice_set_rc_coalesce(ICE_TX_CONTAINER, ec,
2270 &q_vector->tx, vsi))
2271 return -EINVAL;
2272 }
2273
2274 goto set_work_lmt;
2275 }
2276
2277 if (q_num < vsi->num_rxq && q_num < vsi->num_txq) {
2278 rx = ice_set_rc_coalesce(ICE_RX_CONTAINER, ec,
2279 &vsi->rx_rings[q_num]->q_vector->rx,
2280 vsi);
2281 tx = ice_set_rc_coalesce(ICE_TX_CONTAINER, ec,
2282 &vsi->tx_rings[q_num]->q_vector->tx,
2283 vsi);
2284 } else if (q_num < vsi->num_rxq) {
2285 rx = ice_set_rc_coalesce(ICE_RX_CONTAINER, ec,
2286 &vsi->rx_rings[q_num]->q_vector->rx,
2287 vsi);
2288 } else if (q_num < vsi->num_txq) {
2289 tx = ice_set_rc_coalesce(ICE_TX_CONTAINER, ec,
2290 &vsi->tx_rings[q_num]->q_vector->tx,
2291 vsi);
2292 }
2293
2294 /* either q_num is invalid for both Rx and Tx queues or setting coalesce
2295 * failed completely
2296 */
2297 if (rx && tx)
2298 return -EINVAL;
2299
2300set_work_lmt:
2301 if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
2302 vsi->work_lmt = max(ec->tx_max_coalesced_frames_irq,
2303 ec->rx_max_coalesced_frames_irq);
2304
2305 return 0;
2306}
2307
2308static int
2309ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec)
2310{
2311 return __ice_set_coalesce(netdev, ec, -1);
2312}
2313
2314static int ice_set_per_q_coalesce(struct net_device *netdev, u32 q_num,
2315 struct ethtool_coalesce *ec)
2316{
2317 return __ice_set_coalesce(netdev, ec, q_num);
2318}
2319
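With __ice_set_coalesce() taking q_num (negative meaning every queue vector), the per-queue ethtool hooks become one-line wrappers. From user space, ethtool -C eth0 rx-usecs 50 lands in ice_set_coalesce(), while ethtool --per-queue eth0 queue_mask 0x1 --coalesce rx-usecs 50 exercises ice_set_per_q_coalesce() for queue 0 only.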
1670static const struct ethtool_ops ice_ethtool_ops = { 2320static const struct ethtool_ops ice_ethtool_ops = {
1671 .get_link_ksettings = ice_get_link_ksettings, 2321 .get_link_ksettings = ice_get_link_ksettings,
1672 .set_link_ksettings = ice_set_link_ksettings, 2322 .set_link_ksettings = ice_set_link_ksettings,
@@ -1676,8 +2326,15 @@ static const struct ethtool_ops ice_ethtool_ops = {
1676 .get_msglevel = ice_get_msglevel, 2326 .get_msglevel = ice_get_msglevel,
1677 .set_msglevel = ice_set_msglevel, 2327 .set_msglevel = ice_set_msglevel,
1678 .get_link = ethtool_op_get_link, 2328 .get_link = ethtool_op_get_link,
2329 .get_eeprom_len = ice_get_eeprom_len,
2330 .get_eeprom = ice_get_eeprom,
2331 .get_coalesce = ice_get_coalesce,
2332 .set_coalesce = ice_set_coalesce,
1679 .get_strings = ice_get_strings, 2333 .get_strings = ice_get_strings,
2334 .set_phys_id = ice_set_phys_id,
1680 .get_ethtool_stats = ice_get_ethtool_stats, 2335 .get_ethtool_stats = ice_get_ethtool_stats,
2336 .get_priv_flags = ice_get_priv_flags,
2337 .set_priv_flags = ice_set_priv_flags,
1681 .get_sset_count = ice_get_sset_count, 2338 .get_sset_count = ice_get_sset_count,
1682 .get_rxnfc = ice_get_rxnfc, 2339 .get_rxnfc = ice_get_rxnfc,
1683 .get_ringparam = ice_get_ringparam, 2340 .get_ringparam = ice_get_ringparam,
@@ -1689,6 +2346,9 @@ static const struct ethtool_ops ice_ethtool_ops = {
1689 .get_rxfh_indir_size = ice_get_rxfh_indir_size, 2346 .get_rxfh_indir_size = ice_get_rxfh_indir_size,
1690 .get_rxfh = ice_get_rxfh, 2347 .get_rxfh = ice_get_rxfh,
1691 .set_rxfh = ice_set_rxfh, 2348 .set_rxfh = ice_set_rxfh,
2349 .get_ts_info = ethtool_op_get_ts_info,
2350 .get_per_queue_coalesce = ice_get_per_q_coalesce,
2351 .set_per_queue_coalesce = ice_set_per_q_coalesce,
1692}; 2352};
1693 2353
1694/** 2354/**
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 5507928c8fbe..f9a38f2cd470 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -110,6 +110,7 @@
110#define GLINT_DYN_CTL_CLEARPBA_M BIT(1) 110#define GLINT_DYN_CTL_CLEARPBA_M BIT(1)
111#define GLINT_DYN_CTL_SWINT_TRIG_M BIT(2) 111#define GLINT_DYN_CTL_SWINT_TRIG_M BIT(2)
112#define GLINT_DYN_CTL_ITR_INDX_S 3 112#define GLINT_DYN_CTL_ITR_INDX_S 3
113#define GLINT_DYN_CTL_INTERVAL_S 5
113#define GLINT_DYN_CTL_SW_ITR_INDX_M ICE_M(0x3, 25) 114#define GLINT_DYN_CTL_SW_ITR_INDX_M ICE_M(0x3, 25)
114#define GLINT_DYN_CTL_INTENA_MSK_M BIT(31) 115#define GLINT_DYN_CTL_INTENA_MSK_M BIT(31)
115#define GLINT_ITR(_i, _INT) (0x00154000 + ((_i) * 8192 + (_INT) * 4)) 116#define GLINT_ITR(_i, _INT) (0x00154000 + ((_i) * 8192 + (_INT) * 4))
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index bb51dd7defb5..ef4c79b5aa32 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -346,6 +346,7 @@ enum ice_tx_desc_cmd_bits {
346 ICE_TX_DESC_CMD_IIPT_IPV4 = 0x0040, /* 2 BITS */ 346 ICE_TX_DESC_CMD_IIPT_IPV4 = 0x0040, /* 2 BITS */
347 ICE_TX_DESC_CMD_IIPT_IPV4_CSUM = 0x0060, /* 2 BITS */ 347 ICE_TX_DESC_CMD_IIPT_IPV4_CSUM = 0x0060, /* 2 BITS */
348 ICE_TX_DESC_CMD_L4T_EOFT_TCP = 0x0100, /* 2 BITS */ 348 ICE_TX_DESC_CMD_L4T_EOFT_TCP = 0x0100, /* 2 BITS */
349 ICE_TX_DESC_CMD_L4T_EOFT_SCTP = 0x0200, /* 2 BITS */
349 ICE_TX_DESC_CMD_L4T_EOFT_UDP = 0x0300, /* 2 BITS */ 350 ICE_TX_DESC_CMD_L4T_EOFT_UDP = 0x0300, /* 2 BITS */
350}; 351};
351 352
@@ -488,5 +489,7 @@ static inline struct ice_rx_ptype_decoded ice_decode_rx_desc_ptype(u16 ptype)
488#define ICE_LINK_SPEED_20000MBPS 20000 489#define ICE_LINK_SPEED_20000MBPS 20000
489#define ICE_LINK_SPEED_25000MBPS 25000 490#define ICE_LINK_SPEED_25000MBPS 25000
490#define ICE_LINK_SPEED_40000MBPS 40000 491#define ICE_LINK_SPEED_40000MBPS 40000
492#define ICE_LINK_SPEED_50000MBPS 50000
493#define ICE_LINK_SPEED_100000MBPS 100000
491 494
492#endif /* _ICE_LAN_TX_RX_H_ */ 495#endif /* _ICE_LAN_TX_RX_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 29b1dcfd4331..27c3760ae5cb 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -514,110 +514,89 @@ unlock_pf:
514} 514}
515 515
516/** 516/**
517 * ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI 517 * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI
518 * @vsi: the VSI getting queues 518 * @qs_cfg: gathered variables needed for PF->VSI queue assignment
519 * 519 *
520 * Return 0 on success and a negative value on error 520 * Return 0 on success and -ENOMEM if there is no space left in the PF queue bitmap
521 */ 521 */
522static int ice_vsi_get_qs_contig(struct ice_vsi *vsi) 522static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg)
523{ 523{
524 struct ice_pf *pf = vsi->back; 524 int offset, i;
525 int offset, ret = 0;
526
527 mutex_lock(&pf->avail_q_mutex);
528 /* look for contiguous block of queues for Tx */
529 offset = bitmap_find_next_zero_area(pf->avail_txqs, ICE_MAX_TXQS,
530 0, vsi->alloc_txq, 0);
531 if (offset < ICE_MAX_TXQS) {
532 int i;
533 525
534 bitmap_set(pf->avail_txqs, offset, vsi->alloc_txq); 526 mutex_lock(qs_cfg->qs_mutex);
535 for (i = 0; i < vsi->alloc_txq; i++) 527 offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size,
536 vsi->txq_map[i] = i + offset; 528 0, qs_cfg->q_count, 0);
537 } else { 529 if (offset >= qs_cfg->pf_map_size) {
538 ret = -ENOMEM; 530 mutex_unlock(qs_cfg->qs_mutex);
539 vsi->tx_mapping_mode = ICE_VSI_MAP_SCATTER; 531 return -ENOMEM;
540 } 532 }
541 533
542 /* look for contiguous block of queues for Rx */ 534 bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count);
543 offset = bitmap_find_next_zero_area(pf->avail_rxqs, ICE_MAX_RXQS, 535 for (i = 0; i < qs_cfg->q_count; i++)
544 0, vsi->alloc_rxq, 0); 536 qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = i + offset;
545 if (offset < ICE_MAX_RXQS) { 537 mutex_unlock(qs_cfg->qs_mutex);
546 int i;
547
548 bitmap_set(pf->avail_rxqs, offset, vsi->alloc_rxq);
549 for (i = 0; i < vsi->alloc_rxq; i++)
550 vsi->rxq_map[i] = i + offset;
551 } else {
552 ret = -ENOMEM;
553 vsi->rx_mapping_mode = ICE_VSI_MAP_SCATTER;
554 }
555 mutex_unlock(&pf->avail_q_mutex);
556 538
557 return ret; 539 return 0;
558} 540}
559 541
560/** 542/**
561 * ice_vsi_get_qs_scatter - Assign scattered queues to VSI 543 * __ice_vsi_get_qs_sc - Assign scattered queues from PF to VSI
562 * @vsi: the VSI getting queues 544 * @qs_cfg: gathered variables needed for PF->VSI queue assignment
563 * 545 *
564 * Return 0 on success and a negative value on error 546 * Return 0 on success and -ENOMEM if there is no space left in the PF queue bitmap
565 */ 547 */
566static int ice_vsi_get_qs_scatter(struct ice_vsi *vsi) 548static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg)
567{ 549{
568 struct ice_pf *pf = vsi->back;
569 int i, index = 0; 550 int i, index = 0;
570 551
571 mutex_lock(&pf->avail_q_mutex); 552 mutex_lock(qs_cfg->qs_mutex);
572 553 for (i = 0; i < qs_cfg->q_count; i++) {
573 if (vsi->tx_mapping_mode == ICE_VSI_MAP_SCATTER) { 554 index = find_next_zero_bit(qs_cfg->pf_map,
574 for (i = 0; i < vsi->alloc_txq; i++) { 555 qs_cfg->pf_map_size, index);
575 index = find_next_zero_bit(pf->avail_txqs, 556 if (index >= qs_cfg->pf_map_size)
576 ICE_MAX_TXQS, index); 557 goto err_scatter;
577 if (index < ICE_MAX_TXQS) { 558 set_bit(index, qs_cfg->pf_map);
578 set_bit(index, pf->avail_txqs); 559 qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = index;
579 vsi->txq_map[i] = index;
580 } else {
581 goto err_scatter_tx;
582 }
583 }
584 } 560 }
561 mutex_unlock(qs_cfg->qs_mutex);
585 562
586 if (vsi->rx_mapping_mode == ICE_VSI_MAP_SCATTER) {
587 for (i = 0; i < vsi->alloc_rxq; i++) {
588 index = find_next_zero_bit(pf->avail_rxqs,
589 ICE_MAX_RXQS, index);
590 if (index < ICE_MAX_RXQS) {
591 set_bit(index, pf->avail_rxqs);
592 vsi->rxq_map[i] = index;
593 } else {
594 goto err_scatter_rx;
595 }
596 }
597 }
598
599 mutex_unlock(&pf->avail_q_mutex);
600 return 0; 563 return 0;
601 564err_scatter:
602err_scatter_rx:
603 /* unflag any queues we have grabbed (i is failed position) */
604 for (index = 0; index < i; index++) {
605 clear_bit(vsi->rxq_map[index], pf->avail_rxqs);
606 vsi->rxq_map[index] = 0;
607 }
608 i = vsi->alloc_txq;
609err_scatter_tx:
610 /* i is either position of failed attempt or vsi->alloc_txq */
611 for (index = 0; index < i; index++) { 565 for (index = 0; index < i; index++) {
612 clear_bit(vsi->txq_map[index], pf->avail_txqs); 566 clear_bit(qs_cfg->vsi_map[index], qs_cfg->pf_map);
613 vsi->txq_map[index] = 0; 567 qs_cfg->vsi_map[index + qs_cfg->vsi_map_offset] = 0;
614 } 568 }
569 mutex_unlock(qs_cfg->qs_mutex);
615 570
616 mutex_unlock(&pf->avail_q_mutex);
617 return -ENOMEM; 571 return -ENOMEM;
618} 572}
619 573
620/** 574/**
575 * __ice_vsi_get_qs - helper function for assigning queues from PF to VSI
 576 * @qs_cfg: gathered variables needed for PF->VSI queue assignment
577 *
578 * This is an internal function for assigning queues from the PF to VSI and
 579 * initially tries to find contiguous space. If that fails, it falls back
 580 * to the scatter approach.
581 *
 582 * Return 0 on success and -ENOMEM if there is no space left in the PF queue bitmap
583 */
584static int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg)
585{
586 int ret = 0;
587
588 ret = __ice_vsi_get_qs_contig(qs_cfg);
589 if (ret) {
590 /* contig failed, so try with scatter approach */
591 qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER;
592 qs_cfg->q_count = min_t(u16, qs_cfg->q_count,
593 qs_cfg->scatter_count);
594 ret = __ice_vsi_get_qs_sc(qs_cfg);
595 }
596 return ret;
597}
598
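The refactor above folds the old Tx- and Rx-specific allocators into one pair of helpers driven by ice_qs_cfg: try a contiguous block first, then fall back to grabbing free bits one at a time (after clamping the count to the scatter limit), rolling back on failure. A standalone sketch of the same contiguous-then-scatter strategy on a byte-per-entry map:

#include <stdio.h>

#define MAP_SIZE 16

/* find a run of n clear entries; returns MAP_SIZE if none exists */
static int find_zero_area(const char *map, int n)
{
	int start, i;

	for (start = 0; start + n <= MAP_SIZE; start++) {
		for (i = 0; i < n; i++)
			if (map[start + i])
				break;
		if (i == n)
			return start;
	}
	return MAP_SIZE;
}

static int get_qs(char *map, int *vsi_map, int count)
{
	int offset = find_zero_area(map, count);
	int i, idx;

	if (offset < MAP_SIZE) { /* contiguous chunk found */
		for (i = 0; i < count; i++) {
			map[offset + i] = 1;
			vsi_map[i] = offset + i;
		}
		return 0;
	}

	/* scatter fallback: take free entries wherever they are */
	for (i = 0; i < count; i++) {
		for (idx = 0; idx < MAP_SIZE && map[idx]; idx++)
			;
		if (idx == MAP_SIZE) {
			while (i--) /* roll back what we grabbed */
				map[vsi_map[i]] = 0;
			return -1;
		}
		map[idx] = 1;
		vsi_map[i] = idx;
	}
	return 0;
}

int main(void)
{
	char map[MAP_SIZE] = { 0, 0, 0, 1, 1 }; /* queues 3 and 4 in use */
	int vsi_map[4], i;

	if (!get_qs(map, vsi_map, 4))
		for (i = 0; i < 4; i++)
			printf("vsi q%d -> pf q%d\n", i, vsi_map[i]);
	return 0;
}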
599/**
621 * ice_vsi_get_qs - Assign queues from PF to VSI 600 * ice_vsi_get_qs - Assign queues from PF to VSI
622 * @vsi: the VSI to assign queues to 601 * @vsi: the VSI to assign queues to
623 * 602 *
@@ -625,25 +604,35 @@ err_scatter_tx:
625 */ 604 */
626static int ice_vsi_get_qs(struct ice_vsi *vsi) 605static int ice_vsi_get_qs(struct ice_vsi *vsi)
627{ 606{
607 struct ice_pf *pf = vsi->back;
608 struct ice_qs_cfg tx_qs_cfg = {
609 .qs_mutex = &pf->avail_q_mutex,
610 .pf_map = pf->avail_txqs,
611 .pf_map_size = ICE_MAX_TXQS,
612 .q_count = vsi->alloc_txq,
613 .scatter_count = ICE_MAX_SCATTER_TXQS,
614 .vsi_map = vsi->txq_map,
615 .vsi_map_offset = 0,
616 .mapping_mode = vsi->tx_mapping_mode
617 };
618 struct ice_qs_cfg rx_qs_cfg = {
619 .qs_mutex = &pf->avail_q_mutex,
620 .pf_map = pf->avail_rxqs,
621 .pf_map_size = ICE_MAX_RXQS,
622 .q_count = vsi->alloc_rxq,
623 .scatter_count = ICE_MAX_SCATTER_RXQS,
624 .vsi_map = vsi->rxq_map,
625 .vsi_map_offset = 0,
626 .mapping_mode = vsi->rx_mapping_mode
627 };
628 int ret = 0; 628 int ret = 0;
629 629
630 vsi->tx_mapping_mode = ICE_VSI_MAP_CONTIG; 630 vsi->tx_mapping_mode = ICE_VSI_MAP_CONTIG;
631 vsi->rx_mapping_mode = ICE_VSI_MAP_CONTIG; 631 vsi->rx_mapping_mode = ICE_VSI_MAP_CONTIG;
632 632
633 /* NOTE: ice_vsi_get_qs_contig() will set the Rx/Tx mapping 633 ret = __ice_vsi_get_qs(&tx_qs_cfg);
634 * modes individually to scatter if assigning contiguous queues 634 if (!ret)
635 * to Rx or Tx fails 635 ret = __ice_vsi_get_qs(&rx_qs_cfg);
636 */
637 ret = ice_vsi_get_qs_contig(vsi);
638 if (ret < 0) {
639 if (vsi->tx_mapping_mode == ICE_VSI_MAP_SCATTER)
640 vsi->alloc_txq = max_t(u16, vsi->alloc_txq,
641 ICE_MAX_SCATTER_TXQS);
642 if (vsi->rx_mapping_mode == ICE_VSI_MAP_SCATTER)
643 vsi->alloc_rxq = max_t(u16, vsi->alloc_rxq,
644 ICE_MAX_SCATTER_RXQS);
645 ret = ice_vsi_get_qs_scatter(vsi);
646 }
647 636
648 return ret; 637 return ret;
649} 638}
@@ -1614,11 +1603,14 @@ setup_rings:
1614/** 1603/**
1615 * ice_vsi_cfg_txqs - Configure the VSI for Tx 1604 * ice_vsi_cfg_txqs - Configure the VSI for Tx
1616 * @vsi: the VSI being configured 1605 * @vsi: the VSI being configured
1606 * @rings: Tx ring array to be configured
1607 * @offset: offset within vsi->txq_map
1617 * 1608 *
1618 * Return 0 on success and a negative value on error 1609 * Return 0 on success and a negative value on error
1619 * Configure the Tx VSI for operation. 1610 * Configure the Tx VSI for operation.
1620 */ 1611 */
1621int ice_vsi_cfg_txqs(struct ice_vsi *vsi) 1612static int
1613ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, int offset)
1622{ 1614{
1623 struct ice_aqc_add_tx_qgrp *qg_buf; 1615 struct ice_aqc_add_tx_qgrp *qg_buf;
1624 struct ice_aqc_add_txqs_perq *txq; 1616 struct ice_aqc_add_txqs_perq *txq;
@@ -1626,7 +1618,7 @@ int ice_vsi_cfg_txqs(struct ice_vsi *vsi)
1626 u8 num_q_grps, q_idx = 0; 1618 u8 num_q_grps, q_idx = 0;
1627 enum ice_status status; 1619 enum ice_status status;
1628 u16 buf_len, i, pf_q; 1620 u16 buf_len, i, pf_q;
1629 int err = 0, tc = 0; 1621 int err = 0, tc;
1630 1622
1631 buf_len = sizeof(struct ice_aqc_add_tx_qgrp); 1623 buf_len = sizeof(struct ice_aqc_add_tx_qgrp);
1632 qg_buf = devm_kzalloc(&pf->pdev->dev, buf_len, GFP_KERNEL); 1624 qg_buf = devm_kzalloc(&pf->pdev->dev, buf_len, GFP_KERNEL);
@@ -1644,9 +1636,8 @@ int ice_vsi_cfg_txqs(struct ice_vsi *vsi)
1644 for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) { 1636 for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) {
1645 struct ice_tlan_ctx tlan_ctx = { 0 }; 1637 struct ice_tlan_ctx tlan_ctx = { 0 };
1646 1638
1647 pf_q = vsi->txq_map[q_idx]; 1639 pf_q = vsi->txq_map[q_idx + offset];
1648 ice_setup_tx_ctx(vsi->tx_rings[q_idx], &tlan_ctx, 1640 ice_setup_tx_ctx(rings[q_idx], &tlan_ctx, pf_q);
1649 pf_q);
1650 /* copy context contents into the qg_buf */ 1641 /* copy context contents into the qg_buf */
1651 qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q); 1642 qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
1652 ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx, 1643 ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
@@ -1655,7 +1646,7 @@ int ice_vsi_cfg_txqs(struct ice_vsi *vsi)
1655 /* init queue specific tail reg. It is referred as 1646 /* init queue specific tail reg. It is referred as
1656 * transmit comm scheduler queue doorbell. 1647 * transmit comm scheduler queue doorbell.
1657 */ 1648 */
1658 vsi->tx_rings[q_idx]->tail = 1649 rings[q_idx]->tail =
1659 pf->hw.hw_addr + QTX_COMM_DBELL(pf_q); 1650 pf->hw.hw_addr + QTX_COMM_DBELL(pf_q);
1660 status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, 1651 status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc,
1661 num_q_grps, qg_buf, buf_len, 1652 num_q_grps, qg_buf, buf_len,
@@ -1674,7 +1665,7 @@ int ice_vsi_cfg_txqs(struct ice_vsi *vsi)
1674 */ 1665 */
1675 txq = &qg_buf->txqs[0]; 1666 txq = &qg_buf->txqs[0];
1676 if (pf_q == le16_to_cpu(txq->txq_id)) 1667 if (pf_q == le16_to_cpu(txq->txq_id))
1677 vsi->tx_rings[q_idx]->txq_teid = 1668 rings[q_idx]->txq_teid =
1678 le32_to_cpu(txq->q_teid); 1669 le32_to_cpu(txq->q_teid);
1679 1670
1680 q_idx++; 1671 q_idx++;
@@ -1686,6 +1677,18 @@ err_cfg_txqs:
1686} 1677}
1687 1678
1688/** 1679/**
1680 * ice_vsi_cfg_lan_txqs - Configure the VSI for Tx
1681 * @vsi: the VSI being configured
1682 *
1683 * Return 0 on success and a negative value on error
1684 * Configure the Tx VSI for operation.
1685 */
1686int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
1687{
1688 return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, 0);
1689}
1690
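Threading the ring array and a txq_map offset through ice_vsi_cfg_txqs() means additional ring classes could later share one queue map; this patch only instantiates the LAN wrapper. A sketch of the pattern, where the second ring class is hypothetical and not something this patch adds:

#include <stdio.h>

struct ring { int pf_q; };

/* generic: configure nrings rings using txq_map entries at offset */
static void cfg_txqs(const int *txq_map, struct ring *rings, int nrings,
		     int offset)
{
	int i;

	for (i = 0; i < nrings; i++)
		rings[i].pf_q = txq_map[i + offset];
}

int main(void)
{
	int txq_map[4] = { 8, 9, 10, 11 };
	struct ring lan[2], extra[2];
	int i;

	cfg_txqs(txq_map, lan, 2, 0);   /* what ice_vsi_cfg_lan_txqs() does */
	cfg_txqs(txq_map, extra, 2, 2); /* hypothetical second ring class */

	for (i = 0; i < 2; i++)
		printf("lan q%d -> pf q%d, extra q%d -> pf q%d\n",
		       i, lan[i].pf_q, i, extra[i].pf_q);
	return 0;
}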
1691/**
1689 * ice_intrl_usec_to_reg - convert interrupt rate limit to register value 1692 * ice_intrl_usec_to_reg - convert interrupt rate limit to register value
1690 * @intrl: interrupt rate limit in usecs 1693 * @intrl: interrupt rate limit in usecs
1691 * @gran: interrupt rate limit granularity in usecs 1694 * @gran: interrupt rate limit granularity in usecs
@@ -1714,22 +1717,34 @@ static u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran)
1714static void 1717static void
1715ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector, u16 vector) 1718ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector, u16 vector)
1716{ 1719{
1717 u8 itr_gran = hw->itr_gran;
1718
1719 if (q_vector->num_ring_rx) { 1720 if (q_vector->num_ring_rx) {
1720 struct ice_ring_container *rc = &q_vector->rx; 1721 struct ice_ring_container *rc = &q_vector->rx;
1721 1722
1722 rc->itr = ITR_TO_REG(ICE_DFLT_RX_ITR, itr_gran); 1723 /* if this value is set then don't overwrite with default */
1724 if (!rc->itr_setting)
1725 rc->itr_setting = ICE_DFLT_RX_ITR;
1726
1727 rc->target_itr = ITR_TO_REG(rc->itr_setting);
1728 rc->next_update = jiffies + 1;
1729 rc->current_itr = rc->target_itr;
1723 rc->latency_range = ICE_LOW_LATENCY; 1730 rc->latency_range = ICE_LOW_LATENCY;
1724 wr32(hw, GLINT_ITR(rc->itr_idx, vector), rc->itr); 1731 wr32(hw, GLINT_ITR(rc->itr_idx, vector),
1732 ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
1725 } 1733 }
1726 1734
1727 if (q_vector->num_ring_tx) { 1735 if (q_vector->num_ring_tx) {
1728 struct ice_ring_container *rc = &q_vector->tx; 1736 struct ice_ring_container *rc = &q_vector->tx;
1729 1737
1730 rc->itr = ITR_TO_REG(ICE_DFLT_TX_ITR, itr_gran); 1738 /* if this value is set then don't overwrite with default */
1739 if (!rc->itr_setting)
1740 rc->itr_setting = ICE_DFLT_TX_ITR;
1741
1742 rc->target_itr = ITR_TO_REG(rc->itr_setting);
1743 rc->next_update = jiffies + 1;
1744 rc->current_itr = rc->target_itr;
1731 rc->latency_range = ICE_LOW_LATENCY; 1745 rc->latency_range = ICE_LOW_LATENCY;
1732 wr32(hw, GLINT_ITR(rc->itr_idx, vector), rc->itr); 1746 wr32(hw, GLINT_ITR(rc->itr_idx, vector),
1747 ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
1733 } 1748 }
1734} 1749}
1735 1750
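The GLINT_ITR write itself changes from the old precomputed rc->itr (built with the two-argument ITR_TO_REG(value, itr_gran)) to ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S: align the microsecond value down to the hardware granularity, then shift it into register units. Assuming a 2-microsecond granularity, i.e. ICE_ITR_GRAN_S of 1, which is an assumption about the constants rather than something shown in this hunk, a 50 usec setting stays 50 after alignment and is written as 50 >> 1 = 25, while an odd value like 51 first aligns down to 50.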
@@ -1897,9 +1912,12 @@ int ice_vsi_stop_rx_rings(struct ice_vsi *vsi)
1897 * @vsi: the VSI being configured 1912 * @vsi: the VSI being configured
1898 * @rst_src: reset source 1913 * @rst_src: reset source
1899 * @rel_vmvf_num: Relative id of VF/VM 1914 * @rel_vmvf_num: Relative id of VF/VM
1915 * @rings: Tx ring array to be stopped
1916 * @offset: offset within vsi->txq_map
1900 */ 1917 */
1901int ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, 1918static int
1902 u16 rel_vmvf_num) 1919ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
1920 u16 rel_vmvf_num, struct ice_ring **rings, int offset)
1903{ 1921{
1904 struct ice_pf *pf = vsi->back; 1922 struct ice_pf *pf = vsi->back;
1905 struct ice_hw *hw = &pf->hw; 1923 struct ice_hw *hw = &pf->hw;
@@ -1927,19 +1945,18 @@ int ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
1927 ice_for_each_txq(vsi, i) { 1945 ice_for_each_txq(vsi, i) {
1928 u16 v_idx; 1946 u16 v_idx;
1929 1947
1930 if (!vsi->tx_rings || !vsi->tx_rings[i] || 1948 if (!rings || !rings[i] || !rings[i]->q_vector) {
1931 !vsi->tx_rings[i]->q_vector) {
1932 err = -EINVAL; 1949 err = -EINVAL;
1933 goto err_out; 1950 goto err_out;
1934 } 1951 }
1935 1952
1936 q_ids[i] = vsi->txq_map[i]; 1953 q_ids[i] = vsi->txq_map[i + offset];
1937 q_teids[i] = vsi->tx_rings[i]->txq_teid; 1954 q_teids[i] = rings[i]->txq_teid;
1938 1955
1939 /* clear cause_ena bit for disabled queues */ 1956 /* clear cause_ena bit for disabled queues */
1940 val = rd32(hw, QINT_TQCTL(vsi->tx_rings[i]->reg_idx)); 1957 val = rd32(hw, QINT_TQCTL(rings[i]->reg_idx));
1941 val &= ~QINT_TQCTL_CAUSE_ENA_M; 1958 val &= ~QINT_TQCTL_CAUSE_ENA_M;
1942 wr32(hw, QINT_TQCTL(vsi->tx_rings[i]->reg_idx), val); 1959 wr32(hw, QINT_TQCTL(rings[i]->reg_idx), val);
1943 1960
1944 /* software is expected to wait for 100 ns */ 1961 /* software is expected to wait for 100 ns */
1945 ndelay(100); 1962 ndelay(100);
@@ -1947,7 +1964,7 @@ int ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
1947 /* trigger a software interrupt for the vector associated to 1964 /* trigger a software interrupt for the vector associated to
1948 * the queue to schedule NAPI handler 1965 * the queue to schedule NAPI handler
1949 */ 1966 */
1950 v_idx = vsi->tx_rings[i]->q_vector->v_idx; 1967 v_idx = rings[i]->q_vector->v_idx;
1951 wr32(hw, GLINT_DYN_CTL(vsi->hw_base_vector + v_idx), 1968 wr32(hw, GLINT_DYN_CTL(vsi->hw_base_vector + v_idx),
1952 GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M); 1969 GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M);
1953 } 1970 }
@@ -1977,6 +1994,19 @@ err_alloc_q_ids:
1977} 1994}
1978 1995
1979/** 1996/**
1997 * ice_vsi_stop_lan_tx_rings - Disable LAN Tx rings
1998 * @vsi: the VSI being configured
1999 * @rst_src: reset source
2000 * @rel_vmvf_num: Relative id of VF/VM
2001 */
2002int ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi,
2003 enum ice_disq_rst_src rst_src, u16 rel_vmvf_num)
2004{
2005 return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings,
2006 0);
2007}
2008
2009/**
1980 * ice_cfg_vlan_pruning - enable or disable VLAN pruning on the VSI 2010 * ice_cfg_vlan_pruning - enable or disable VLAN pruning on the VSI
1981 * @vsi: VSI to enable or disable VLAN pruning on 2011 * @vsi: VSI to enable or disable VLAN pruning on
1982 * @ena: set to true to enable VLAN pruning and false to disable it 2012 * @ena: set to true to enable VLAN pruning and false to disable it
@@ -2581,6 +2611,12 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
2581 goto err_vectors; 2611 goto err_vectors;
2582 2612
2583 ice_vsi_map_rings_to_vectors(vsi); 2613 ice_vsi_map_rings_to_vectors(vsi);
2614 /* Do not exit if configuring RSS had an issue, at least
 2615	 * receive traffic on the first queue. Hence there is no need to
 2616	 * capture the return value
2617 */
2618 if (test_bit(ICE_FLAG_RSS_ENA, vsi->back->flags))
2619 ice_vsi_cfg_rss_lut_key(vsi);
2584 break; 2620 break;
2585 case ICE_VSI_VF: 2621 case ICE_VSI_VF:
2586 ret = ice_vsi_alloc_q_vectors(vsi); 2622 ret = ice_vsi_alloc_q_vectors(vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 3831b4f0960a..7988a53729a9 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -15,7 +15,7 @@ void ice_update_eth_stats(struct ice_vsi *vsi);
15 15
16int ice_vsi_cfg_rxqs(struct ice_vsi *vsi); 16int ice_vsi_cfg_rxqs(struct ice_vsi *vsi);
17 17
18int ice_vsi_cfg_txqs(struct ice_vsi *vsi); 18int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi);
19 19
20void ice_vsi_cfg_msix(struct ice_vsi *vsi); 20void ice_vsi_cfg_msix(struct ice_vsi *vsi);
21 21
@@ -31,7 +31,8 @@ int ice_vsi_start_rx_rings(struct ice_vsi *vsi);
31 31
32int ice_vsi_stop_rx_rings(struct ice_vsi *vsi); 32int ice_vsi_stop_rx_rings(struct ice_vsi *vsi);
33 33
34int ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, 34int
35ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
35 u16 rel_vmvf_num); 36 u16 rel_vmvf_num);
36 37
37int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena); 38int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 8725569d11f0..48f033928aa2 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1389,7 +1389,6 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf)
1389{ 1389{
1390 struct ice_hw *hw = &pf->hw; 1390 struct ice_hw *hw = &pf->hw;
1391 int oicr_idx, err = 0; 1391 int oicr_idx, err = 0;
1392 u8 itr_gran;
1393 u32 val; 1392 u32 val;
1394 1393
1395 if (!pf->int_name[0]) 1394 if (!pf->int_name[0])
@@ -1453,10 +1452,8 @@ skip_req_irq:
1453 PFINT_MBX_CTL_CAUSE_ENA_M); 1452 PFINT_MBX_CTL_CAUSE_ENA_M);
1454 wr32(hw, PFINT_MBX_CTL, val); 1453 wr32(hw, PFINT_MBX_CTL, val);
1455 1454
1456 itr_gran = hw->itr_gran;
1457
1458 wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->hw_oicr_idx), 1455 wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->hw_oicr_idx),
1459 ITR_TO_REG(ICE_ITR_8K, itr_gran)); 1456 ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S);
1460 1457
1461 ice_flush(hw); 1458 ice_flush(hw);
1462 ice_irq_dynamic_ena(hw, NULL, NULL); 1459 ice_irq_dynamic_ena(hw, NULL, NULL);
@@ -1531,6 +1528,7 @@ static int ice_cfg_netdev(struct ice_vsi *vsi)
1531 1528
1532 csumo_features = NETIF_F_RXCSUM | 1529 csumo_features = NETIF_F_RXCSUM |
1533 NETIF_F_IP_CSUM | 1530 NETIF_F_IP_CSUM |
1531 NETIF_F_SCTP_CRC |
1534 NETIF_F_IPV6_CSUM; 1532 NETIF_F_IPV6_CSUM;
1535 1533
1536 vlano_features = NETIF_F_HW_VLAN_CTAG_FILTER | 1534 vlano_features = NETIF_F_HW_VLAN_CTAG_FILTER |
@@ -1998,6 +1996,23 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf)
1998} 1996}
1999 1997
2000/** 1998/**
1999 * ice_verify_itr_gran - verify driver's assumption of ITR granularity
2000 * @pf: pointer to the PF structure
2001 *
2002 * There is no error returned here because the driver will be able to handle a
2003 * different ITR granularity, but interrupt moderation will not be accurate if
2004 * the driver's assumptions are not verified. This assumption is made so we can
2005 * use constants in the hot path instead of accessing structure members.
2006 */
2007static void ice_verify_itr_gran(struct ice_pf *pf)
2008{
2009 if (pf->hw.itr_gran != (ICE_ITR_GRAN_S << 1))
2010 dev_warn(&pf->pdev->dev,
2011 "%d ITR granularity assumption is invalid, actual ITR granularity is %d. Interrupt moderation will be inaccurate!\n",
2012 (ICE_ITR_GRAN_S << 1), pf->hw.itr_gran);
2013}
2014
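Since ICE_ITR_GRAN_S is 1, the granularity the driver assumes is (ICE_ITR_GRAN_S << 1) = 2 usecs, i.e. the register interval field counts in 2-usec units. A worked example of the constant-based conversion the hot path now uses (values illustrative):

	/* ICE_ITR_8K is 124 usecs; ITR_REG_ALIGN keeps the value inside
	 * the even-aligned 13-bit field (124 stays 124), and the shift
	 * scales usecs to 2-usec register units: 124 >> 1 = 62.
	 */
	u16 itr_usecs = ICE_ITR_8K;
	u16 reg_interval = ITR_REG_ALIGN(itr_usecs) >> ICE_ITR_GRAN_S;	/* 62 */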
2015/**
2001 * ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines 2016 * ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines
2002 * @pf: pointer to the PF structure 2017 * @pf: pointer to the PF structure
2003 * 2018 *
@@ -2163,6 +2178,7 @@ static int ice_probe(struct pci_dev *pdev,
2163 mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period)); 2178 mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
2164 2179
2165 ice_verify_cacheline_size(pf); 2180 ice_verify_cacheline_size(pf);
2181 ice_verify_itr_gran(pf);
2166 2182
2167 return 0; 2183 return 0;
2168 2184
@@ -2422,7 +2438,8 @@ static void ice_set_rx_mode(struct net_device *netdev)
2422 */ 2438 */
2423static int ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[], 2439static int ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[],
2424 struct net_device *dev, const unsigned char *addr, 2440 struct net_device *dev, const unsigned char *addr,
2425 u16 vid, u16 flags) 2441 u16 vid, u16 flags,
2442 struct netlink_ext_ack *extack)
2426{ 2443{
2427 int err; 2444 int err;
2428 2445
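The added extack parameter lets .ndo_fdb_add handlers attach a human-readable reason to the errno. A minimal sketch of how a handler might use it (the condition and message are illustrative, not taken from this patch):

	if (vid) {
		/* illustrative: report unsupported VLAN-scoped FDB adds */
		NL_SET_ERR_MSG_MOD(extack, "FDB add with VLAN is not supported");
		return -EINVAL;
	}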
@@ -2546,7 +2563,8 @@ static int ice_vsi_cfg(struct ice_vsi *vsi)
2546 if (err) 2563 if (err)
2547 return err; 2564 return err;
2548 } 2565 }
2549 err = ice_vsi_cfg_txqs(vsi); 2566
2567 err = ice_vsi_cfg_lan_txqs(vsi);
2550 if (!err) 2568 if (!err)
2551 err = ice_vsi_cfg_rxqs(vsi); 2569 err = ice_vsi_cfg_rxqs(vsi);
2552 2570
@@ -2945,12 +2963,91 @@ static void ice_napi_disable_all(struct ice_vsi *vsi)
2945} 2963}
2946 2964
2947/** 2965/**
2966 * ice_force_phys_link_state - Force the physical link state
2967 * @vsi: VSI to force the physical link state to up/down
2968 * @link_up: true/false indicates to set the physical link to up/down
2969 *
2970 * Force the physical link state by getting the current PHY capabilities from
2971 * hardware and setting the PHY config based on the determined capabilities. If
2972 * link changes a link event will be triggered because both the Enable Automatic
2973 * Link Update and LESM Enable bits are set when setting the PHY capabilities.
2974 *
2975 * Returns 0 on success, negative on failure
2976 */
2977static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
2978{
2979 struct ice_aqc_get_phy_caps_data *pcaps;
2980 struct ice_aqc_set_phy_cfg_data *cfg;
2981 struct ice_port_info *pi;
2982 struct device *dev;
2983 int retcode;
2984
2985 if (!vsi || !vsi->port_info || !vsi->back)
2986 return -EINVAL;
2987 if (vsi->type != ICE_VSI_PF)
2988 return 0;
2989
2990 dev = &vsi->back->pdev->dev;
2991
2992 pi = vsi->port_info;
2993
2994 pcaps = devm_kzalloc(dev, sizeof(*pcaps), GFP_KERNEL);
2995 if (!pcaps)
2996 return -ENOMEM;
2997
2998 retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
2999 NULL);
3000 if (retcode) {
3001 dev_err(dev,
3002 "Failed to get phy capabilities, VSI %d error %d\n",
3003 vsi->vsi_num, retcode);
3004 retcode = -EIO;
3005 goto out;
3006 }
3007
3008 /* No change in link */
3009 if (link_up == !!(pcaps->caps & ICE_AQC_PHY_EN_LINK) &&
3010 link_up == !!(pi->phy.link_info.link_info & ICE_AQ_LINK_UP))
3011 goto out;
3012
3013 cfg = devm_kzalloc(dev, sizeof(*cfg), GFP_KERNEL);
3014 if (!cfg) {
3015 retcode = -ENOMEM;
3016 goto out;
3017 }
3018
3019 cfg->phy_type_low = pcaps->phy_type_low;
3020 cfg->phy_type_high = pcaps->phy_type_high;
3021 cfg->caps = pcaps->caps | ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
3022 cfg->low_power_ctrl = pcaps->low_power_ctrl;
3023 cfg->eee_cap = pcaps->eee_cap;
3024 cfg->eeer_value = pcaps->eeer_value;
3025 cfg->link_fec_opt = pcaps->link_fec_options;
3026 if (link_up)
3027 cfg->caps |= ICE_AQ_PHY_ENA_LINK;
3028 else
3029 cfg->caps &= ~ICE_AQ_PHY_ENA_LINK;
3030
3031 retcode = ice_aq_set_phy_cfg(&vsi->back->hw, pi->lport, cfg, NULL);
3032 if (retcode) {
3033 dev_err(dev, "Failed to set phy config, VSI %d error %d\n",
3034 vsi->vsi_num, retcode);
3035 retcode = -EIO;
3036 }
3037
3038 devm_kfree(dev, cfg);
3039out:
3040 devm_kfree(dev, pcaps);
3041 return retcode;
3042}
3043
3044/**
2948 * ice_down - Shutdown the connection 3045 * ice_down - Shutdown the connection
2949 * @vsi: The VSI being stopped 3046 * @vsi: The VSI being stopped
2950 */ 3047 */
2951int ice_down(struct ice_vsi *vsi) 3048int ice_down(struct ice_vsi *vsi)
2952{ 3049{
2953 int i, tx_err, rx_err; 3050 int i, tx_err, rx_err, link_err = 0;
2954 3051
2955 /* Caller of this function is expected to set the 3052 /* Caller of this function is expected to set the
2956 * vsi->state __ICE_DOWN bit 3053 * vsi->state __ICE_DOWN bit
@@ -2961,7 +3058,8 @@ int ice_down(struct ice_vsi *vsi)
2961 } 3058 }
2962 3059
2963 ice_vsi_dis_irq(vsi); 3060 ice_vsi_dis_irq(vsi);
2964 tx_err = ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0); 3061
3062 tx_err = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
2965 if (tx_err) 3063 if (tx_err)
2966 netdev_err(vsi->netdev, 3064 netdev_err(vsi->netdev,
2967 "Failed stop Tx rings, VSI %d error %d\n", 3065 "Failed stop Tx rings, VSI %d error %d\n",
@@ -2975,13 +3073,21 @@ int ice_down(struct ice_vsi *vsi)
2975 3073
2976 ice_napi_disable_all(vsi); 3074 ice_napi_disable_all(vsi);
2977 3075
3076 if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) {
3077 link_err = ice_force_phys_link_state(vsi, false);
3078 if (link_err)
3079 netdev_err(vsi->netdev,
3080 "Failed to set physical link down, VSI %d error %d\n",
3081 vsi->vsi_num, link_err);
3082 }
3083
2978 ice_for_each_txq(vsi, i) 3084 ice_for_each_txq(vsi, i)
2979 ice_clean_tx_ring(vsi->tx_rings[i]); 3085 ice_clean_tx_ring(vsi->tx_rings[i]);
2980 3086
2981 ice_for_each_rxq(vsi, i) 3087 ice_for_each_rxq(vsi, i)
2982 ice_clean_rx_ring(vsi->rx_rings[i]); 3088 ice_clean_rx_ring(vsi->rx_rings[i]);
2983 3089
2984 if (tx_err || rx_err) { 3090 if (tx_err || rx_err || link_err) {
2985 netdev_err(vsi->netdev, 3091 netdev_err(vsi->netdev,
2986 "Failed to close VSI 0x%04X on switch 0x%04X\n", 3092 "Failed to close VSI 0x%04X on switch 0x%04X\n",
2987 vsi->vsi_num, vsi->vsw->sw_id); 3093 vsi->vsi_num, vsi->vsw->sw_id);
@@ -3641,7 +3747,8 @@ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode)
3641 */ 3747 */
3642static int 3748static int
3643ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, 3749ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
3644 u16 __always_unused flags, struct netlink_ext_ack *extack) 3750 u16 __always_unused flags,
3751 struct netlink_ext_ack __always_unused *extack)
3645{ 3752{
3646 struct ice_netdev_priv *np = netdev_priv(dev); 3753 struct ice_netdev_priv *np = netdev_priv(dev);
3647 struct ice_pf *pf = np->vsi->back; 3754 struct ice_pf *pf = np->vsi->back;
@@ -3814,8 +3921,14 @@ static int ice_open(struct net_device *netdev)
3814 3921
3815 netif_carrier_off(netdev); 3922 netif_carrier_off(netdev);
3816 3923
3817 err = ice_vsi_open(vsi); 3924 err = ice_force_phys_link_state(vsi, true);
3925 if (err) {
3926 netdev_err(netdev,
3927 "Failed to set physical link up, error %d\n", err);
3928 return err;
3929 }
3818 3930
3931 err = ice_vsi_open(vsi);
3819 if (err) 3932 if (err)
3820 netdev_err(netdev, "Failed to open VSI 0x%04X on switch 0x%04X\n", 3933 netdev_err(netdev, "Failed to open VSI 0x%04X on switch 0x%04X\n",
3821 vsi->vsi_num, vsi->vsw->sw_id); 3934 vsi->vsi_num, vsi->vsw->sw_id);
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
index 3274c543283c..ce64cecdae9c 100644
--- a/drivers/net/ethernet/intel/ice/ice_nvm.c
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -125,6 +125,62 @@ ice_read_sr_word_aq(struct ice_hw *hw, u16 offset, u16 *data)
125} 125}
126 126
127/** 127/**
128 * ice_read_sr_buf_aq - Reads Shadow RAM buf via AQ
129 * @hw: pointer to the HW structure
130 * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
131 * @words: (in) number of words to read; (out) number of words actually read
132 * @data: words read from the Shadow RAM
133 *
134 * Reads 16 bit words (data buf) from the SR using the ice_read_sr_aq
135 * method. Ownership of the NVM is taken before reading the buffer and later
136 * released.
137 */
138static enum ice_status
139ice_read_sr_buf_aq(struct ice_hw *hw, u16 offset, u16 *words, u16 *data)
140{
141 enum ice_status status;
142 bool last_cmd = false;
143 u16 words_read = 0;
144 u16 i = 0;
145
146 do {
147 u16 read_size, off_w;
148
149 /* Calculate number of bytes we should read in this step.
150 * It's not allowed to read more than one page at a time or
151 * to cross page boundaries.
152 */
153 off_w = offset % ICE_SR_SECTOR_SIZE_IN_WORDS;
154 read_size = off_w ?
155 min(*words,
156 (u16)(ICE_SR_SECTOR_SIZE_IN_WORDS - off_w)) :
157 min((*words - words_read), ICE_SR_SECTOR_SIZE_IN_WORDS);
158
159 /* Check if this is last command, if so set proper flag */
160 if ((words_read + read_size) >= *words)
161 last_cmd = true;
162
163 status = ice_read_sr_aq(hw, offset, read_size,
164 data + words_read, last_cmd);
165 if (status)
166 goto read_nvm_buf_aq_exit;
167
168 /* Increment counter for words already read and move offset to
169 * new read location
170 */
171 words_read += read_size;
172 offset += read_size;
173 } while (words_read < *words);
174
175 for (i = 0; i < *words; i++)
176 data[i] = le16_to_cpu(((__le16 *)data)[i]);
177
178read_nvm_buf_aq_exit:
179 *words = words_read;
180 return status;
181}
182
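The chunking never crosses a Shadow RAM sector: an unaligned first read is trimmed to the sector end, after which every read starts sector-aligned. Assuming ICE_SR_SECTOR_SIZE_IN_WORDS is 0x800 (2048 words), a worked trace for 3000 words starting at offset 0x7F0:

	/* step 1: off_w = 0x7F0 % 2048 = 2032 -> read_size = 2048 - 2032 = 16
	 * step 2: off_w = 0 -> read_size = min(3000 - 16, 2048) = 2048
	 * step 3: off_w = 0 -> read_size = 3000 - 2064 = 936 (last_cmd set)
	 * total = 16 + 2048 + 936 = 3000 words
	 */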
183/**
128 * ice_acquire_nvm - Generic request for acquiring the NVM ownership 184 * ice_acquire_nvm - Generic request for acquiring the NVM ownership
129 * @hw: pointer to the HW structure 185 * @hw: pointer to the HW structure
130 * @access: NVM access type (read or write) 186 * @access: NVM access type (read or write)
@@ -234,3 +290,28 @@ enum ice_status ice_init_nvm(struct ice_hw *hw)
234 290
235 return status; 291 return status;
236} 292}
293
294/**
295 * ice_read_sr_buf - Reads Shadow RAM buf and acquire lock if necessary
296 * @hw: pointer to the HW structure
297 * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
298 * @words: (in) number of words to read; (out) number of words actually read
299 * @data: words read from the Shadow RAM
300 *
 301	 * Reads 16 bit words (data buf) from the SR using the ice_read_sr_buf_aq
302 * method. The buf read is preceded by the NVM ownership take
303 * and followed by the release.
304 */
305enum ice_status
306ice_read_sr_buf(struct ice_hw *hw, u16 offset, u16 *words, u16 *data)
307{
308 enum ice_status status;
309
310 status = ice_acquire_nvm(hw, ICE_RES_READ);
311 if (!status) {
312 status = ice_read_sr_buf_aq(hw, offset, words, data);
313 ice_release_nvm(hw);
314 }
315
316 return status;
317}
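ice_read_sr_buf hides the ownership handshake, so a caller only supplies a word buffer; note that words is in/out and shrinks to what was actually read. A minimal caller sketch (the offset and length are illustrative):

	u16 words = 16;		/* in: requested; out: actually read */
	u16 buf[16];		/* returned words are already CPU-order */
	enum ice_status status;

	status = ice_read_sr_buf(hw, 0x0, &words, buf);
	if (!status)
		dev_info(ice_hw_to_dev(hw), "read %u SR words\n", words);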
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index a1681853df2e..fb38e8be1e2e 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -85,37 +85,59 @@ ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid)
85} 85}
86 86
87/** 87/**
88 * ice_aq_query_sched_elems - query scheduler elements 88 * ice_aqc_send_sched_elem_cmd - send scheduling elements cmd
89 * @hw: pointer to the hw struct 89 * @hw: pointer to the hw struct
90 * @elems_req: number of elements to query 90 * @cmd_opc: cmd opcode
91 * @elems_req: number of elements to request
91 * @buf: pointer to buffer 92 * @buf: pointer to buffer
92 * @buf_size: buffer size in bytes 93 * @buf_size: buffer size in bytes
 93	 * @elems_ret: returns total number of elements returned	 94	 * @elems_resp: returns total number of elements in the response
94 * @cd: pointer to command details structure or NULL 95 * @cd: pointer to command details structure or NULL
95 * 96 *
96 * Query scheduling elements (0x0404) 97 * This function sends a scheduling elements cmd (cmd_opc)
97 */ 98 */
98static enum ice_status 99static enum ice_status
99ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req, 100ice_aqc_send_sched_elem_cmd(struct ice_hw *hw, enum ice_adminq_opc cmd_opc,
100 struct ice_aqc_get_elem *buf, u16 buf_size, 101 u16 elems_req, void *buf, u16 buf_size,
101 u16 *elems_ret, struct ice_sq_cd *cd) 102 u16 *elems_resp, struct ice_sq_cd *cd)
102{ 103{
103 struct ice_aqc_get_cfg_elem *cmd; 104 struct ice_aqc_sched_elem_cmd *cmd;
104 struct ice_aq_desc desc; 105 struct ice_aq_desc desc;
105 enum ice_status status; 106 enum ice_status status;
106 107
107 cmd = &desc.params.get_update_elem; 108 cmd = &desc.params.sched_elem_cmd;
108 ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_sched_elems); 109 ice_fill_dflt_direct_cmd_desc(&desc, cmd_opc);
109 cmd->num_elem_req = cpu_to_le16(elems_req); 110 cmd->num_elem_req = cpu_to_le16(elems_req);
110 desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); 111 desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
111 status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); 112 status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
112 if (!status && elems_ret) 113 if (!status && elems_resp)
113 *elems_ret = le16_to_cpu(cmd->num_elem_resp); 114 *elems_resp = le16_to_cpu(cmd->num_elem_resp);
114 115
115 return status; 116 return status;
116} 117}
117 118
118/** 119/**
120 * ice_aq_query_sched_elems - query scheduler elements
121 * @hw: pointer to the hw struct
122 * @elems_req: number of elements to query
123 * @buf: pointer to buffer
124 * @buf_size: buffer size in bytes
125 * @elems_ret: returns total number of elements returned
126 * @cd: pointer to command details structure or NULL
127 *
128 * Query scheduling elements (0x0404)
129 */
130static enum ice_status
131ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req,
132 struct ice_aqc_get_elem *buf, u16 buf_size,
133 u16 *elems_ret, struct ice_sq_cd *cd)
134{
135 return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_get_sched_elems,
136 elems_req, (void *)buf, buf_size,
137 elems_ret, cd);
138}
139
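Any further element opcode can now ride the same transport. A hypothetical extra wrapper, assuming an ice_aqc_opc_move_sched_elems opcode and an ice_aqc_move_elem buffer type existed (neither is added by this patch):

	/* Hypothetical additional wrapper over ice_aqc_send_sched_elem_cmd;
	 * the move opcode and buffer type are assumptions for illustration.
	 */
	static enum ice_status
	ice_aq_move_sched_elems(struct ice_hw *hw, u16 grps_req,
				struct ice_aqc_move_elem *buf, u16 buf_size,
				u16 *grps_movd, struct ice_sq_cd *cd)
	{
		return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_move_sched_elems,
						   grps_req, (void *)buf, buf_size,
						   grps_movd, cd);
	}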
140/**
119 * ice_sched_query_elem - query element information from hw 141 * ice_sched_query_elem - query element information from hw
120 * @hw: pointer to the hw struct 142 * @hw: pointer to the hw struct
121 * @node_teid: node teid to be queried 143 * @node_teid: node teid to be queried
@@ -218,20 +240,9 @@ ice_aq_delete_sched_elems(struct ice_hw *hw, u16 grps_req,
218 struct ice_aqc_delete_elem *buf, u16 buf_size, 240 struct ice_aqc_delete_elem *buf, u16 buf_size,
219 u16 *grps_del, struct ice_sq_cd *cd) 241 u16 *grps_del, struct ice_sq_cd *cd)
220{ 242{
221 struct ice_aqc_add_move_delete_elem *cmd; 243 return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_delete_sched_elems,
222 struct ice_aq_desc desc; 244 grps_req, (void *)buf, buf_size,
223 enum ice_status status; 245 grps_del, cd);
224
225 cmd = &desc.params.add_move_delete_elem;
226 ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_delete_sched_elems);
227 desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
228 cmd->num_grps_req = cpu_to_le16(grps_req);
229
230 status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
231 if (!status && grps_del)
232 *grps_del = le16_to_cpu(cmd->num_grps_updated);
233
234 return status;
235} 246}
236 247
237/** 248/**
@@ -442,52 +453,9 @@ ice_aq_add_sched_elems(struct ice_hw *hw, u16 grps_req,
442 struct ice_aqc_add_elem *buf, u16 buf_size, 453 struct ice_aqc_add_elem *buf, u16 buf_size,
443 u16 *grps_added, struct ice_sq_cd *cd) 454 u16 *grps_added, struct ice_sq_cd *cd)
444{ 455{
445 struct ice_aqc_add_move_delete_elem *cmd; 456 return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_add_sched_elems,
446 struct ice_aq_desc desc; 457 grps_req, (void *)buf, buf_size,
447 enum ice_status status; 458 grps_added, cd);
448
449 cmd = &desc.params.add_move_delete_elem;
450 ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_sched_elems);
451 desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
452
453 cmd->num_grps_req = cpu_to_le16(grps_req);
454 status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
455 if (!status && grps_added)
456 *grps_added = le16_to_cpu(cmd->num_grps_updated);
457
458 return status;
459}
460
461/**
462 * ice_suspend_resume_elems - suspend/resume scheduler elements
463 * @hw: pointer to the hw struct
464 * @elems_req: number of elements to suspend
465 * @buf: pointer to buffer
466 * @buf_size: buffer size in bytes
467 * @elems_ret: returns total number of elements suspended
468 * @cd: pointer to command details structure or NULL
469 * @cmd_code: command code for suspend or resume
470 *
471 * suspend/resume scheduler elements
472 */
473static enum ice_status
474ice_suspend_resume_elems(struct ice_hw *hw, u16 elems_req,
475 struct ice_aqc_suspend_resume_elem *buf, u16 buf_size,
476 u16 *elems_ret, struct ice_sq_cd *cd,
477 enum ice_adminq_opc cmd_code)
478{
479 struct ice_aqc_get_cfg_elem *cmd;
480 struct ice_aq_desc desc;
481 enum ice_status status;
482
483 cmd = &desc.params.get_update_elem;
484 ice_fill_dflt_direct_cmd_desc(&desc, cmd_code);
485 cmd->num_elem_req = cpu_to_le16(elems_req);
486 desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
487 status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
488 if (!status && elems_ret)
489 *elems_ret = le16_to_cpu(cmd->num_elem_resp);
490 return status;
491} 459}
492 460
493/** 461/**
@@ -506,8 +474,9 @@ ice_aq_suspend_sched_elems(struct ice_hw *hw, u16 elems_req,
506 struct ice_aqc_suspend_resume_elem *buf, 474 struct ice_aqc_suspend_resume_elem *buf,
507 u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd) 475 u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
508{ 476{
509 return ice_suspend_resume_elems(hw, elems_req, buf, buf_size, elems_ret, 477 return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_suspend_sched_elems,
510 cd, ice_aqc_opc_suspend_sched_elems); 478 elems_req, (void *)buf, buf_size,
479 elems_ret, cd);
511} 480}
512 481
513/** 482/**
@@ -526,8 +495,9 @@ ice_aq_resume_sched_elems(struct ice_hw *hw, u16 elems_req,
526 struct ice_aqc_suspend_resume_elem *buf, 495 struct ice_aqc_suspend_resume_elem *buf,
527 u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd) 496 u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
528{ 497{
529 return ice_suspend_resume_elems(hw, elems_req, buf, buf_size, elems_ret, 498 return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_resume_sched_elems,
530 cd, ice_aqc_opc_resume_sched_elems); 499 elems_req, (void *)buf, buf_size,
500 elems_ret, cd);
531} 501}
532 502
533/** 503/**
@@ -591,23 +561,18 @@ ice_sched_suspend_resume_elems(struct ice_hw *hw, u8 num_nodes, u32 *node_teids,
591} 561}
592 562
593/** 563/**
594 * ice_sched_clear_tx_topo - clears the schduler tree nodes 564 * ice_sched_clear_agg - clears the agg related information
595 * @pi: port information structure 565 * @hw: pointer to the hardware structure
596 * 566 *
 597	 * This function removes all the nodes from HW as well as from SW DB.	 567	 * This function removes the agg list and frees up the agg-related memory
568 * previously allocated.
598 */ 569 */
599static void ice_sched_clear_tx_topo(struct ice_port_info *pi) 570void ice_sched_clear_agg(struct ice_hw *hw)
600{ 571{
601 struct ice_sched_agg_info *agg_info; 572 struct ice_sched_agg_info *agg_info;
602 struct ice_sched_agg_info *atmp; 573 struct ice_sched_agg_info *atmp;
603 struct ice_hw *hw;
604
605 if (!pi)
606 return;
607
608 hw = pi->hw;
609 574
610 list_for_each_entry_safe(agg_info, atmp, &pi->agg_list, list_entry) { 575 list_for_each_entry_safe(agg_info, atmp, &hw->agg_list, list_entry) {
611 struct ice_sched_agg_vsi_info *agg_vsi_info; 576 struct ice_sched_agg_vsi_info *agg_vsi_info;
612 struct ice_sched_agg_vsi_info *vtmp; 577 struct ice_sched_agg_vsi_info *vtmp;
613 578
@@ -616,8 +581,21 @@ static void ice_sched_clear_tx_topo(struct ice_port_info *pi)
616 list_del(&agg_vsi_info->list_entry); 581 list_del(&agg_vsi_info->list_entry);
617 devm_kfree(ice_hw_to_dev(hw), agg_vsi_info); 582 devm_kfree(ice_hw_to_dev(hw), agg_vsi_info);
618 } 583 }
584 list_del(&agg_info->list_entry);
585 devm_kfree(ice_hw_to_dev(hw), agg_info);
619 } 586 }
587}
620 588
589/**
590 * ice_sched_clear_tx_topo - clears the scheduler tree nodes
591 * @pi: port information structure
592 *
593 * This function removes all the nodes from HW as well as from SW DB.
594 */
595static void ice_sched_clear_tx_topo(struct ice_port_info *pi)
596{
597 if (!pi)
598 return;
621 if (pi->root) { 599 if (pi->root) {
622 ice_free_sched_node(pi, pi->root); 600 ice_free_sched_node(pi, pi->root);
623 pi->root = NULL; 601 pi->root = NULL;
@@ -1035,7 +1013,6 @@ enum ice_status ice_sched_init_port(struct ice_port_info *pi)
1035 /* initialize the port for handling the scheduler tree */ 1013 /* initialize the port for handling the scheduler tree */
1036 pi->port_state = ICE_SCHED_PORT_STATE_READY; 1014 pi->port_state = ICE_SCHED_PORT_STATE_READY;
1037 mutex_init(&pi->sched_lock); 1015 mutex_init(&pi->sched_lock);
1038 INIT_LIST_HEAD(&pi->agg_list);
1039 1016
1040err_init_port: 1017err_init_port:
1041 if (status && pi->root) { 1018 if (status && pi->root) {
@@ -1618,7 +1595,8 @@ ice_sched_rm_agg_vsi_info(struct ice_port_info *pi, u16 vsi_handle)
1618 struct ice_sched_agg_info *agg_info; 1595 struct ice_sched_agg_info *agg_info;
1619 struct ice_sched_agg_info *atmp; 1596 struct ice_sched_agg_info *atmp;
1620 1597
1621 list_for_each_entry_safe(agg_info, atmp, &pi->agg_list, list_entry) { 1598 list_for_each_entry_safe(agg_info, atmp, &pi->hw->agg_list,
1599 list_entry) {
1622 struct ice_sched_agg_vsi_info *agg_vsi_info; 1600 struct ice_sched_agg_vsi_info *agg_vsi_info;
1623 struct ice_sched_agg_vsi_info *vtmp; 1601 struct ice_sched_agg_vsi_info *vtmp;
1624 1602
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h
index da5b4c166da8..bee8221ad146 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.h
+++ b/drivers/net/ethernet/intel/ice/ice_sched.h
@@ -28,6 +28,8 @@ enum ice_status ice_sched_init_port(struct ice_port_info *pi);
28enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw); 28enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw);
29void ice_sched_clear_port(struct ice_port_info *pi); 29void ice_sched_clear_port(struct ice_port_info *pi);
30void ice_sched_cleanup_all(struct ice_hw *hw); 30void ice_sched_cleanup_all(struct ice_hw *hw);
31void ice_sched_clear_agg(struct ice_hw *hw);
32
31struct ice_sched_node * 33struct ice_sched_node *
32ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid); 34ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid);
33enum ice_status 35enum ice_status
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
index 533b989a23e1..d2db0d04e117 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.c
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -85,6 +85,12 @@ u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed)
85 case ICE_AQ_LINK_SPEED_40GB: 85 case ICE_AQ_LINK_SPEED_40GB:
86 speed = ICE_LINK_SPEED_40000MBPS; 86 speed = ICE_LINK_SPEED_40000MBPS;
87 break; 87 break;
88 case ICE_AQ_LINK_SPEED_50GB:
89 speed = ICE_LINK_SPEED_50000MBPS;
90 break;
91 case ICE_AQ_LINK_SPEED_100GB:
92 speed = ICE_LINK_SPEED_100000MBPS;
93 break;
88 default: 94 default:
89 speed = ICE_LINK_SPEED_UNKNOWN; 95 speed = ICE_LINK_SPEED_UNKNOWN;
90 break; 96 break;
@@ -116,6 +122,9 @@ u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed)
116 break; 122 break;
117 case ICE_AQ_LINK_SPEED_40GB: 123 case ICE_AQ_LINK_SPEED_40GB:
118 /* fall through */ 124 /* fall through */
125 case ICE_AQ_LINK_SPEED_50GB:
126 /* fall through */
127 case ICE_AQ_LINK_SPEED_100GB:
119 speed = (u32)VIRTCHNL_LINK_SPEED_40GB; 128 speed = (u32)VIRTCHNL_LINK_SPEED_40GB;
120 break; 129 break;
121 default: 130 default:
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 49fc38094185..2357fcac996b 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1053,6 +1053,69 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
1053} 1053}
1054 1054
1055/** 1055/**
1056 * ice_buildreg_itr - build value for writing to the GLINT_DYN_CTL register
1057 * @itr_idx: interrupt throttling index
1058 * @reg_itr: interrupt throttling value adjusted based on ITR granularity
1059 */
1060static u32 ice_buildreg_itr(int itr_idx, u16 reg_itr)
1061{
1062 return GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
1063 (itr_idx << GLINT_DYN_CTL_ITR_INDX_S) |
1064 (reg_itr << GLINT_DYN_CTL_INTERVAL_S);
1065}
1066
1067/**
1068 * ice_update_ena_itr - Update ITR and re-enable MSIX interrupt
1069 * @vsi: the VSI associated with the q_vector
1070 * @q_vector: q_vector for which ITR is being updated and interrupt enabled
1071 */
1072static void
1073ice_update_ena_itr(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
1074{
1075 struct ice_hw *hw = &vsi->back->hw;
1076 struct ice_ring_container *rc;
1077 u32 itr_val;
1078
1079 /* This block of logic allows us to get away with only updating
1080 * one ITR value with each interrupt. The idea is to perform a
1081 * pseudo-lazy update with the following criteria.
1082 *
1083 * 1. Rx is given higher priority than Tx if both are in same state
 1084	 * 2. If we must reduce an ITR, that is given highest priority.
1085 * 3. We then give priority to increasing ITR based on amount.
1086 */
1087 if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
1088 rc = &q_vector->rx;
1089 /* Rx ITR needs to be reduced, this is highest priority */
1090 itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr);
1091 rc->current_itr = rc->target_itr;
1092 } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
1093 ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
1094 (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
1095 rc = &q_vector->tx;
1096 /* Tx ITR needs to be reduced, this is second priority
1097 * Tx ITR needs to be increased more than Rx, fourth priority
1098 */
1099 itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr);
1100 rc->current_itr = rc->target_itr;
1101 } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
1102 rc = &q_vector->rx;
1103 /* Rx ITR needs to be increased, third priority */
1104 itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr);
1105 rc->current_itr = rc->target_itr;
1106 } else {
1107 /* Still have to re-enable the interrupts */
1108 itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0);
1109 }
1110
1111 if (!test_bit(__ICE_DOWN, vsi->state)) {
1112 int vector = vsi->hw_base_vector + q_vector->v_idx;
1113
1114 wr32(hw, GLINT_DYN_CTL(vector), itr_val);
1115 }
1116}
1117
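To make the update priorities concrete, a standalone mirror of the selection order above, including its unsigned subtractions (illustrative, not driver code):

	static const char *pick_itr_update(u16 rx_cur, u16 rx_tgt,
					   u16 tx_cur, u16 tx_tgt)
	{
		if (rx_tgt < rx_cur)
			return "rx";	/* 1st: Rx must be reduced */
		if (tx_tgt < tx_cur || (rx_tgt - rx_cur) < (tx_tgt - tx_cur))
			return "tx";	/* 2nd: Tx reduced; 4th: Tx grows more */
		if (rx_cur != rx_tgt)
			return "rx";	/* 3rd: Rx increased */
		return "none";		/* only re-enable the interrupt */
	}

	/* e.g. pick_itr_update(124, 50, 50, 50) returns "rx": reduction wins */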
1118/**
1056 * ice_napi_poll - NAPI polling Rx/Tx cleanup routine 1119 * ice_napi_poll - NAPI polling Rx/Tx cleanup routine
1057 * @napi: napi struct with our devices info in it 1120 * @napi: napi struct with our devices info in it
1058 * @budget: amount of work driver is allowed to do this pass, in packets 1121 * @budget: amount of work driver is allowed to do this pass, in packets
@@ -1108,7 +1171,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
1108 */ 1171 */
1109 if (likely(napi_complete_done(napi, work_done))) 1172 if (likely(napi_complete_done(napi, work_done)))
1110 if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) 1173 if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
1111 ice_irq_dynamic_ena(&vsi->back->hw, vsi, q_vector); 1174 ice_update_ena_itr(vsi, q_vector);
1112 1175
1113 return min(work_done, budget - 1); 1176 return min(work_done, budget - 1);
1114} 1177}
@@ -1402,6 +1465,12 @@ int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
1402 offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S; 1465 offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
1403 break; 1466 break;
1404 case IPPROTO_SCTP: 1467 case IPPROTO_SCTP:
1468 /* enable SCTP checksum offload */
1469 cmd |= ICE_TX_DESC_CMD_L4T_EOFT_SCTP;
1470 l4_len = sizeof(struct sctphdr) >> 2;
1471 offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
1472 break;
1473
1405 default: 1474 default:
1406 if (first->tx_flags & ICE_TX_FLAGS_TSO) 1475 if (first->tx_flags & ICE_TX_FLAGS_TSO)
1407 return -1; 1476 return -1;
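For the SCTP branch above: the descriptor's L4 length field counts 4-byte words, and the SCTP common header (ports, vtag, checksum) is 12 bytes, so l4_len programs as 3. This pairs with the NETIF_F_SCTP_CRC feature bit added in ice_cfg_netdev, since the hardware checksum for SCTP is CRC32c. A compile-time sanity sketch of the size assumption:

	/* sizeof(struct sctphdr) >> 2 == 3 descriptor words */
	BUILD_BUG_ON(sizeof(struct sctphdr) != 12);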
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 75d0eaf6c9dd..fc358ea81816 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -116,16 +116,17 @@ enum ice_rx_dtype {
116/* indices into GLINT_ITR registers */ 116/* indices into GLINT_ITR registers */
117#define ICE_RX_ITR ICE_IDX_ITR0 117#define ICE_RX_ITR ICE_IDX_ITR0
118#define ICE_TX_ITR ICE_IDX_ITR1 118#define ICE_TX_ITR ICE_IDX_ITR1
119#define ICE_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ 119#define ICE_ITR_8K 124
120#define ICE_ITR_8K 125
121#define ICE_ITR_20K 50 120#define ICE_ITR_20K 50
122#define ICE_DFLT_TX_ITR ICE_ITR_20K 121#define ICE_ITR_MAX 8160
123#define ICE_DFLT_RX_ITR ICE_ITR_20K 122#define ICE_DFLT_TX_ITR (ICE_ITR_20K | ICE_ITR_DYNAMIC)
124/* apply ITR granularity translation to program the register. itr_gran is either 123#define ICE_DFLT_RX_ITR (ICE_ITR_20K | ICE_ITR_DYNAMIC)
125 * 2 or 4 usecs so we need to divide by 2 first then shift by that value 124#define ICE_ITR_DYNAMIC 0x8000 /* used as flag for itr_setting */
126 */ 125#define ITR_IS_DYNAMIC(setting) (!!((setting) & ICE_ITR_DYNAMIC))
127#define ITR_TO_REG(val, itr_gran) (((val) & ~ICE_ITR_DYNAMIC) >> \ 126#define ITR_TO_REG(setting) ((setting) & ~ICE_ITR_DYNAMIC)
128 ((itr_gran) / 2)) 127#define ICE_ITR_GRAN_S 1 /* Assume ITR granularity is 2us */
128#define ICE_ITR_MASK 0x1FFE /* ITR register value alignment mask */
129#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~ICE_ITR_MASK)
129 130
130#define ICE_DFLT_INTRL 0 131#define ICE_DFLT_INTRL 0
131 132
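Putting the new macros together: itr_setting stores a usec value with an optional high-bit dynamic flag, and programming a register strips the flag, aligns to the even-valued 13-bit field, then scales to 2-usec units. A short sketch (values illustrative):

	u16 setting = ICE_DFLT_RX_ITR;	/* 50 usecs | ICE_ITR_DYNAMIC */
	u16 reg_interval;

	if (!ITR_IS_DYNAMIC(setting))
		/* static ITR: strip flag, align, then 50 >> 1 = 25 */
		reg_interval = ITR_REG_ALIGN(ITR_TO_REG(setting)) >> ICE_ITR_GRAN_S;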
@@ -180,13 +181,20 @@ enum ice_latency_range {
180}; 181};
181 182
182struct ice_ring_container { 183struct ice_ring_container {
183 /* array of pointers to rings */ 184 /* head of linked-list of rings */
184 struct ice_ring *ring; 185 struct ice_ring *ring;
186 unsigned long next_update; /* jiffies value of next queue update */
185 unsigned int total_bytes; /* total bytes processed this int */ 187 unsigned int total_bytes; /* total bytes processed this int */
186 unsigned int total_pkts; /* total packets processed this int */ 188 unsigned int total_pkts; /* total packets processed this int */
187 enum ice_latency_range latency_range; 189 enum ice_latency_range latency_range;
188 int itr_idx; /* index in the interrupt vector */ 190 int itr_idx; /* index in the interrupt vector */
189 u16 itr; 191 u16 target_itr; /* value in usecs divided by the hw->itr_gran */
192 u16 current_itr; /* value in usecs divided by the hw->itr_gran */
193 /* high bit set means dynamic ITR, rest is used to store user
194 * readable ITR value in usecs and must be converted before programming
195 * to a register.
196 */
197 u16 itr_setting;
190}; 198};
191 199
192/* iterator for handling rings in ring container */ 200/* iterator for handling rings in ring container */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 0ea428104215..17086d5b5c33 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -90,6 +90,7 @@ enum ice_vsi_type {
90struct ice_link_status { 90struct ice_link_status {
91 /* Refer to ice_aq_phy_type for bits definition */ 91 /* Refer to ice_aq_phy_type for bits definition */
92 u64 phy_type_low; 92 u64 phy_type_low;
93 u64 phy_type_high;
93 u16 max_frame_size; 94 u16 max_frame_size;
94 u16 link_speed; 95 u16 link_speed;
95 u16 req_speeds; 96 u16 req_speeds;
@@ -118,6 +119,7 @@ struct ice_phy_info {
118 struct ice_link_status link_info; 119 struct ice_link_status link_info;
119 struct ice_link_status link_info_old; 120 struct ice_link_status link_info_old;
120 u64 phy_type_low; 121 u64 phy_type_low;
122 u64 phy_type_high;
121 enum ice_media_type media_type; 123 enum ice_media_type media_type;
122 u8 get_link_info; 124 u8 get_link_info;
123}; 125};
@@ -272,7 +274,6 @@ struct ice_port_info {
272 struct ice_mac_info mac; 274 struct ice_mac_info mac;
273 struct ice_phy_info phy; 275 struct ice_phy_info phy;
274 struct mutex sched_lock; /* protect access to TXSched tree */ 276 struct mutex sched_lock; /* protect access to TXSched tree */
275 struct list_head agg_list; /* lists all aggregator */
276 u8 lport; 277 u8 lport;
277#define ICE_LPORT_MASK 0xff 278#define ICE_LPORT_MASK 0xff
278 u8 is_vf; 279 u8 is_vf;
@@ -326,6 +327,7 @@ struct ice_hw {
326 u8 max_cgds; 327 u8 max_cgds;
327 u8 sw_entry_point_layer; 328 u8 sw_entry_point_layer;
328 u16 max_children[ICE_AQC_TOPO_MAX_LEVEL_NUM]; 329 u16 max_children[ICE_AQC_TOPO_MAX_LEVEL_NUM];
 330	 struct list_head agg_list;	/* lists all aggregators */
329 331
330 struct ice_vsi_ctx *vsi_ctx[ICE_MAX_VSI]; 332 struct ice_vsi_ctx *vsi_ctx[ICE_MAX_VSI];
331 u8 evb_veb; /* true for VEB, false for VEPA */ 333 u8 evb_veb; /* true for VEB, false for VEPA */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 05ff4f910649..80b50e67cbef 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -224,13 +224,15 @@ void ice_free_vfs(struct ice_pf *pf)
224 224
225 /* Avoid wait time by stopping all VFs at the same time */ 225 /* Avoid wait time by stopping all VFs at the same time */
226 for (i = 0; i < pf->num_alloc_vfs; i++) { 226 for (i = 0; i < pf->num_alloc_vfs; i++) {
227 struct ice_vsi *vsi;
228
227 if (!test_bit(ICE_VF_STATE_ENA, pf->vf[i].vf_states)) 229 if (!test_bit(ICE_VF_STATE_ENA, pf->vf[i].vf_states))
228 continue; 230 continue;
229 231
232 vsi = pf->vsi[pf->vf[i].lan_vsi_idx];
230 /* stop rings without wait time */ 233 /* stop rings without wait time */
231 ice_vsi_stop_tx_rings(pf->vsi[pf->vf[i].lan_vsi_idx], 234 ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, i);
232 ICE_NO_RESET, i); 235 ice_vsi_stop_rx_rings(vsi);
233 ice_vsi_stop_rx_rings(pf->vsi[pf->vf[i].lan_vsi_idx]);
234 236
235 clear_bit(ICE_VF_STATE_ENA, pf->vf[i].vf_states); 237 clear_bit(ICE_VF_STATE_ENA, pf->vf[i].vf_states);
236 } 238 }
@@ -831,6 +833,7 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
831{ 833{
832 struct ice_pf *pf = vf->pf; 834 struct ice_pf *pf = vf->pf;
833 struct ice_hw *hw = &pf->hw; 835 struct ice_hw *hw = &pf->hw;
836 struct ice_vsi *vsi;
834 bool rsd = false; 837 bool rsd = false;
835 u32 reg; 838 u32 reg;
836 int i; 839 int i;
@@ -843,17 +846,18 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
843 846
844 ice_trigger_vf_reset(vf, is_vflr); 847 ice_trigger_vf_reset(vf, is_vflr);
845 848
849 vsi = pf->vsi[vf->lan_vsi_idx];
850
846 if (test_bit(ICE_VF_STATE_ENA, vf->vf_states)) { 851 if (test_bit(ICE_VF_STATE_ENA, vf->vf_states)) {
847 ice_vsi_stop_tx_rings(pf->vsi[vf->lan_vsi_idx], ICE_VF_RESET, 852 ice_vsi_stop_lan_tx_rings(vsi, ICE_VF_RESET, vf->vf_id);
848 vf->vf_id); 853 ice_vsi_stop_rx_rings(vsi);
849 ice_vsi_stop_rx_rings(pf->vsi[vf->lan_vsi_idx]);
850 clear_bit(ICE_VF_STATE_ENA, vf->vf_states); 854 clear_bit(ICE_VF_STATE_ENA, vf->vf_states);
851 } else { 855 } else {
852 /* Call Disable LAN Tx queue AQ call even when queues are not 856 /* Call Disable LAN Tx queue AQ call even when queues are not
 853		 * enabled. This is needed for successful completion of VFR	 857		 * enabled. This is needed for successful completion of VFR
854 */ 858 */
855 ice_dis_vsi_txq(pf->vsi[vf->lan_vsi_idx]->port_info, 0, 859 ice_dis_vsi_txq(vsi->port_info, 0, NULL, NULL, ICE_VF_RESET,
856 NULL, NULL, ICE_VF_RESET, vf->vf_id, NULL); 860 vf->vf_id, NULL);
857 } 861 }
858 862
859 /* poll VPGEN_VFRSTAT reg to make sure 863 /* poll VPGEN_VFRSTAT reg to make sure
@@ -1614,7 +1618,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
1614 goto error_param; 1618 goto error_param;
1615 } 1619 }
1616 1620
1617 if (ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, vf->vf_id)) { 1621 if (ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id)) {
1618 dev_err(&vsi->back->pdev->dev, 1622 dev_err(&vsi->back->pdev->dev,
1619 "Failed to stop tx rings on VSI %d\n", 1623 "Failed to stop tx rings on VSI %d\n",
1620 vsi->vsi_num); 1624 vsi->vsi_num);
@@ -1784,7 +1788,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
1784 vsi->num_txq = qci->num_queue_pairs; 1788 vsi->num_txq = qci->num_queue_pairs;
1785 vsi->num_rxq = qci->num_queue_pairs; 1789 vsi->num_rxq = qci->num_queue_pairs;
1786 1790
1787 if (!ice_vsi_cfg_txqs(vsi) && !ice_vsi_cfg_rxqs(vsi)) 1791 if (!ice_vsi_cfg_lan_txqs(vsi) && !ice_vsi_cfg_rxqs(vsi))
1788 aq_ret = 0; 1792 aq_ret = 0;
1789 else 1793 else
1790 aq_ret = ICE_ERR_PARAM; 1794 aq_ret = ICE_ERR_PARAM;
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 7137e7f9c7f3..dfa357b1a9d6 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2486,7 +2486,8 @@ static int igb_set_features(struct net_device *netdev,
2486static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], 2486static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
2487 struct net_device *dev, 2487 struct net_device *dev,
2488 const unsigned char *addr, u16 vid, 2488 const unsigned char *addr, u16 vid,
2489 u16 flags) 2489 u16 flags,
2490 struct netlink_ext_ack *extack)
2490{ 2491{
2491 /* guarantee we can provide a unique filter for the unicast address */ 2492 /* guarantee we can provide a unique filter for the unicast address */
2492 if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { 2493 if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index daff8183534b..b53087a980ef 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -9910,7 +9910,8 @@ static void ixgbe_del_udp_tunnel_port(struct net_device *dev,
9910static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], 9910static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
9911 struct net_device *dev, 9911 struct net_device *dev,
9912 const unsigned char *addr, u16 vid, 9912 const unsigned char *addr, u16 vid,
9913 u16 flags) 9913 u16 flags,
9914 struct netlink_ext_ack *extack)
9914{ 9915{
9915 /* guarantee we can provide a unique filter for the unicast address */ 9916 /* guarantee we can provide a unique filter for the unicast address */
9916 if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { 9917 if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) {
diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index a5ab6f3403ae..763ee5281177 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -2034,10 +2034,9 @@ static void jme_drop_tx_map(struct jme_adapter *jme, int startidx, int count)
2034 ctxbi->len, 2034 ctxbi->len,
2035 PCI_DMA_TODEVICE); 2035 PCI_DMA_TODEVICE);
2036 2036
2037 ctxbi->mapping = 0; 2037 ctxbi->mapping = 0;
2038 ctxbi->len = 0; 2038 ctxbi->len = 0;
2039 } 2039 }
2040
2041} 2040}
2042 2041
2043static int 2042static int
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index e65bc3c95630..c19e74e6ac94 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -3274,7 +3274,7 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat
3274 mlx4_warn(dev, "unknown value for link_state %02x on slave %d port %d\n", 3274 mlx4_warn(dev, "unknown value for link_state %02x on slave %d port %d\n",
3275 link_state, slave, port); 3275 link_state, slave, port);
3276 return -EINVAL; 3276 return -EINVAL;
3277 }; 3277 }
3278 s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; 3278 s_info = &priv->mfunc.master.vf_admin[slave].vport[port];
3279 s_info->link_state = link_state; 3279 s_info->link_state = link_state;
3280 3280
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 2df92dbd38e1..4953c852c247 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -820,7 +820,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
820 !!(eqe->owner & 0x80) ^ 820 !!(eqe->owner & 0x80) ^
821 !!(eq->cons_index & eq->nent) ? "HW" : "SW"); 821 !!(eq->cons_index & eq->nent) ? "HW" : "SW");
822 break; 822 break;
823 }; 823 }
824 824
825 ++eq->cons_index; 825 ++eq->cons_index;
826 eqes_found = 1; 826 eqes_found = 1;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 9de9abacf7f6..6bb2a860b15b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -22,7 +22,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
22# 22#
23mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ 23mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
24 en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ 24 en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
25 en_selftest.o en/port.o en/monitor_stats.o 25 en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o
26 26
27# 27#
28# Netdev extra 28# Netdev extra
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 8fa8fdd30b85..27e276c9bf84 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -388,10 +388,7 @@ struct mlx5e_txqsq {
388 struct mlx5e_channel *channel; 388 struct mlx5e_channel *channel;
389 int txq_ix; 389 int txq_ix;
390 u32 rate_limit; 390 u32 rate_limit;
391 struct mlx5e_txqsq_recover { 391 struct work_struct recover_work;
392 struct work_struct recover_work;
393 u64 last_recover;
394 } recover;
395} ____cacheline_aligned_in_smp; 392} ____cacheline_aligned_in_smp;
396 393
397struct mlx5e_dma_info { 394struct mlx5e_dma_info {
@@ -682,6 +679,13 @@ struct mlx5e_rss_params {
682 u8 hfunc; 679 u8 hfunc;
683}; 680};
684 681
682struct mlx5e_modify_sq_param {
683 int curr_state;
684 int next_state;
685 int rl_update;
686 int rl_index;
687};
688
685struct mlx5e_priv { 689struct mlx5e_priv {
686 /* priv data path fields - start */ 690 /* priv data path fields - start */
687 struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; 691 struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC];
@@ -737,6 +741,7 @@ struct mlx5e_priv {
737#ifdef CONFIG_MLX5_EN_TLS 741#ifdef CONFIG_MLX5_EN_TLS
738 struct mlx5e_tls *tls; 742 struct mlx5e_tls *tls;
739#endif 743#endif
744 struct devlink_health_reporter *tx_reporter;
740}; 745};
741 746
742struct mlx5e_profile { 747struct mlx5e_profile {
@@ -866,6 +871,11 @@ void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
866void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, 871void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
867 struct mlx5e_params *params); 872 struct mlx5e_params *params);
868 873
874int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
875 struct mlx5e_modify_sq_param *p);
876void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq);
877void mlx5e_tx_disable_queue(struct netdev_queue *txq);
878
869static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) 879static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
870{ 880{
871 return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) && 881 return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) &&
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h
new file mode 100644
index 000000000000..2335c5b48820
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h
@@ -0,0 +1,15 @@
1/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
2/* Copyright (c) 2018 Mellanox Technologies. */
3
4#ifndef __MLX5E_EN_REPORTER_H
5#define __MLX5E_EN_REPORTER_H
6
7#include <linux/mlx5/driver.h>
8#include "en.h"
9
10int mlx5e_tx_reporter_create(struct mlx5e_priv *priv);
11void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv);
12void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq);
13void mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq);
14
15#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
new file mode 100644
index 000000000000..d9675afbb924
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -0,0 +1,356 @@
1/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
2/* Copyright (c) 2018 Mellanox Technologies. */
3
4#include <net/devlink.h>
5#include "reporter.h"
6#include "lib/eq.h"
7
8#define MLX5E_TX_REPORTER_PER_SQ_MAX_LEN 256
9
10struct mlx5e_tx_err_ctx {
11 int (*recover)(struct mlx5e_txqsq *sq);
12 struct mlx5e_txqsq *sq;
13};
14
15static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
16{
17 unsigned long exp_time = jiffies + msecs_to_jiffies(2000);
18
19 while (time_before(jiffies, exp_time)) {
20 if (sq->cc == sq->pc)
21 return 0;
22
23 msleep(20);
24 }
25
26 netdev_err(sq->channel->netdev,
27 "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
28 sq->sqn, sq->cc, sq->pc);
29
30 return -ETIMEDOUT;
31}
32
33static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
34{
35 WARN_ONCE(sq->cc != sq->pc,
36 "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
37 sq->sqn, sq->cc, sq->pc);
38 sq->cc = 0;
39 sq->dma_fifo_cc = 0;
40 sq->pc = 0;
41}
42
43static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state)
44{
45 struct mlx5_core_dev *mdev = sq->channel->mdev;
46 struct net_device *dev = sq->channel->netdev;
47 struct mlx5e_modify_sq_param msp = {0};
48 int err;
49
50 msp.curr_state = curr_state;
51 msp.next_state = MLX5_SQC_STATE_RST;
52
53 err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
54 if (err) {
55 netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn);
56 return err;
57 }
58
59 memset(&msp, 0, sizeof(msp));
60 msp.curr_state = MLX5_SQC_STATE_RST;
61 msp.next_state = MLX5_SQC_STATE_RDY;
62
63 err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
64 if (err) {
65 netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn);
66 return err;
67 }
68
69 return 0;
70}
71
72static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq)
73{
74 struct mlx5_core_dev *mdev = sq->channel->mdev;
75 struct net_device *dev = sq->channel->netdev;
76 u8 state;
77 int err;
78
79 if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
80 return 0;
81
82 err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
83 if (err) {
84 netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
85 sq->sqn, err);
86 return err;
87 }
88
89 if (state != MLX5_RQC_STATE_ERR) {
90 netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn);
91 return -EINVAL;
92 }
93
94 mlx5e_tx_disable_queue(sq->txq);
95
96 err = mlx5e_wait_for_sq_flush(sq);
97 if (err)
98 return err;
99
100 /* At this point, no new packets will arrive from the stack as TXQ is
101 * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
 102	 * pending WQEs. It is now safe to reset the SQ.
103 */
104
105 err = mlx5e_sq_to_ready(sq, state);
106 if (err)
107 return err;
108
109 mlx5e_reset_txqsq_cc_pc(sq);
110 sq->stats->recover++;
111 mlx5e_activate_txqsq(sq);
112
113 return 0;
114}
115
116void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq)
117{
118 char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN];
119 struct mlx5e_tx_err_ctx err_ctx = {0};
120
121 err_ctx.sq = sq;
122 err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
123 sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn);
124
125 devlink_health_report(sq->channel->priv->tx_reporter, err_str,
126 &err_ctx);
127}
128
129static int mlx5e_tx_reporter_timeout_recover(struct mlx5e_txqsq *sq)
130{
131 struct mlx5_eq_comp *eq = sq->cq.mcq.eq;
132 u32 eqe_count;
133
134 netdev_err(sq->channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
135 eq->core.eqn, eq->core.cons_index, eq->core.irqn);
136
137 eqe_count = mlx5_eq_poll_irq_disabled(eq);
138 if (!eqe_count) {
139 clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
140 return 1;
141 }
142
143 netdev_err(sq->channel->netdev, "Recover %d eqes on EQ 0x%x\n",
144 eqe_count, eq->core.eqn);
145 sq->channel->stats->eq_rearm++;
146 return 0;
147}
148
149void mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq)
150{
151 struct mlx5e_tx_err_ctx err_ctx;
152 char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN];
153
154 err_ctx.sq = sq;
155 err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
156 sprintf(err_str,
157 "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
158 sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
159 jiffies_to_usecs(jiffies - sq->txq->trans_start));
160 devlink_health_report(sq->channel->priv->tx_reporter, err_str,
161 &err_ctx);
162}
163
164/* state lock cannot be grabbed within this function.
 165	 * It can cause a deadlock or a read-after-free.
166 */
167int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_tx_err_ctx *err_ctx)
168{
169 return err_ctx->recover(err_ctx->sq);
170}
171
172static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv)
173{
174 int err;
175
176 mutex_lock(&priv->state_lock);
177 mlx5e_close_locked(priv->netdev);
178 err = mlx5e_open_locked(priv->netdev);
179 mutex_unlock(&priv->state_lock);
180
181 return err;
182}
183
184static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
185 void *context)
186{
187 struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
188 struct mlx5e_tx_err_ctx *err_ctx = context;
189
190 return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) :
191 mlx5e_tx_reporter_recover_all(priv);
192}
193
194static int
195mlx5e_tx_reporter_build_diagnose_output(struct devlink_health_buffer *buffer,
196 u32 sqn, u8 state, u8 stopped)
197{
198 int err, i;
199 int nest = 0;
200 char name[20];
201
202 err = devlink_health_buffer_nest_start(buffer,
203 DEVLINK_ATTR_HEALTH_BUFFER_OBJECT);
204 if (err)
205 goto buffer_error;
206 nest++;
207
208 err = devlink_health_buffer_nest_start(buffer,
209 DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR);
210 if (err)
211 goto buffer_error;
212 nest++;
213
214 sprintf(name, "SQ 0x%x", sqn);
215 err = devlink_health_buffer_put_object_name(buffer, name);
216 if (err)
217 goto buffer_error;
218
219 err = devlink_health_buffer_nest_start(buffer,
220 DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE);
221 if (err)
222 goto buffer_error;
223 nest++;
224
225 err = devlink_health_buffer_nest_start(buffer,
226 DEVLINK_ATTR_HEALTH_BUFFER_OBJECT);
227 if (err)
228 goto buffer_error;
229 nest++;
230
231 err = devlink_health_buffer_nest_start(buffer,
232 DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR);
233 if (err)
234 goto buffer_error;
235 nest++;
236
237 err = devlink_health_buffer_put_object_name(buffer, "HW state");
238 if (err)
239 goto buffer_error;
240
241 err = devlink_health_buffer_nest_start(buffer,
242 DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE);
243 if (err)
244 goto buffer_error;
245 nest++;
246
247 err = devlink_health_buffer_put_value_u8(buffer, state);
248 if (err)
249 goto buffer_error;
250
251 devlink_health_buffer_nest_end(buffer); /* DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE */
252 nest--;
253
254 devlink_health_buffer_nest_end(buffer); /* DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR */
255 nest--;
256
257 err = devlink_health_buffer_nest_start(buffer,
258 DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR);
259 if (err)
260 goto buffer_error;
261 nest++;
262
263 err = devlink_health_buffer_put_object_name(buffer, "stopped");
264 if (err)
265 goto buffer_error;
266
267 err = devlink_health_buffer_nest_start(buffer,
268 DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE);
269 if (err)
270 goto buffer_error;
271 nest++;
272
273 err = devlink_health_buffer_put_value_u8(buffer, stopped);
274 if (err)
275 goto buffer_error;
276
277 for (i = 0; i < nest; i++)
278 devlink_health_buffer_nest_end(buffer);
279
280 return 0;
281
282buffer_error:
283 for (i = 0; i < nest; i++)
284 devlink_health_buffer_nest_cancel(buffer);
285 return err;
286}
287
288static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
289 struct devlink_health_buffer **buffers_array,
290 unsigned int buffer_size,
291 unsigned int num_buffers)
292{
293 struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
294 unsigned int buff = 0;
295 int i = 0, err = 0;
296
297 if (buffer_size < MLX5E_TX_REPORTER_PER_SQ_MAX_LEN)
298 return -ENOMEM;
299
300 mutex_lock(&priv->state_lock);
301
302 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
303 mutex_unlock(&priv->state_lock);
304 return 0;
305 }
306
307 while (i < priv->channels.num * priv->channels.params.num_tc) {
308 struct mlx5e_txqsq *sq = priv->txq2sq[i];
309 u8 state;
310
311 err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
312 if (err)
313 break;
314
315 err = mlx5e_tx_reporter_build_diagnose_output(buffers_array[buff],
316 sq->sqn, state,
317 netif_xmit_stopped(sq->txq));
318 if (err) {
319 if (++buff == num_buffers)
320 break;
321 } else {
322 i++;
323 }
324 }
325
326 mutex_unlock(&priv->state_lock);
327 return err;
328}
329
330static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
331 .name = "TX",
332 .recover = mlx5e_tx_reporter_recover,
333 .diagnose_size = MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC *
334 MLX5E_TX_REPORTER_PER_SQ_MAX_LEN,
335 .diagnose = mlx5e_tx_reporter_diagnose,
336 .dump_size = 0,
337 .dump = NULL,
338};
339
340#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
341int mlx5e_tx_reporter_create(struct mlx5e_priv *priv)
342{
343 struct mlx5_core_dev *mdev = priv->mdev;
344 struct devlink *devlink = priv_to_devlink(mdev);
345
346 priv->tx_reporter =
347 devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops,
348 MLX5_REPORTER_TX_GRACEFUL_PERIOD,
349 true, priv);
350 return PTR_ERR_OR_ZERO(priv->tx_reporter);
351}
352
353void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv)
354{
355 devlink_health_reporter_destroy(priv->tx_reporter);
356}
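mlx5e_tx_reporter_create() above relies on the kernel's ERR_PTR convention: the constructor returns either a valid pointer or an encoded errno, and PTR_ERR_OR_ZERO() collapses that into the int the caller wants. A minimal sketch, assuming a hypothetical some_create():

#include <linux/err.h>

struct foo;
struct foo *some_create(void);		/* hypothetical: valid pointer or ERR_PTR(-errno) */

static int register_foo(struct foo **out)
{
	*out = some_create();
	return PTR_ERR_OR_ZERO(*out);	/* 0 if valid, -errno if IS_ERR() */
}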
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 8cfd2ec7c0a2..dee0c8f3d4e9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -51,6 +51,7 @@
51#include "en/xdp.h" 51#include "en/xdp.h"
52#include "lib/eq.h" 52#include "lib/eq.h"
53#include "en/monitor_stats.h" 53#include "en/monitor_stats.h"
54#include "en/reporter.h"
54 55
55struct mlx5e_rq_param { 56struct mlx5e_rq_param {
56 u32 rqc[MLX5_ST_SZ_DW(rqc)]; 57 u32 rqc[MLX5_ST_SZ_DW(rqc)];
@@ -1160,7 +1161,7 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
1160 return 0; 1161 return 0;
1161} 1162}
1162 1163
1163static void mlx5e_sq_recover(struct work_struct *work); 1164static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work);
1164static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, 1165static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
1165 int txq_ix, 1166 int txq_ix,
1166 struct mlx5e_params *params, 1167 struct mlx5e_params *params,
@@ -1182,7 +1183,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
1182 sq->uar_map = mdev->mlx5e_res.bfreg.map; 1183 sq->uar_map = mdev->mlx5e_res.bfreg.map;
1183 sq->min_inline_mode = params->tx_min_inline_mode; 1184 sq->min_inline_mode = params->tx_min_inline_mode;
1184 sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; 1185 sq->stats = &c->priv->channel_stats[c->ix].sq[tc];
1185 INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover); 1186 INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
1186 if (MLX5_IPSEC_DEV(c->priv->mdev)) 1187 if (MLX5_IPSEC_DEV(c->priv->mdev))
1187 set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); 1188 set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
1188 if (mlx5_accel_is_tls_device(c->priv->mdev)) 1189 if (mlx5_accel_is_tls_device(c->priv->mdev))
@@ -1270,15 +1271,8 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
1270 return err; 1271 return err;
1271} 1272}
1272 1273
1273struct mlx5e_modify_sq_param { 1274int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
1274 int curr_state; 1275 struct mlx5e_modify_sq_param *p)
1275 int next_state;
1276 bool rl_update;
1277 int rl_index;
1278};
1279
1280static int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
1281 struct mlx5e_modify_sq_param *p)
1282{ 1276{
1283 void *in; 1277 void *in;
1284 void *sqc; 1278 void *sqc;
@@ -1376,17 +1370,7 @@ err_free_txqsq:
1376 return err; 1370 return err;
1377} 1371}
1378 1372
1379static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) 1373void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
1380{
1381 WARN_ONCE(sq->cc != sq->pc,
1382 "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
1383 sq->sqn, sq->cc, sq->pc);
1384 sq->cc = 0;
1385 sq->dma_fifo_cc = 0;
1386 sq->pc = 0;
1387}
1388
1389static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
1390{ 1374{
1391 sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix); 1375 sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix);
1392 clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); 1376 clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
@@ -1395,7 +1379,7 @@ static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
1395 netif_tx_start_queue(sq->txq); 1379 netif_tx_start_queue(sq->txq);
1396} 1380}
1397 1381
1398static inline void netif_tx_disable_queue(struct netdev_queue *txq) 1382void mlx5e_tx_disable_queue(struct netdev_queue *txq)
1399{ 1383{
1400 __netif_tx_lock_bh(txq); 1384 __netif_tx_lock_bh(txq);
1401 netif_tx_stop_queue(txq); 1385 netif_tx_stop_queue(txq);
@@ -1411,7 +1395,7 @@ static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
1411 /* prevent netif_tx_wake_queue */ 1395 /* prevent netif_tx_wake_queue */
1412 napi_synchronize(&c->napi); 1396 napi_synchronize(&c->napi);
1413 1397
1414 netif_tx_disable_queue(sq->txq); 1398 mlx5e_tx_disable_queue(sq->txq);
1415 1399
1416 /* last doorbell out, godspeed .. */ 1400 /* last doorbell out, godspeed .. */
1417 if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) { 1401 if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) {
@@ -1431,6 +1415,7 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
1431 struct mlx5_rate_limit rl = {0}; 1415 struct mlx5_rate_limit rl = {0};
1432 1416
1433 cancel_work_sync(&sq->dim.work); 1417 cancel_work_sync(&sq->dim.work);
1418 cancel_work_sync(&sq->recover_work);
1434 mlx5e_destroy_sq(mdev, sq->sqn); 1419 mlx5e_destroy_sq(mdev, sq->sqn);
1435 if (sq->rate_limit) { 1420 if (sq->rate_limit) {
1436 rl.rate = sq->rate_limit; 1421 rl.rate = sq->rate_limit;
@@ -1440,105 +1425,15 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
1440 mlx5e_free_txqsq(sq); 1425 mlx5e_free_txqsq(sq);
1441} 1426}
1442 1427
1443static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) 1428static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
1444{
1445 unsigned long exp_time = jiffies + msecs_to_jiffies(2000);
1446
1447 while (time_before(jiffies, exp_time)) {
1448 if (sq->cc == sq->pc)
1449 return 0;
1450
1451 msleep(20);
1452 }
1453
1454 netdev_err(sq->channel->netdev,
1455 "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
1456 sq->sqn, sq->cc, sq->pc);
1457
1458 return -ETIMEDOUT;
1459}
1460
1461static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state)
1462{ 1429{
1463 struct mlx5_core_dev *mdev = sq->channel->mdev; 1430 struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq,
1464 struct net_device *dev = sq->channel->netdev; 1431 recover_work);
1465 struct mlx5e_modify_sq_param msp = {0};
1466 int err;
1467 1432
1468 msp.curr_state = curr_state; 1433 if (!sq->channel->priv->tx_reporter)
1469 msp.next_state = MLX5_SQC_STATE_RST;
1470
1471 err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
1472 if (err) {
1473 netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn);
1474 return err;
1475 }
1476
1477 memset(&msp, 0, sizeof(msp));
1478 msp.curr_state = MLX5_SQC_STATE_RST;
1479 msp.next_state = MLX5_SQC_STATE_RDY;
1480
1481 err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
1482 if (err) {
1483 netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn);
1484 return err;
1485 }
1486
1487 return 0;
1488}
1489
1490static void mlx5e_sq_recover(struct work_struct *work)
1491{
1492 struct mlx5e_txqsq_recover *recover =
1493 container_of(work, struct mlx5e_txqsq_recover,
1494 recover_work);
1495 struct mlx5e_txqsq *sq = container_of(recover, struct mlx5e_txqsq,
1496 recover);
1497 struct mlx5_core_dev *mdev = sq->channel->mdev;
1498 struct net_device *dev = sq->channel->netdev;
1499 u8 state;
1500 int err;
1501
1502 err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
1503 if (err) {
1504 netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
1505 sq->sqn, err);
1506 return;
1507 }
1508
1509 if (state != MLX5_RQC_STATE_ERR) {
1510 netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn);
1511 return;
1512 }
1513
1514 netif_tx_disable_queue(sq->txq);
1515
1516 if (mlx5e_wait_for_sq_flush(sq))
1517 return; 1434 return;
1518 1435
1519 /* If the interval between two consecutive recovers per SQ is too 1436 mlx5e_tx_reporter_err_cqe(sq);
1520 * short, don't recover to avoid infinite loop of ERR_CQE -> recover.
1521 * If we reached this state, there is probably a bug that needs to be
1522 * fixed. Let's keep the queue closed and let the tx timeout clean up.
1523 */
1524 if (jiffies_to_msecs(jiffies - recover->last_recover) <
1525 MLX5E_SQ_RECOVER_MIN_INTERVAL) {
1526 netdev_err(dev, "Recover SQ 0x%x canceled, too many error CQEs\n",
1527 sq->sqn);
1528 return;
1529 }
1530
1531 /* At this point, no new packets will arrive from the stack as TXQ is
1532 * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
1533 * pending WQEs. The SQ can safely be reset.
1534 */
1535 if (mlx5e_sq_to_ready(sq, state))
1536 return;
1537
1538 mlx5e_reset_txqsq_cc_pc(sq);
1539 sq->stats->recover++;
1540 recover->last_recover = jiffies;
1541 mlx5e_activate_txqsq(sq);
1542} 1437}
1543 1438
1544static int mlx5e_open_icosq(struct mlx5e_channel *c, 1439static int mlx5e_open_icosq(struct mlx5e_channel *c,
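The recovery path is now a bare work item embedded in the SQ: the error CQE handler only queues it, and the handler recovers the embedding SQ via container_of(), as mlx5e_tx_err_cqe_work() does above. A sketch of the hookup with a simplified SQ type:

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/workqueue.h>

struct sq {
	struct work_struct recover_work;
	u32 sqn;
};

static void sq_recover_work(struct work_struct *work)
{
	struct sq *sq = container_of(work, struct sq, recover_work);

	pr_info("recovering SQ 0x%x\n", sq->sqn);	/* the real handler calls the TX reporter */
}

/* init:       INIT_WORK(&sq->recover_work, sq_recover_work);
 * error path: queue_work(system_wq, &sq->recover_work);
 * teardown:   cancel_work_sync(&sq->recover_work); -- as mlx5e_close_txqsq() now does
 */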
@@ -3207,6 +3102,7 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
3207{ 3102{
3208 int tc; 3103 int tc;
3209 3104
3105 mlx5e_tx_reporter_destroy(priv);
3210 for (tc = 0; tc < priv->profile->max_tc; tc++) 3106 for (tc = 0; tc < priv->profile->max_tc; tc++)
3211 mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); 3107 mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
3212} 3108}
@@ -4178,31 +4074,14 @@ netdev_features_t mlx5e_features_check(struct sk_buff *skb,
4178 return features; 4074 return features;
4179} 4075}
4180 4076
4181static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev,
4182 struct mlx5e_txqsq *sq)
4183{
4184 struct mlx5_eq_comp *eq = sq->cq.mcq.eq;
4185 u32 eqe_count;
4186
4187 netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
4188 eq->core.eqn, eq->core.cons_index, eq->core.irqn);
4189
4190 eqe_count = mlx5_eq_poll_irq_disabled(eq);
4191 if (!eqe_count)
4192 return false;
4193
4194 netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->core.eqn);
4195 sq->channel->stats->eq_rearm++;
4196 return true;
4197}
4198
4199static void mlx5e_tx_timeout_work(struct work_struct *work) 4077static void mlx5e_tx_timeout_work(struct work_struct *work)
4200{ 4078{
4201 struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, 4079 struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
4202 tx_timeout_work); 4080 tx_timeout_work);
4203 struct net_device *dev = priv->netdev; 4081 int i;
4204 bool reopen_channels = false; 4082
4205 int i, err; 4083 if (!priv->tx_reporter)
4084 return;
4206 4085
4207 rtnl_lock(); 4086 rtnl_lock();
4208 mutex_lock(&priv->state_lock); 4087 mutex_lock(&priv->state_lock);
@@ -4211,36 +4090,16 @@ static void mlx5e_tx_timeout_work(struct work_struct *work)
4211 goto unlock; 4090 goto unlock;
4212 4091
4213 for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) { 4092 for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) {
4214 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i); 4093 struct netdev_queue *dev_queue =
4094 netdev_get_tx_queue(priv->netdev, i);
4215 struct mlx5e_txqsq *sq = priv->txq2sq[i]; 4095 struct mlx5e_txqsq *sq = priv->txq2sq[i];
4216 4096
4217 if (!netif_xmit_stopped(dev_queue)) 4097 if (!netif_xmit_stopped(dev_queue))
4218 continue; 4098 continue;
4219 4099
4220 netdev_err(dev, 4100 mlx5e_tx_reporter_timeout(sq);
4221 "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
4222 i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
4223 jiffies_to_usecs(jiffies - dev_queue->trans_start));
4224
4225 /* If we recover a lost interrupt, most likely TX timeout will
4226 * be resolved, skip reopening channels
4227 */
4228 if (!mlx5e_tx_timeout_eq_recover(dev, sq)) {
4229 clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
4230 reopen_channels = true;
4231 }
4232 } 4101 }
4233 4102
4234 if (!reopen_channels)
4235 goto unlock;
4236
4237 mlx5e_close_locked(dev);
4238 err = mlx5e_open_locked(dev);
4239 if (err)
4240 netdev_err(priv->netdev,
4241 "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n",
4242 err);
4243
4244unlock: 4103unlock:
4245 mutex_unlock(&priv->state_lock); 4104 mutex_unlock(&priv->state_lock);
4246 rtnl_unlock(); 4105 rtnl_unlock();
@@ -4908,6 +4767,7 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
4908#ifdef CONFIG_MLX5_CORE_EN_DCB 4767#ifdef CONFIG_MLX5_CORE_EN_DCB
4909 mlx5e_dcbnl_initialize(priv); 4768 mlx5e_dcbnl_initialize(priv);
4910#endif 4769#endif
4770 mlx5e_tx_reporter_create(priv);
4911 return 0; 4771 return 0;
4912} 4772}
4913 4773
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 598ad7e4d5c9..a8e052a5ce36 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -514,7 +514,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
514 mlx5e_dump_error_cqe(sq, 514 mlx5e_dump_error_cqe(sq,
515 (struct mlx5_err_cqe *)cqe); 515 (struct mlx5_err_cqe *)cqe);
516 queue_work(cq->channel->priv->wq, 516 queue_work(cq->channel->priv->wq,
517 &sq->recover.recover_work); 517 &sq->recover_work);
518 } 518 }
519 stats->cqe_err++; 519 stats->cqe_err++;
520 } 520 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 9b48dffc9f63..5f8066ab7d40 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5666,6 +5666,8 @@ enum mlxsw_reg_ritr_loopback_protocol {
5666 MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4, 5666 MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4,
5667 /* IPinIP IPv6 underlay Unicast */ 5667 /* IPinIP IPv6 underlay Unicast */
5668 MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV6, 5668 MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV6,
5669 /* IPinIP generic - used for Spectrum-2 underlay RIF */
5670 MLXSW_REG_RITR_LOOPBACK_GENERIC,
5669}; 5671};
5670 5672
5671/* reg_ritr_loopback_protocol 5673/* reg_ritr_loopback_protocol
@@ -5706,6 +5708,13 @@ MLXSW_ITEM32(reg, ritr, loopback_ipip_options, 0x10, 20, 4);
5706 */ 5708 */
5707MLXSW_ITEM32(reg, ritr, loopback_ipip_uvr, 0x10, 0, 16); 5709MLXSW_ITEM32(reg, ritr, loopback_ipip_uvr, 0x10, 0, 16);
5708 5710
5711/* reg_ritr_loopback_ipip_underlay_rif
5712 * Underlay ingress router interface.
5713 * Reserved for Spectrum.
5714 * Access: RW
5715 */
5716MLXSW_ITEM32(reg, ritr, loopback_ipip_underlay_rif, 0x14, 0, 16);
5717
5709/* reg_ritr_loopback_ipip_usip* 5718/* reg_ritr_loopback_ipip_usip*
5710 * Encapsulation Underlay source IP. 5719 * Encapsulation Underlay source IP.
5711 * Access: RW 5720 * Access: RW
@@ -5821,11 +5830,12 @@ static inline void
5821mlxsw_reg_ritr_loopback_ipip_common_pack(char *payload, 5830mlxsw_reg_ritr_loopback_ipip_common_pack(char *payload,
5822 enum mlxsw_reg_ritr_loopback_ipip_type ipip_type, 5831 enum mlxsw_reg_ritr_loopback_ipip_type ipip_type,
5823 enum mlxsw_reg_ritr_loopback_ipip_options options, 5832 enum mlxsw_reg_ritr_loopback_ipip_options options,
5824 u16 uvr_id, u32 gre_key) 5833 u16 uvr_id, u16 underlay_rif, u32 gre_key)
5825{ 5834{
5826 mlxsw_reg_ritr_loopback_ipip_type_set(payload, ipip_type); 5835 mlxsw_reg_ritr_loopback_ipip_type_set(payload, ipip_type);
5827 mlxsw_reg_ritr_loopback_ipip_options_set(payload, options); 5836 mlxsw_reg_ritr_loopback_ipip_options_set(payload, options);
5828 mlxsw_reg_ritr_loopback_ipip_uvr_set(payload, uvr_id); 5837 mlxsw_reg_ritr_loopback_ipip_uvr_set(payload, uvr_id);
5838 mlxsw_reg_ritr_loopback_ipip_underlay_rif_set(payload, underlay_rif);
5829 mlxsw_reg_ritr_loopback_ipip_gre_key_set(payload, gre_key); 5839 mlxsw_reg_ritr_loopback_ipip_gre_key_set(payload, gre_key);
5830} 5840}
5831 5841
@@ -5833,12 +5843,12 @@ static inline void
5833mlxsw_reg_ritr_loopback_ipip4_pack(char *payload, 5843mlxsw_reg_ritr_loopback_ipip4_pack(char *payload,
5834 enum mlxsw_reg_ritr_loopback_ipip_type ipip_type, 5844 enum mlxsw_reg_ritr_loopback_ipip_type ipip_type,
5835 enum mlxsw_reg_ritr_loopback_ipip_options options, 5845 enum mlxsw_reg_ritr_loopback_ipip_options options,
5836 u16 uvr_id, u32 usip, u32 gre_key) 5846 u16 uvr_id, u16 underlay_rif, u32 usip, u32 gre_key)
5837{ 5847{
5838 mlxsw_reg_ritr_loopback_protocol_set(payload, 5848 mlxsw_reg_ritr_loopback_protocol_set(payload,
5839 MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4); 5849 MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4);
5840 mlxsw_reg_ritr_loopback_ipip_common_pack(payload, ipip_type, options, 5850 mlxsw_reg_ritr_loopback_ipip_common_pack(payload, ipip_type, options,
5841 uvr_id, gre_key); 5851 uvr_id, underlay_rif, gre_key);
5842 mlxsw_reg_ritr_loopback_ipip_usip4_set(payload, usip); 5852 mlxsw_reg_ritr_loopback_ipip_usip4_set(payload, usip);
5843} 5853}
5844 5854
@@ -7200,6 +7210,13 @@ MLXSW_ITEM32(reg, rtdp, type, 0x00, 28, 4);
7200 */ 7210 */
7201MLXSW_ITEM32(reg, rtdp, tunnel_index, 0x00, 0, 24); 7211MLXSW_ITEM32(reg, rtdp, tunnel_index, 0x00, 0, 24);
7202 7212
7213/* reg_rtdp_egress_router_interface
7214 * Underlay egress router interface.
7215 * Valid range is from 0 to cap_max_router_interfaces - 1
7216 * Access: RW
7217 */
7218MLXSW_ITEM32(reg, rtdp, egress_router_interface, 0x40, 0, 16);
7219
7203/* IPinIP */ 7220/* IPinIP */
7204 7221
7205/* reg_rtdp_ipip_irif 7222/* reg_rtdp_ipip_irif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 32519c93df17..a88169738b4a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4094,6 +4094,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
4094 mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops; 4094 mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops;
4095 mlxsw_sp->nve_ops_arr = mlxsw_sp1_nve_ops_arr; 4095 mlxsw_sp->nve_ops_arr = mlxsw_sp1_nve_ops_arr;
4096 mlxsw_sp->mac_mask = mlxsw_sp1_mac_mask; 4096 mlxsw_sp->mac_mask = mlxsw_sp1_mac_mask;
4097 mlxsw_sp->rif_ops_arr = mlxsw_sp1_rif_ops_arr;
4097 4098
4098 return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); 4099 return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
4099} 4100}
@@ -4110,6 +4111,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
4110 mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops; 4111 mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops;
4111 mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr; 4112 mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr;
4112 mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask; 4113 mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask;
4114 mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr;
4113 4115
4114 return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); 4116 return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
4115} 4117}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index a1c32a81b011..1fa5c81b209f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -75,6 +75,11 @@ enum mlxsw_sp_rif_type {
75 MLXSW_SP_RIF_TYPE_MAX, 75 MLXSW_SP_RIF_TYPE_MAX,
76}; 76};
77 77
78struct mlxsw_sp_rif_ops;
79
80extern const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[];
81extern const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[];
82
78enum mlxsw_sp_fid_type { 83enum mlxsw_sp_fid_type {
79 MLXSW_SP_FID_TYPE_8021Q, 84 MLXSW_SP_FID_TYPE_8021Q,
80 MLXSW_SP_FID_TYPE_8021D, 85 MLXSW_SP_FID_TYPE_8021D,
@@ -161,6 +166,7 @@ struct mlxsw_sp {
161 const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops; 166 const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops;
162 const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops; 167 const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops;
163 const struct mlxsw_sp_nve_ops **nve_ops_arr; 168 const struct mlxsw_sp_nve_ops **nve_ops_arr;
169 const struct mlxsw_sp_rif_ops **rif_ops_arr;
164}; 170};
165 171
166static inline struct mlxsw_sp_upper * 172static inline struct mlxsw_sp_upper *
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
index 505b87846acc..f5c381dcb015 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
@@ -234,8 +234,8 @@ mlxsw_sp_acl_bf_init(struct mlxsw_sp *mlxsw_sp, unsigned int num_erp_banks)
234 * is 2^ACL_MAX_BF_LOG 234 * is 2^ACL_MAX_BF_LOG
235 */ 235 */
236 bf_bank_size = 1 << MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_BF_LOG); 236 bf_bank_size = 1 << MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_BF_LOG);
237 bf = kzalloc(sizeof(*bf) + bf_bank_size * num_erp_banks * 237 bf = kzalloc(struct_size(bf, refcnt, bf_bank_size * num_erp_banks),
238 sizeof(*bf->refcnt), GFP_KERNEL); 238 GFP_KERNEL);
239 if (!bf) 239 if (!bf)
240 return ERR_PTR(-ENOMEM); 240 return ERR_PTR(-ENOMEM);
241 241
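The struct_size() conversion replaces the open-coded sizeof(*bf) + count * sizeof(*bf->refcnt) with an overflow-checked helper from <linux/overflow.h>: on arithmetic overflow it saturates, so an absurdly large count fails the allocation instead of producing a silently undersized buffer. A sketch with an illustrative type:

#include <linux/overflow.h>
#include <linux/refcount.h>
#include <linux/slab.h>

struct bf_like {
	unsigned int num_banks;
	refcount_t refcnt[];		/* flexible array member */
};

static struct bf_like *bf_alloc(size_t n)
{
	struct bf_like *bf;

	/* sizeof(*bf) + n * sizeof(bf->refcnt[0]), saturating on overflow */
	bf = kzalloc(struct_size(bf, refcnt, n), GFP_KERNEL);
	return bf;			/* NULL on failure */
}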
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index 41e607a14846..49933818c6f5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -220,7 +220,7 @@ start_again:
220 for (; i < rif_count; i++) { 220 for (; i < rif_count; i++) {
221 struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); 221 struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i);
222 222
223 if (!rif) 223 if (!rif || !mlxsw_sp_rif_dev(rif))
224 continue; 224 continue;
225 err = mlxsw_sp_erif_entry_get(mlxsw_sp, &entry, rif, 225 err = mlxsw_sp_erif_entry_get(mlxsw_sp, &entry, rif,
226 counters_enabled); 226 counters_enabled);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
index 00db26c96bf5..6400cd644b7a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -145,6 +145,7 @@ mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp,
145 struct mlxsw_sp_ipip_entry *ipip_entry) 145 struct mlxsw_sp_ipip_entry *ipip_entry)
146{ 146{
147 u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); 147 u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
148 u16 ul_rif_id = mlxsw_sp_ipip_lb_ul_rif_id(ipip_entry->ol_lb);
148 char rtdp_pl[MLXSW_REG_RTDP_LEN]; 149 char rtdp_pl[MLXSW_REG_RTDP_LEN];
149 struct ip_tunnel_parm parms; 150 struct ip_tunnel_parm parms;
150 unsigned int type_check; 151 unsigned int type_check;
@@ -157,6 +158,7 @@ mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp,
157 ikey = mlxsw_sp_ipip_parms4_ikey(parms); 158 ikey = mlxsw_sp_ipip_parms4_ikey(parms);
158 159
159 mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index); 160 mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index);
161 mlxsw_reg_rtdp_egress_router_interface_set(rtdp_pl, ul_rif_id);
160 162
161 type_check = has_ikey ? 163 type_check = has_ikey ?
162 MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY : 164 MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY :
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
index fb1c48c698f2..1df164a4b06d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
@@ -267,8 +267,8 @@ mlxsw_sp_nve_mc_record_create(struct mlxsw_sp *mlxsw_sp,
267 struct mlxsw_sp_nve_mc_record *mc_record; 267 struct mlxsw_sp_nve_mc_record *mc_record;
268 int err; 268 int err;
269 269
270 mc_record = kzalloc(sizeof(*mc_record) + num_max_entries * 270 mc_record = kzalloc(struct_size(mc_record, entries, num_max_entries),
271 sizeof(struct mlxsw_sp_nve_mc_entry), GFP_KERNEL); 271 GFP_KERNEL);
272 if (!mc_record) 272 if (!mc_record)
273 return ERR_PTR(-ENOMEM); 273 return ERR_PTR(-ENOMEM);
274 274
@@ -841,11 +841,9 @@ int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid,
841 841
842 nve->config = config; 842 nve->config = config;
843 843
844 err = ops->fdb_replay(params->dev, params->vni); 844 err = ops->fdb_replay(params->dev, params->vni, extack);
845 if (err) { 845 if (err)
846 NL_SET_ERR_MSG_MOD(extack, "Failed to offload the FDB");
847 goto err_fdb_replay; 846 goto err_fdb_replay;
848 }
849 847
850 return 0; 848 return 0;
851 849
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
index 02937ea95bc3..20d99b41611d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
@@ -41,7 +41,8 @@ struct mlxsw_sp_nve_ops {
41 int (*init)(struct mlxsw_sp_nve *nve, 41 int (*init)(struct mlxsw_sp_nve *nve,
42 const struct mlxsw_sp_nve_config *config); 42 const struct mlxsw_sp_nve_config *config);
43 void (*fini)(struct mlxsw_sp_nve *nve); 43 void (*fini)(struct mlxsw_sp_nve *nve);
44 int (*fdb_replay)(const struct net_device *nve_dev, __be32 vni); 44 int (*fdb_replay)(const struct net_device *nve_dev, __be32 vni,
45 struct netlink_ext_ack *extack);
45 void (*fdb_clear_offload)(const struct net_device *nve_dev, __be32 vni); 46 void (*fdb_clear_offload)(const struct net_device *nve_dev, __be32 vni);
46}; 47};
47 48
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
index 74e564c4ac19..9ba0b83bd949 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
@@ -212,11 +212,13 @@ static void mlxsw_sp1_nve_vxlan_fini(struct mlxsw_sp_nve *nve)
212} 212}
213 213
214static int 214static int
215mlxsw_sp_nve_vxlan_fdb_replay(const struct net_device *nve_dev, __be32 vni) 215mlxsw_sp_nve_vxlan_fdb_replay(const struct net_device *nve_dev, __be32 vni,
216 struct netlink_ext_ack *extack)
216{ 217{
217 if (WARN_ON(!netif_is_vxlan(nve_dev))) 218 if (WARN_ON(!netif_is_vxlan(nve_dev)))
218 return -EINVAL; 219 return -EINVAL;
219 return vxlan_fdb_replay(nve_dev, vni, &mlxsw_sp_switchdev_notifier); 220 return vxlan_fdb_replay(nve_dev, vni, &mlxsw_sp_switchdev_notifier,
221 extack);
220} 222}
221 223
222static void 224static void
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 98e5ffd71b91..0949404a28e5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -80,7 +80,7 @@ struct mlxsw_sp_router {
80struct mlxsw_sp_rif { 80struct mlxsw_sp_rif {
81 struct list_head nexthop_list; 81 struct list_head nexthop_list;
82 struct list_head neigh_list; 82 struct list_head neigh_list;
83 struct net_device *dev; 83 struct net_device *dev; /* NULL for underlay RIF */
84 struct mlxsw_sp_fid *fid; 84 struct mlxsw_sp_fid *fid;
85 unsigned char addr[ETH_ALEN]; 85 unsigned char addr[ETH_ALEN];
86 int mtu; 86 int mtu;
@@ -120,6 +120,7 @@ struct mlxsw_sp_rif_ipip_lb {
120 struct mlxsw_sp_rif common; 120 struct mlxsw_sp_rif common;
121 struct mlxsw_sp_rif_ipip_lb_config lb_config; 121 struct mlxsw_sp_rif_ipip_lb_config lb_config;
122 u16 ul_vr_id; /* Reserved for Spectrum-2. */ 122 u16 ul_vr_id; /* Reserved for Spectrum-2. */
123 u16 ul_rif_id; /* Reserved for Spectrum. */
123}; 124};
124 125
125struct mlxsw_sp_rif_params_ipip_lb { 126struct mlxsw_sp_rif_params_ipip_lb {
@@ -440,6 +441,8 @@ struct mlxsw_sp_vr {
440 struct mlxsw_sp_fib *fib4; 441 struct mlxsw_sp_fib *fib4;
441 struct mlxsw_sp_fib *fib6; 442 struct mlxsw_sp_fib *fib6;
442 struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX]; 443 struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
444 struct mlxsw_sp_rif *ul_rif;
445 refcount_t ul_rif_refcnt;
443}; 446};
444 447
445static const struct rhashtable_params mlxsw_sp_fib_ht_params; 448static const struct rhashtable_params mlxsw_sp_fib_ht_params;
@@ -1437,8 +1440,8 @@ mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1437} 1440}
1438 1441
1439static int 1442static int
1440mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, 1443mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
1441 struct mlxsw_sp_vr *ul_vr, bool enable) 1444 u16 ul_rif_id, bool enable)
1442{ 1445{
1443 struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config; 1446 struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1444 struct mlxsw_sp_rif *rif = &lb_rif->common; 1447 struct mlxsw_sp_rif *rif = &lb_rif->common;
@@ -1453,7 +1456,7 @@ mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
1453 rif->rif_index, rif->vr_id, rif->dev->mtu); 1456 rif->rif_index, rif->vr_id, rif->dev->mtu);
1454 mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt, 1457 mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1455 MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET, 1458 MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
1456 ul_vr->id, saddr4, lb_cf.okey); 1459 ul_vr_id, ul_rif_id, saddr4, lb_cf.okey);
1457 break; 1460 break;
1458 1461
1459 case MLXSW_SP_L3_PROTO_IPV6: 1462 case MLXSW_SP_L3_PROTO_IPV6:
@@ -1468,14 +1471,13 @@ static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1468{ 1471{
1469 struct mlxsw_sp_ipip_entry *ipip_entry; 1472 struct mlxsw_sp_ipip_entry *ipip_entry;
1470 struct mlxsw_sp_rif_ipip_lb *lb_rif; 1473 struct mlxsw_sp_rif_ipip_lb *lb_rif;
1471 struct mlxsw_sp_vr *ul_vr;
1472 int err = 0; 1474 int err = 0;
1473 1475
1474 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); 1476 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1475 if (ipip_entry) { 1477 if (ipip_entry) {
1476 lb_rif = ipip_entry->ol_lb; 1478 lb_rif = ipip_entry->ol_lb;
1477 ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id]; 1479 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
1478 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true); 1480 lb_rif->ul_rif_id, true);
1479 if (err) 1481 if (err)
1480 goto out; 1482 goto out;
1481 lb_rif->common.mtu = ol_dev->mtu; 1483 lb_rif->common.mtu = ol_dev->mtu;
@@ -6224,10 +6226,12 @@ static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
6224 6226
6225 INIT_LIST_HEAD(&rif->nexthop_list); 6227 INIT_LIST_HEAD(&rif->nexthop_list);
6226 INIT_LIST_HEAD(&rif->neigh_list); 6228 INIT_LIST_HEAD(&rif->neigh_list);
6227 ether_addr_copy(rif->addr, l3_dev->dev_addr); 6229 if (l3_dev) {
6228 rif->mtu = l3_dev->mtu; 6230 ether_addr_copy(rif->addr, l3_dev->dev_addr);
6231 rif->mtu = l3_dev->mtu;
6232 rif->dev = l3_dev;
6233 }
6229 rif->vr_id = vr_id; 6234 rif->vr_id = vr_id;
6230 rif->dev = l3_dev;
6231 rif->rif_index = rif_index; 6235 rif->rif_index = rif_index;
6232 6236
6233 return rif; 6237 return rif;
@@ -6251,7 +6255,19 @@ u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6251 6255
6252u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) 6256u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6253{ 6257{
6254 return lb_rif->ul_vr_id; 6258 u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev);
6259 struct mlxsw_sp_vr *ul_vr;
6260
6261 ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
6262 if (WARN_ON(IS_ERR(ul_vr)))
6263 return 0;
6264
6265 return ul_vr->id;
6266}
6267
6268u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6269{
6270 return lb_rif->ul_rif_id;
6255} 6271}
6256 6272
6257int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif) 6273int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
@@ -6284,7 +6300,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6284 int i, err; 6300 int i, err;
6285 6301
6286 type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev); 6302 type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
6287 ops = mlxsw_sp->router->rif_ops_arr[type]; 6303 ops = mlxsw_sp->rif_ops_arr[type];
6288 6304
6289 vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack); 6305 vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
6290 if (IS_ERR(vr)) 6306 if (IS_ERR(vr))
@@ -6303,6 +6319,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6303 goto err_rif_alloc; 6319 goto err_rif_alloc;
6304 } 6320 }
6305 dev_hold(rif->dev); 6321 dev_hold(rif->dev);
6322 mlxsw_sp->router->rifs[rif_index] = rif;
6306 rif->mlxsw_sp = mlxsw_sp; 6323 rif->mlxsw_sp = mlxsw_sp;
6307 rif->ops = ops; 6324 rif->ops = ops;
6308 6325
@@ -6329,7 +6346,6 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6329 } 6346 }
6330 6347
6331 mlxsw_sp_rif_counters_alloc(rif); 6348 mlxsw_sp_rif_counters_alloc(rif);
6332 mlxsw_sp->router->rifs[rif_index] = rif;
6333 6349
6334 return rif; 6350 return rif;
6335 6351
@@ -6341,6 +6357,7 @@ err_configure:
6341 if (fid) 6357 if (fid)
6342 mlxsw_sp_fid_put(fid); 6358 mlxsw_sp_fid_put(fid);
6343err_fid_get: 6359err_fid_get:
6360 mlxsw_sp->router->rifs[rif_index] = NULL;
6344 dev_put(rif->dev); 6361 dev_put(rif->dev);
6345 kfree(rif); 6362 kfree(rif);
6346err_rif_alloc: 6363err_rif_alloc:
@@ -6361,7 +6378,6 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6361 mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); 6378 mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6362 vr = &mlxsw_sp->router->vrs[rif->vr_id]; 6379 vr = &mlxsw_sp->router->vrs[rif->vr_id];
6363 6380
6364 mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6365 mlxsw_sp_rif_counters_free(rif); 6381 mlxsw_sp_rif_counters_free(rif);
6366 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) 6382 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6367 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif); 6383 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
@@ -6369,6 +6385,7 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6369 if (fid) 6385 if (fid)
6370 /* Loopback RIFs are not associated with a FID. */ 6386 /* Loopback RIFs are not associated with a FID. */
6371 mlxsw_sp_fid_put(fid); 6387 mlxsw_sp_fid_put(fid);
6388 mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6372 dev_put(rif->dev); 6389 dev_put(rif->dev);
6373 kfree(rif); 6390 kfree(rif);
6374 vr->rif_count--; 6391 vr->rif_count--;
@@ -6750,7 +6767,7 @@ static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp,
6750 6767
6751 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { 6768 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6752 rif = mlxsw_sp->router->rifs[i]; 6769 rif = mlxsw_sp->router->rifs[i];
6753 if (rif && rif->dev != dev && 6770 if (rif && rif->dev && rif->dev != dev &&
6754 !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr, 6771 !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr,
6755 mlxsw_sp->mac_mask)) { 6772 mlxsw_sp->mac_mask)) {
6756 NL_SET_ERR_MSG_MOD(extack, "All router interface MAC addresses must have the same prefix"); 6773 NL_SET_ERR_MSG_MOD(extack, "All router interface MAC addresses must have the same prefix");
@@ -7294,7 +7311,8 @@ static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7294 7311
7295 info.addr = mac; 7312 info.addr = mac;
7296 info.vid = vid; 7313 info.vid = vid;
7297 call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info); 7314 call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
7315 NULL);
7298} 7316}
7299 7317
7300static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = { 7318static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
@@ -7381,7 +7399,8 @@ static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7381 7399
7382 info.addr = mac; 7400 info.addr = mac;
7383 info.vid = 0; 7401 info.vid = 0;
7384 call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info); 7402 call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
7403 NULL);
7385} 7404}
7386 7405
7387static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = { 7406static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
@@ -7422,7 +7441,7 @@ mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
7422} 7441}
7423 7442
7424static int 7443static int
7425mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif) 7444mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7426{ 7445{
7427 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); 7446 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7428 u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev); 7447 u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
@@ -7434,11 +7453,12 @@ mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7434 if (IS_ERR(ul_vr)) 7453 if (IS_ERR(ul_vr))
7435 return PTR_ERR(ul_vr); 7454 return PTR_ERR(ul_vr);
7436 7455
7437 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true); 7456 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
7438 if (err) 7457 if (err)
7439 goto err_loopback_op; 7458 goto err_loopback_op;
7440 7459
7441 lb_rif->ul_vr_id = ul_vr->id; 7460 lb_rif->ul_vr_id = ul_vr->id;
7461 lb_rif->ul_rif_id = 0;
7442 ++ul_vr->rif_count; 7462 ++ul_vr->rif_count;
7443 return 0; 7463 return 0;
7444 7464
@@ -7447,32 +7467,185 @@ err_loopback_op:
7447 return err; 7467 return err;
7448} 7468}
7449 7469
7450static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif) 7470static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7451{ 7471{
7452 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); 7472 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7453 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; 7473 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7454 struct mlxsw_sp_vr *ul_vr; 7474 struct mlxsw_sp_vr *ul_vr;
7455 7475
7456 ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id]; 7476 ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
7457 mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false); 7477 mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);
7458 7478
7459 --ul_vr->rif_count; 7479 --ul_vr->rif_count;
7460 mlxsw_sp_vr_put(mlxsw_sp, ul_vr); 7480 mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7461} 7481}
7462 7482
7463static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = { 7483static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
7464 .type = MLXSW_SP_RIF_TYPE_IPIP_LB, 7484 .type = MLXSW_SP_RIF_TYPE_IPIP_LB,
7465 .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb), 7485 .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb),
7466 .setup = mlxsw_sp_rif_ipip_lb_setup, 7486 .setup = mlxsw_sp_rif_ipip_lb_setup,
7467 .configure = mlxsw_sp_rif_ipip_lb_configure, 7487 .configure = mlxsw_sp1_rif_ipip_lb_configure,
7468 .deconfigure = mlxsw_sp_rif_ipip_lb_deconfigure, 7488 .deconfigure = mlxsw_sp1_rif_ipip_lb_deconfigure,
7469}; 7489};
7470 7490
7471static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = { 7491const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
7472 [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops, 7492 [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops,
7473 [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops, 7493 [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops,
7474 [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops, 7494 [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops,
7475 [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp_rif_ipip_lb_ops, 7495 [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp1_rif_ipip_lb_ops,
7496};
7497
7498static int
7499mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
7500{
7501 struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7502 char ritr_pl[MLXSW_REG_RITR_LEN];
7503
7504 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
7505 ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
7506 mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
7507 MLXSW_REG_RITR_LOOPBACK_GENERIC);
7508
7509 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7510}
7511
7512static struct mlxsw_sp_rif *
7513mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
7514 struct netlink_ext_ack *extack)
7515{
7516 struct mlxsw_sp_rif *ul_rif;
7517 u16 rif_index;
7518 int err;
7519
7520 err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
7521 if (err) {
7522 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
7523 return ERR_PTR(err);
7524 }
7525
7526 ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL);
7527 if (!ul_rif)
7528 return ERR_PTR(-ENOMEM);
7529
7530 mlxsw_sp->router->rifs[rif_index] = ul_rif;
7531 ul_rif->mlxsw_sp = mlxsw_sp;
7532 err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
7533 if (err)
7534 goto ul_rif_op_err;
7535
7536 return ul_rif;
7537
7538ul_rif_op_err:
7539 mlxsw_sp->router->rifs[rif_index] = NULL;
7540 kfree(ul_rif);
7541 return ERR_PTR(err);
7542}
7543
7544static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
7545{
7546 struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7547
7548 mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
7549 mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
7550 kfree(ul_rif);
7551}
7552
7553static struct mlxsw_sp_rif *
7554mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
7555 struct netlink_ext_ack *extack)
7556{
7557 struct mlxsw_sp_vr *vr;
7558 int err;
7559
7560 vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
7561 if (IS_ERR(vr))
7562 return ERR_CAST(vr);
7563
7564 if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
7565 return vr->ul_rif;
7566
7567 vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack);
7568 if (IS_ERR(vr->ul_rif)) {
7569 err = PTR_ERR(vr->ul_rif);
7570 goto err_ul_rif_create;
7571 }
7572
7573 vr->rif_count++;
7574 refcount_set(&vr->ul_rif_refcnt, 1);
7575
7576 return vr->ul_rif;
7577
7578err_ul_rif_create:
7579 mlxsw_sp_vr_put(mlxsw_sp, vr);
7580 return ERR_PTR(err);
7581}
7582
7583static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
7584{
7585 struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7586 struct mlxsw_sp_vr *vr;
7587
7588 vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];
7589
7590 if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
7591 return;
7592
7593 vr->rif_count--;
7594 mlxsw_sp_ul_rif_destroy(ul_rif);
7595 mlxsw_sp_vr_put(mlxsw_sp, vr);
7596}
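mlxsw_sp_ul_rif_get()/_put() implement a get-or-create cache keyed by the VR: the first getter creates the underlay RIF and sets the refcount to 1, later getters just bump it, and the last put destroys it. A sketch of the scheme with hypothetical obj_create()/obj_destroy(), assuming callers are serialized (here by RTNL), which is what makes the inc_not_zero-else-create sequence safe:

#include <linux/refcount.h>

struct obj;
struct obj *obj_create(void);		/* hypothetical */
void obj_destroy(struct obj *obj);	/* hypothetical */

struct cache {
	struct obj *obj;
	refcount_t refcnt;		/* zero-initialized: no object yet */
};

static struct obj *cache_get(struct cache *c)
{
	if (refcount_inc_not_zero(&c->refcnt))
		return c->obj;		/* already exists, take a reference */

	c->obj = obj_create();
	if (!c->obj)
		return NULL;
	refcount_set(&c->refcnt, 1);	/* first reference */
	return c->obj;
}

static void cache_put(struct cache *c)
{
	if (!refcount_dec_and_test(&c->refcnt))
		return;
	obj_destroy(c->obj);		/* last reference dropped */
	c->obj = NULL;
}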
7597
7598static int
7599mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7600{
7601 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7602 u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7603 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7604 struct mlxsw_sp_rif *ul_rif;
7605 int err;
7606
7607 ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
7608 if (IS_ERR(ul_rif))
7609 return PTR_ERR(ul_rif);
7610
7611 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
7612 if (err)
7613 goto err_loopback_op;
7614
7615 lb_rif->ul_vr_id = 0;
7616 lb_rif->ul_rif_id = ul_rif->rif_index;
7617
7618 return 0;
7619
7620err_loopback_op:
7621 mlxsw_sp_ul_rif_put(ul_rif);
7622 return err;
7623}
7624
7625static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7626{
7627 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7628 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7629 struct mlxsw_sp_rif *ul_rif;
7630
7631 ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
7632 mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
7633 mlxsw_sp_ul_rif_put(ul_rif);
7634}
7635
7636static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
7637 .type = MLXSW_SP_RIF_TYPE_IPIP_LB,
7638 .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb),
7639 .setup = mlxsw_sp_rif_ipip_lb_setup,
7640 .configure = mlxsw_sp2_rif_ipip_lb_configure,
7641 .deconfigure = mlxsw_sp2_rif_ipip_lb_deconfigure,
7642};
7643
7644const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
7645 [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops,
7646 [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops,
7647 [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops,
7648 [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp2_rif_ipip_lb_ops,
7476}; 7649};
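With both arrays exported, RIF handling becomes data-driven: mlxsw_sp1_init()/mlxsw_sp2_init() each install their generation's array, and mlxsw_sp_rif_create() simply indexes mlxsw_sp->rif_ops_arr[type] with no ASIC branching. A sketch of the idea with hypothetical names:

struct rif;

struct rif_ops {
	int (*configure)(struct rif *rif);
	void (*deconfigure)(struct rif *rif);
};

struct asic {
	const struct rif_ops **rif_ops_arr;	/* installed once at probe */
};

static int rif_configure(const struct asic *a, struct rif *rif, int type)
{
	/* common code stays generation-agnostic */
	return a->rif_ops_arr[type]->configure(rif);
}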
7477 7650
7478static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp) 7651static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
@@ -7485,8 +7658,6 @@ static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
7485 if (!mlxsw_sp->router->rifs) 7658 if (!mlxsw_sp->router->rifs)
7486 return -ENOMEM; 7659 return -ENOMEM;
7487 7660
7488 mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
7489
7490 return 0; 7661 return 0;
7491} 7662}
7492 7663
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 3dbafdeaab2b..cc1de91e8217 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -29,6 +29,7 @@ struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
29u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif); 29u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif);
30u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif); 30u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif);
31u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif); 31u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif);
32u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif);
32u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev); 33u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev);
33int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); 34int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif);
34const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif); 35const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index c772109b638d..0f4e68d31cc3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -2443,7 +2443,7 @@ static void mlxsw_sp_fdb_vxlan_call_notifiers(struct net_device *dev,
2443 ether_addr_copy(info.eth_addr, mac); 2443 ether_addr_copy(info.eth_addr, mac);
2444 info.vni = vni; 2444 info.vni = vni;
2445 info.offloaded = adding; 2445 info.offloaded = adding;
2446 call_switchdev_notifiers(type, dev, &info.info); 2446 call_switchdev_notifiers(type, dev, &info.info, NULL);
2447} 2447}
2448 2448
2449static void mlxsw_sp_fdb_nve_call_notifiers(struct net_device *dev, 2449static void mlxsw_sp_fdb_nve_call_notifiers(struct net_device *dev,
@@ -2468,7 +2468,7 @@ mlxsw_sp_fdb_call_notifiers(enum switchdev_notifier_type type,
2468 info.addr = mac; 2468 info.addr = mac;
2469 info.vid = vid; 2469 info.vid = vid;
2470 info.offloaded = offloaded; 2470 info.offloaded = offloaded;
2471 call_switchdev_notifiers(type, dev, &info.info); 2471 call_switchdev_notifiers(type, dev, &info.info, NULL);
2472} 2472}
2473 2473
2474static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, 2474static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp,
@@ -2819,7 +2819,7 @@ mlxsw_sp_switchdev_bridge_vxlan_fdb_event(struct mlxsw_sp *mlxsw_sp,
2819 return; 2819 return;
2820 vxlan_fdb_info.offloaded = true; 2820 vxlan_fdb_info.offloaded = true;
2821 call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, 2821 call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
2822 &vxlan_fdb_info.info); 2822 &vxlan_fdb_info.info, NULL);
2823 mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, 2823 mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED,
2824 vxlan_fdb_info.eth_addr, 2824 vxlan_fdb_info.eth_addr,
2825 fdb_info->vid, dev, true); 2825 fdb_info->vid, dev, true);
@@ -2832,7 +2832,7 @@ mlxsw_sp_switchdev_bridge_vxlan_fdb_event(struct mlxsw_sp *mlxsw_sp,
2832 false); 2832 false);
2833 vxlan_fdb_info.offloaded = false; 2833 vxlan_fdb_info.offloaded = false;
2834 call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, 2834 call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
2835 &vxlan_fdb_info.info); 2835 &vxlan_fdb_info.info, NULL);
2836 break; 2836 break;
2837 } 2837 }
2838} 2838}
@@ -2977,7 +2977,7 @@ mlxsw_sp_switchdev_vxlan_fdb_add(struct mlxsw_sp *mlxsw_sp,
2977 } 2977 }
2978 vxlan_fdb_info->offloaded = true; 2978 vxlan_fdb_info->offloaded = true;
2979 call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, 2979 call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
2980 &vxlan_fdb_info->info); 2980 &vxlan_fdb_info->info, NULL);
2981 mlxsw_sp_fid_put(fid); 2981 mlxsw_sp_fid_put(fid);
2982 return; 2982 return;
2983 } 2983 }
@@ -2998,7 +2998,7 @@ mlxsw_sp_switchdev_vxlan_fdb_add(struct mlxsw_sp *mlxsw_sp,
2998 goto err_fdb_tunnel_uc_op; 2998 goto err_fdb_tunnel_uc_op;
2999 vxlan_fdb_info->offloaded = true; 2999 vxlan_fdb_info->offloaded = true;
3000 call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, 3000 call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
3001 &vxlan_fdb_info->info); 3001 &vxlan_fdb_info->info, NULL);
3002 mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, 3002 mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED,
3003 vxlan_fdb_info->eth_addr, vid, dev, true); 3003 vxlan_fdb_info->eth_addr, vid, dev, true);
3004 3004
@@ -3099,23 +3099,34 @@ mlxsw_sp_switchdev_vxlan_work_prepare(struct mlxsw_sp_switchdev_event_work *
3099 struct vxlan_dev *vxlan = netdev_priv(switchdev_work->dev); 3099 struct vxlan_dev *vxlan = netdev_priv(switchdev_work->dev);
3100 struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info; 3100 struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info;
3101 struct vxlan_config *cfg = &vxlan->cfg; 3101 struct vxlan_config *cfg = &vxlan->cfg;
3102 struct netlink_ext_ack *extack;
3102 3103
3104 extack = switchdev_notifier_info_to_extack(info);
3103 vxlan_fdb_info = container_of(info, 3105 vxlan_fdb_info = container_of(info,
3104 struct switchdev_notifier_vxlan_fdb_info, 3106 struct switchdev_notifier_vxlan_fdb_info,
3105 info); 3107 info);
3106 3108
3107 if (vxlan_fdb_info->remote_port != cfg->dst_port) 3109 if (vxlan_fdb_info->remote_port != cfg->dst_port) {
3108 return -EOPNOTSUPP; 3110 NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Non-default remote port is not supported");
3109 if (vxlan_fdb_info->remote_vni != cfg->vni)
3110 return -EOPNOTSUPP; 3111 return -EOPNOTSUPP;
3111 if (vxlan_fdb_info->vni != cfg->vni) 3112 }
3113 if (vxlan_fdb_info->remote_vni != cfg->vni ||
3114 vxlan_fdb_info->vni != cfg->vni) {
3115 NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Non-default VNI is not supported");
3112 return -EOPNOTSUPP; 3116 return -EOPNOTSUPP;
3113 if (vxlan_fdb_info->remote_ifindex) 3117 }
3118 if (vxlan_fdb_info->remote_ifindex) {
3119 NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Local interface is not supported");
3114 return -EOPNOTSUPP; 3120 return -EOPNOTSUPP;
3115 if (is_multicast_ether_addr(vxlan_fdb_info->eth_addr)) 3121 }
3122 if (is_multicast_ether_addr(vxlan_fdb_info->eth_addr)) {
3123 NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Multicast MAC addresses not supported");
3116 return -EOPNOTSUPP; 3124 return -EOPNOTSUPP;
3117 if (vxlan_addr_multicast(&vxlan_fdb_info->remote_ip)) 3125 }
3126 if (vxlan_addr_multicast(&vxlan_fdb_info->remote_ip)) {
3127 NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Multicast destination IP is not supported");
3118 return -EOPNOTSUPP; 3128 return -EOPNOTSUPP;
3129 }
3119 3130
3120 switchdev_work->vxlan_fdb_info = *vxlan_fdb_info; 3131 switchdev_work->vxlan_fdb_info = *vxlan_fdb_info;
3121 3132
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 215a45374d7b..c6a575eb0ff5 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -721,7 +721,8 @@ static void ocelot_get_stats64(struct net_device *dev,
721 721
722static int ocelot_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], 722static int ocelot_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
723 struct net_device *dev, const unsigned char *addr, 723 struct net_device *dev, const unsigned char *addr,
724 u16 vid, u16 flags) 724 u16 vid, u16 flags,
725 struct netlink_ext_ack *extack)
725{ 726{
726 struct ocelot_port *port = netdev_priv(dev); 727 struct ocelot_port *port = netdev_priv(dev);
727 struct ocelot *ocelot = port->ocelot; 728 struct ocelot *ocelot = port->ocelot;
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index 5ae3fa82909f..0da7393b2ef3 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@ -2553,7 +2553,7 @@ static int vxge_add_isr(struct vxgedev *vdev)
2553 vxge_debug_init(VXGE_ERR, 2553 vxge_debug_init(VXGE_ERR,
2554 "%s: Defaulting to INTA", 2554 "%s: Defaulting to INTA",
2555 vdev->ndev->name); 2555 vdev->ndev->name);
2556 goto INTA_MODE; 2556 goto INTA_MODE;
2557 } 2557 }
2558 2558
2559 msix_idx = (vdev->vpaths[0].handle->vpath->vp_id * 2559 msix_idx = (vdev->vpaths[0].handle->vpath->vp_id *
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
index 4c5eaf36d5bb..56b22ea32474 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
@@ -203,7 +203,7 @@ nfp_flower_cmsg_portreify_rx(struct nfp_app *app, struct sk_buff *skb)
203 } 203 }
204 204
205 atomic_inc(&priv->reify_replies); 205 atomic_inc(&priv->reify_replies);
206 wake_up_interruptible(&priv->reify_wait_queue); 206 wake_up(&priv->reify_wait_queue);
207} 207}
208 208
209static void 209static void
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index 15f41cfef9f1..4fcaf11ed56e 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -97,6 +97,9 @@
97 97
98#define NFP_FLOWER_WORKQ_MAX_SKBS 30000 98#define NFP_FLOWER_WORKQ_MAX_SKBS 30000
99 99
100/* Cmsg reply (empirical) timeout */
101#define NFP_FL_REPLY_TIMEOUT msecs_to_jiffies(40)
102
100#define nfp_flower_cmsg_warn(app, fmt, args...) \ 103#define nfp_flower_cmsg_warn(app, fmt, args...) \
101 do { \ 104 do { \
102 if (net_ratelimit()) \ 105 if (net_ratelimit()) \
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c
index 5059110a1768..408089133599 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -32,6 +32,71 @@ static enum devlink_eswitch_mode eswitch_mode_get(struct nfp_app *app)
32 return DEVLINK_ESWITCH_MODE_SWITCHDEV; 32 return DEVLINK_ESWITCH_MODE_SWITCHDEV;
33} 33}
34 34
35static struct nfp_flower_non_repr_priv *
36nfp_flower_non_repr_priv_lookup(struct nfp_app *app, struct net_device *netdev)
37{
38 struct nfp_flower_priv *priv = app->priv;
39 struct nfp_flower_non_repr_priv *entry;
40
41 ASSERT_RTNL();
42
43 list_for_each_entry(entry, &priv->non_repr_priv, list)
44 if (entry->netdev == netdev)
45 return entry;
46
47 return NULL;
48}
49
50void
51__nfp_flower_non_repr_priv_get(struct nfp_flower_non_repr_priv *non_repr_priv)
52{
53 non_repr_priv->ref_count++;
54}
55
56struct nfp_flower_non_repr_priv *
57nfp_flower_non_repr_priv_get(struct nfp_app *app, struct net_device *netdev)
58{
59 struct nfp_flower_priv *priv = app->priv;
60 struct nfp_flower_non_repr_priv *entry;
61
62 entry = nfp_flower_non_repr_priv_lookup(app, netdev);
63 if (entry)
64 goto inc_ref;
65
66 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
67 if (!entry)
68 return NULL;
69
70 entry->netdev = netdev;
71 list_add(&entry->list, &priv->non_repr_priv);
72
73inc_ref:
74 __nfp_flower_non_repr_priv_get(entry);
75 return entry;
76}
77
78void
79__nfp_flower_non_repr_priv_put(struct nfp_flower_non_repr_priv *non_repr_priv)
80{
81 if (--non_repr_priv->ref_count)
82 return;
83
84 list_del(&non_repr_priv->list);
85 kfree(non_repr_priv);
86}
87
88void
89nfp_flower_non_repr_priv_put(struct nfp_app *app, struct net_device *netdev)
90{
91 struct nfp_flower_non_repr_priv *entry;
92
93 entry = nfp_flower_non_repr_priv_lookup(app, netdev);
94 if (!entry)
95 return;
96
97 __nfp_flower_non_repr_priv_put(entry);
98}
99
35static enum nfp_repr_type 100static enum nfp_repr_type
36nfp_flower_repr_get_type_and_port(struct nfp_app *app, u32 port_id, u8 *port) 101nfp_flower_repr_get_type_and_port(struct nfp_app *app, u32 port_id, u8 *port)
37{ 102{
@@ -107,16 +172,14 @@ static int
107nfp_flower_wait_repr_reify(struct nfp_app *app, atomic_t *replies, int tot_repl) 172nfp_flower_wait_repr_reify(struct nfp_app *app, atomic_t *replies, int tot_repl)
108{ 173{
109 struct nfp_flower_priv *priv = app->priv; 174 struct nfp_flower_priv *priv = app->priv;
110 int err;
111 175
112 if (!tot_repl) 176 if (!tot_repl)
113 return 0; 177 return 0;
114 178
115 lockdep_assert_held(&app->pf->lock); 179 lockdep_assert_held(&app->pf->lock);
116 err = wait_event_interruptible_timeout(priv->reify_wait_queue, 180 if (!wait_event_timeout(priv->reify_wait_queue,
117 atomic_read(replies) >= tot_repl, 181 atomic_read(replies) >= tot_repl,
118 msecs_to_jiffies(10)); 182 NFP_FL_REPLY_TIMEOUT)) {
119 if (err <= 0) {
120 nfp_warn(app->cpp, "Not all reprs responded to reify\n"); 183 nfp_warn(app->cpp, "Not all reprs responded to reify\n");
121 return -EIO; 184 return -EIO;
122 } 185 }
@@ -223,6 +286,7 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
223 286
224 nfp_repr = netdev_priv(repr); 287 nfp_repr = netdev_priv(repr);
225 nfp_repr->app_priv = repr_priv; 288 nfp_repr->app_priv = repr_priv;
289 repr_priv->nfp_repr = nfp_repr;
226 290
227 /* For now we only support 1 PF */ 291 /* For now we only support 1 PF */
228 WARN_ON(repr_type == NFP_REPR_TYPE_PF && i); 292 WARN_ON(repr_type == NFP_REPR_TYPE_PF && i);
@@ -337,6 +401,7 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
337 401
338 nfp_repr = netdev_priv(repr); 402 nfp_repr = netdev_priv(repr);
339 nfp_repr->app_priv = repr_priv; 403 nfp_repr->app_priv = repr_priv;
404 repr_priv->nfp_repr = nfp_repr;
340 405
341 port = nfp_port_alloc(app, NFP_PORT_PHYS_PORT, repr); 406 port = nfp_port_alloc(app, NFP_PORT_PHYS_PORT, repr);
342 if (IS_ERR(port)) { 407 if (IS_ERR(port)) {
@@ -476,8 +541,8 @@ err_clear_nn:
476 541
477static int nfp_flower_init(struct nfp_app *app) 542static int nfp_flower_init(struct nfp_app *app)
478{ 543{
544 u64 version, features, ctx_count, num_mems;
479 const struct nfp_pf *pf = app->pf; 545 const struct nfp_pf *pf = app->pf;
480 u64 version, features, ctx_count;
481 struct nfp_flower_priv *app_priv; 546 struct nfp_flower_priv *app_priv;
482 int err; 547 int err;
483 548
@@ -502,6 +567,23 @@ static int nfp_flower_init(struct nfp_app *app)
502 return err; 567 return err;
503 } 568 }
504 569
570 num_mems = nfp_rtsym_read_le(app->pf->rtbl, "CONFIG_FC_HOST_CTX_SPLIT",
571 &err);
572 if (err) {
573 nfp_warn(app->cpp,
574 "FlowerNIC: unsupported host context memory: %d\n",
575 err);
576 err = 0;
577 num_mems = 1;
578 }
579
580 if (!FIELD_FIT(NFP_FL_STAT_ID_MU_NUM, num_mems) || !num_mems) {
581 nfp_warn(app->cpp,
582 "FlowerNIC: invalid host context memory: %llu\n",
583 num_mems);
584 return -EINVAL;
585 }
586
505 ctx_count = nfp_rtsym_read_le(app->pf->rtbl, "CONFIG_FC_HOST_CTX_COUNT", 587 ctx_count = nfp_rtsym_read_le(app->pf->rtbl, "CONFIG_FC_HOST_CTX_COUNT",
506 &err); 588 &err);
507 if (err) { 589 if (err) {
@@ -522,6 +604,8 @@ static int nfp_flower_init(struct nfp_app *app)
522 if (!app_priv) 604 if (!app_priv)
523 return -ENOMEM; 605 return -ENOMEM;
524 606
607 app_priv->total_mem_units = num_mems;
608 app_priv->active_mem_unit = 0;
525 app_priv->stats_ring_size = roundup_pow_of_two(ctx_count); 609 app_priv->stats_ring_size = roundup_pow_of_two(ctx_count);
526 app->priv = app_priv; 610 app->priv = app_priv;
527 app_priv->app = app; 611 app_priv->app = app;
@@ -533,7 +617,7 @@ static int nfp_flower_init(struct nfp_app *app)
533 init_waitqueue_head(&app_priv->mtu_conf.wait_q); 617 init_waitqueue_head(&app_priv->mtu_conf.wait_q);
534 spin_lock_init(&app_priv->mtu_conf.lock); 618 spin_lock_init(&app_priv->mtu_conf.lock);
535 619
536 err = nfp_flower_metadata_init(app, ctx_count); 620 err = nfp_flower_metadata_init(app, ctx_count, num_mems);
537 if (err) 621 if (err)
538 goto err_free_app_priv; 622 goto err_free_app_priv;
539 623
@@ -558,6 +642,7 @@ static int nfp_flower_init(struct nfp_app *app)
558 } 642 }
559 643
560 INIT_LIST_HEAD(&app_priv->indr_block_cb_priv); 644 INIT_LIST_HEAD(&app_priv->indr_block_cb_priv);
645 INIT_LIST_HEAD(&app_priv->non_repr_priv);
561 646
562 return 0; 647 return 0;
563 648
@@ -601,7 +686,7 @@ nfp_flower_repr_change_mtu(struct nfp_app *app, struct net_device *netdev,
601{ 686{
602 struct nfp_flower_priv *app_priv = app->priv; 687 struct nfp_flower_priv *app_priv = app->priv;
603 struct nfp_repr *repr = netdev_priv(netdev); 688 struct nfp_repr *repr = netdev_priv(netdev);
604 int err, ack; 689 int err;
605 690
606 /* Only need to config FW for physical port MTU change. */ 691 /* Only need to config FW for physical port MTU change. */
607 if (repr->port->type != NFP_PORT_PHYS_PORT) 692 if (repr->port->type != NFP_PORT_PHYS_PORT)
@@ -628,11 +713,9 @@ nfp_flower_repr_change_mtu(struct nfp_app *app, struct net_device *netdev,
628 } 713 }
629 714
630 /* Wait for fw to ack the change. */ 715 /* Wait for fw to ack the change. */
631 ack = wait_event_timeout(app_priv->mtu_conf.wait_q, 716 if (!wait_event_timeout(app_priv->mtu_conf.wait_q,
632 nfp_flower_check_ack(app_priv), 717 nfp_flower_check_ack(app_priv),
633 msecs_to_jiffies(10)); 718 NFP_FL_REPLY_TIMEOUT)) {
634
635 if (!ack) {
636 spin_lock_bh(&app_priv->mtu_conf.lock); 719 spin_lock_bh(&app_priv->mtu_conf.lock);
637 app_priv->mtu_conf.requested_val = 0; 720 app_priv->mtu_conf.requested_val = 0;
638 spin_unlock_bh(&app_priv->mtu_conf.lock); 721 spin_unlock_bh(&app_priv->mtu_conf.lock);
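
The nfp_flower_non_repr_priv_{get,put}() helpers added above give non-representor netdevs a refcounted, lookup-or-create priv entry protected by RTNL. A hedged usage sketch (demo_track_netdev() is hypothetical; the helper calls are the ones introduced in this patch):

    /* Must run under rtnl_lock(), as the ASSERT_RTNL() in the
     * lookup requires.
     */
    static int demo_track_netdev(struct nfp_app *app,
    			     struct net_device *netdev)
    {
    	struct nfp_flower_non_repr_priv *priv;

    	priv = nfp_flower_non_repr_priv_get(app, netdev); /* ref++ or alloc */
    	if (!priv)
    		return -ENOMEM;

    	/* ... use priv->mac_offloaded / priv->offloaded_mac_addr ... */

    	nfp_flower_non_repr_priv_put(app, netdev); /* ref--; freed at zero */
    	return 0;
    }
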
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index b858bac47621..c0945a5fd1a4 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -20,6 +20,9 @@ struct nfp_fl_pre_lag;
20struct net_device; 20struct net_device;
21struct nfp_app; 21struct nfp_app;
22 22
23#define NFP_FL_STAT_ID_MU_NUM GENMASK(31, 22)
24#define NFP_FL_STAT_ID_STAT GENMASK(21, 0)
25
23#define NFP_FL_STATS_ELEM_RS FIELD_SIZEOF(struct nfp_fl_stats_id, \ 26#define NFP_FL_STATS_ELEM_RS FIELD_SIZEOF(struct nfp_fl_stats_id, \
24 init_unalloc) 27 init_unalloc)
25#define NFP_FLOWER_MASK_ENTRY_RS 256 28#define NFP_FLOWER_MASK_ENTRY_RS 256
@@ -54,6 +57,26 @@ struct nfp_fl_stats_id {
54}; 57};
55 58
56/** 59/**
60 * struct nfp_fl_tunnel_offloads - priv data for tunnel offloads
61 * @offloaded_macs: Hashtable of the offloaded MAC addresses
62 * @ipv4_off_list: List of IPv4 addresses to offload
63 * @neigh_off_list: List of neighbour offloads
64 * @ipv4_off_lock: Lock for the IPv4 address list
65 * @neigh_off_lock: Lock for the neighbour address list
66 * @mac_off_ids: IDA to manage id assignment for offloaded MACs
67 * @neigh_nb: Notifier to monitor neighbour state
68 */
69struct nfp_fl_tunnel_offloads {
70 struct rhashtable offloaded_macs;
71 struct list_head ipv4_off_list;
72 struct list_head neigh_off_list;
73 struct mutex ipv4_off_lock;
74 spinlock_t neigh_off_lock;
75 struct ida mac_off_ids;
76 struct notifier_block neigh_nb;
77};
78
79/**
57 * struct nfp_mtu_conf - manage MTU setting 80 * struct nfp_mtu_conf - manage MTU setting
58 * @portnum: NFP port number of repr with requested MTU change 81 * @portnum: NFP port number of repr with requested MTU change
59 * @requested_val: MTU value requested for repr 82 * @requested_val: MTU value requested for repr
@@ -113,23 +136,16 @@ struct nfp_fl_lag {
113 * processing 136 * processing
114 * @cmsg_skbs_low: List of lower priority skbs for control message 137 * @cmsg_skbs_low: List of lower priority skbs for control message
115 * processing 138 * processing
116 * @nfp_mac_off_list: List of MAC addresses to offload 139 * @tun: Tunnel offload data
117 * @nfp_mac_index_list: List of unique 8-bit indexes for non NFP netdevs
118 * @nfp_ipv4_off_list: List of IPv4 addresses to offload
119 * @nfp_neigh_off_list: List of neighbour offloads
120 * @nfp_mac_off_lock: Lock for the MAC address list
121 * @nfp_mac_index_lock: Lock for the MAC index list
122 * @nfp_ipv4_off_lock: Lock for the IPv4 address list
123 * @nfp_neigh_off_lock: Lock for the neighbour address list
124 * @nfp_mac_off_ids: IDA to manage id assignment for offloaded macs
125 * @nfp_mac_off_count: Number of MACs in address list
126 * @nfp_tun_neigh_nb: Notifier to monitor neighbour state
127 * @reify_replies: atomically stores the number of replies received 140 * @reify_replies: atomically stores the number of replies received
128 * from firmware for repr reify 141 * from firmware for repr reify
129 * @reify_wait_queue: wait queue for repr reify response counting 142 * @reify_wait_queue: wait queue for repr reify response counting
130 * @mtu_conf: Configuration of repr MTU value 143 * @mtu_conf: Configuration of repr MTU value
131 * @nfp_lag: Link aggregation data block 144 * @nfp_lag: Link aggregation data block
132 * @indr_block_cb_priv: List of priv data passed to indirect block cbs 145 * @indr_block_cb_priv: List of priv data passed to indirect block cbs
146 * @non_repr_priv: List of offloaded non-repr ports and their priv data
147 * @active_mem_unit: Current active memory unit for flower rules
148 * @total_mem_units: Total number of available memory units for flower rules
133 */ 149 */
134struct nfp_flower_priv { 150struct nfp_flower_priv {
135 struct nfp_app *app; 151 struct nfp_app *app;
@@ -147,30 +163,47 @@ struct nfp_flower_priv {
147 struct work_struct cmsg_work; 163 struct work_struct cmsg_work;
148 struct sk_buff_head cmsg_skbs_high; 164 struct sk_buff_head cmsg_skbs_high;
149 struct sk_buff_head cmsg_skbs_low; 165 struct sk_buff_head cmsg_skbs_low;
150 struct list_head nfp_mac_off_list; 166 struct nfp_fl_tunnel_offloads tun;
151 struct list_head nfp_mac_index_list;
152 struct list_head nfp_ipv4_off_list;
153 struct list_head nfp_neigh_off_list;
154 struct mutex nfp_mac_off_lock;
155 struct mutex nfp_mac_index_lock;
156 struct mutex nfp_ipv4_off_lock;
157 spinlock_t nfp_neigh_off_lock;
158 struct ida nfp_mac_off_ids;
159 int nfp_mac_off_count;
160 struct notifier_block nfp_tun_neigh_nb;
161 atomic_t reify_replies; 167 atomic_t reify_replies;
162 wait_queue_head_t reify_wait_queue; 168 wait_queue_head_t reify_wait_queue;
163 struct nfp_mtu_conf mtu_conf; 169 struct nfp_mtu_conf mtu_conf;
164 struct nfp_fl_lag nfp_lag; 170 struct nfp_fl_lag nfp_lag;
165 struct list_head indr_block_cb_priv; 171 struct list_head indr_block_cb_priv;
172 struct list_head non_repr_priv;
173 unsigned int active_mem_unit;
174 unsigned int total_mem_units;
166}; 175};
167 176
168/** 177/**
169 * struct nfp_flower_repr_priv - Flower APP per-repr priv data 178 * struct nfp_flower_repr_priv - Flower APP per-repr priv data
179 * @nfp_repr: Back pointer to nfp_repr
170 * @lag_port_flags: Extended port flags to record lag state of repr 180 * @lag_port_flags: Extended port flags to record lag state of repr
181 * @mac_offloaded: Flag indicating a MAC address is offloaded for repr
182 * @offloaded_mac_addr: MAC address that has been offloaded for repr
183 * @mac_list: List entry of reprs that share the same offloaded MAC
171 */ 184 */
172struct nfp_flower_repr_priv { 185struct nfp_flower_repr_priv {
186 struct nfp_repr *nfp_repr;
173 unsigned long lag_port_flags; 187 unsigned long lag_port_flags;
188 bool mac_offloaded;
189 u8 offloaded_mac_addr[ETH_ALEN];
190 struct list_head mac_list;
191};
192
193/**
194 * struct nfp_flower_non_repr_priv - Priv data for non-repr offloaded ports
195 * @list: List entry of offloaded reprs
196 * @netdev: Pointer to non-repr net_device
197 * @ref_count: Number of references held for this priv data
198 * @mac_offloaded: Flag indicating a MAC address is offloaded for device
199 * @offloaded_mac_addr: MAC address that has been offloaded for dev
200 */
201struct nfp_flower_non_repr_priv {
202 struct list_head list;
203 struct net_device *netdev;
204 int ref_count;
205 bool mac_offloaded;
206 u8 offloaded_mac_addr[ETH_ALEN];
174}; 207};
175 208
176struct nfp_fl_key_ls { 209struct nfp_fl_key_ls {
@@ -217,7 +250,8 @@ struct nfp_fl_stats_frame {
217 __be64 stats_cookie; 250 __be64 stats_cookie;
218}; 251};
219 252
220int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count); 253int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count,
254 unsigned int host_ctx_split);
221void nfp_flower_metadata_cleanup(struct nfp_app *app); 255void nfp_flower_metadata_cleanup(struct nfp_app *app);
222 256
223int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev, 257int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
@@ -252,7 +286,6 @@ void nfp_tunnel_config_stop(struct nfp_app *app);
252int nfp_tunnel_mac_event_handler(struct nfp_app *app, 286int nfp_tunnel_mac_event_handler(struct nfp_app *app,
253 struct net_device *netdev, 287 struct net_device *netdev,
254 unsigned long event, void *ptr); 288 unsigned long event, void *ptr);
255void nfp_tunnel_write_macs(struct nfp_app *app);
256void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4); 289void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4);
257void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4); 290void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4);
258void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb); 291void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb);
@@ -273,4 +306,12 @@ int nfp_flower_reg_indir_block_handler(struct nfp_app *app,
273 struct net_device *netdev, 306 struct net_device *netdev,
274 unsigned long event); 307 unsigned long event);
275 308
309void
310__nfp_flower_non_repr_priv_get(struct nfp_flower_non_repr_priv *non_repr_priv);
311struct nfp_flower_non_repr_priv *
312nfp_flower_non_repr_priv_get(struct nfp_app *app, struct net_device *netdev);
313void
314__nfp_flower_non_repr_priv_put(struct nfp_flower_non_repr_priv *non_repr_priv);
315void
316nfp_flower_non_repr_priv_put(struct nfp_app *app, struct net_device *netdev);
276#endif 317#endif
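
NFP_FL_STAT_ID_MU_NUM and NFP_FL_STAT_ID_STAT carve the 32-bit stats context ID into a 10-bit memory-unit number (bits 31:22) and a 22-bit per-unit stat index (bits 21:0). A small sketch of the packing, assuming nothing beyond the two masks defined above:

    #include <linux/types.h>
    #include <linux/bits.h>
    #include <linux/bitfield.h>

    #define NFP_FL_STAT_ID_MU_NUM	GENMASK(31, 22)	/* memory unit, 10 bits */
    #define NFP_FL_STAT_ID_STAT	GENMASK(21, 0)	/* stat index, 22 bits */

    static u32 demo_pack_stat_id(u32 mem_unit, u32 stat_idx)
    {
    	return FIELD_PREP(NFP_FL_STAT_ID_MU_NUM, mem_unit) |
    	       FIELD_PREP(NFP_FL_STAT_ID_STAT, stat_idx);
    }

    /* e.g. demo_pack_stat_id(2, 5) == (2 << 22) | 5 == 0x00800005 */

    static void demo_unpack_stat_id(u32 id, u32 *mem_unit, u32 *stat_idx)
    {
    	*mem_unit = FIELD_GET(NFP_FL_STAT_ID_MU_NUM, id);
    	*stat_idx = FIELD_GET(NFP_FL_STAT_ID_STAT, id);
    }
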
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index cdf75595f627..c04a0d6b0184 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -403,9 +403,6 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
403 ext += sizeof(struct nfp_flower_ipv4_udp_tun); 403 ext += sizeof(struct nfp_flower_ipv4_udp_tun);
404 msk += sizeof(struct nfp_flower_ipv4_udp_tun); 404 msk += sizeof(struct nfp_flower_ipv4_udp_tun);
405 405
406 /* Configure tunnel end point MAC. */
407 nfp_tunnel_write_macs(app);
408
409 /* Store the tunnel destination in the rule data. 406 /* Store the tunnel destination in the rule data.
410 * This must be present and be an exact match. 407 * This must be present and be an exact match.
411 */ 408 */
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index 573a4400a26c..492837b852b6 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -4,6 +4,7 @@
4#include <linux/hash.h> 4#include <linux/hash.h>
5#include <linux/hashtable.h> 5#include <linux/hashtable.h>
6#include <linux/jhash.h> 6#include <linux/jhash.h>
7#include <linux/math64.h>
7#include <linux/vmalloc.h> 8#include <linux/vmalloc.h>
8#include <net/pkt_cls.h> 9#include <net/pkt_cls.h>
9 10
@@ -52,8 +53,17 @@ static int nfp_get_stats_entry(struct nfp_app *app, u32 *stats_context_id)
52 freed_stats_id = priv->stats_ring_size; 53 freed_stats_id = priv->stats_ring_size;
53 /* Check for unallocated entries first. */ 54 /* Check for unallocated entries first. */
54 if (priv->stats_ids.init_unalloc > 0) { 55 if (priv->stats_ids.init_unalloc > 0) {
55 *stats_context_id = priv->stats_ids.init_unalloc - 1; 56 if (priv->active_mem_unit == priv->total_mem_units) {
56 priv->stats_ids.init_unalloc--; 57 priv->stats_ids.init_unalloc--;
58 priv->active_mem_unit = 0;
59 }
60
61 *stats_context_id =
62 FIELD_PREP(NFP_FL_STAT_ID_STAT,
63 priv->stats_ids.init_unalloc - 1) |
64 FIELD_PREP(NFP_FL_STAT_ID_MU_NUM,
65 priv->active_mem_unit);
66 priv->active_mem_unit++;
57 return 0; 67 return 0;
58 } 68 }
59 69
@@ -381,10 +391,11 @@ const struct rhashtable_params nfp_flower_table_params = {
381 .automatic_shrinking = true, 391 .automatic_shrinking = true,
382}; 392};
383 393
384int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count) 394int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count,
395 unsigned int host_num_mems)
385{ 396{
386 struct nfp_flower_priv *priv = app->priv; 397 struct nfp_flower_priv *priv = app->priv;
387 int err; 398 int err, stats_size;
388 399
389 hash_init(priv->mask_table); 400 hash_init(priv->mask_table);
390 401
@@ -417,10 +428,12 @@ int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count)
417 if (!priv->stats_ids.free_list.buf) 428 if (!priv->stats_ids.free_list.buf)
418 goto err_free_last_used; 429 goto err_free_last_used;
419 430
420 priv->stats_ids.init_unalloc = host_ctx_count; 431 priv->stats_ids.init_unalloc = div_u64(host_ctx_count, host_num_mems);
421 432
422 priv->stats = kvmalloc_array(priv->stats_ring_size, 433 stats_size = FIELD_PREP(NFP_FL_STAT_ID_STAT, host_ctx_count) |
423 sizeof(struct nfp_fl_stats), GFP_KERNEL); 434 FIELD_PREP(NFP_FL_STAT_ID_MU_NUM, host_num_mems - 1);
435 priv->stats = kvmalloc_array(stats_size, sizeof(struct nfp_fl_stats),
436 GFP_KERNEL);
424 if (!priv->stats) 437 if (!priv->stats)
425 goto err_free_ring_buf; 438 goto err_free_ring_buf;
426 439
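
The allocator change above hands out the same stat index once per memory unit before decrementing init_unalloc, spreading contexts round-robin across units. A worked trace with illustrative numbers:

    /* Assuming host_ctx_count = 1024 and host_num_mems = 4 (values
     * picked purely for illustration):
     *
     *   init_unalloc = div_u64(1024, 4) = 256
     *
     *   call 1: stat 255, unit 0      call 5: stat 254, unit 0
     *   call 2: stat 255, unit 1      call 6: stat 254, unit 1
     *   call 3: stat 255, unit 2      ...
     *   call 4: stat 255, unit 3
     *
     * Each id is FIELD_PREP(NFP_FL_STAT_ID_STAT, init_unalloc - 1) |
     * FIELD_PREP(NFP_FL_STAT_ID_MU_NUM, active_mem_unit), so call 1
     * yields 0x000000ff and call 5 yields (0 << 22) | 254 = 0x000000fe.
     */
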
diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
index 2d9f26a725c2..4d78be4ec4e9 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
@@ -98,47 +98,51 @@ struct nfp_ipv4_addr_entry {
98 struct list_head list; 98 struct list_head list;
99}; 99};
100 100
101/** 101#define NFP_TUN_MAC_OFFLOAD_DEL_FLAG 0x2
102 * struct nfp_tun_mac_addr - configure MAC address of tunnel EP on NFP
103 * @reserved: reserved for future use
104 * @count: number of MAC addresses in the message
105 * @addresses.index: index of MAC address in the lookup table
106 * @addresses.addr: interface MAC address
107 * @addresses: series of MACs to offload
108 */
109struct nfp_tun_mac_addr {
110 __be16 reserved;
111 __be16 count;
112 struct index_mac_addr {
113 __be16 index;
114 u8 addr[ETH_ALEN];
115 } addresses[];
116};
117 102
118/** 103/**
119 * struct nfp_tun_mac_offload_entry - list of MACs to offload 104 * struct nfp_tun_mac_addr_offload - configure MAC address of tunnel EP on NFP
120 * @index: index of MAC address for offloading 105 * @flags: MAC address offload options
106 * @count: number of MAC addresses in the message (should be 1)
107 * @index: index of MAC address in the lookup table
121 * @addr: interface MAC address 108 * @addr: interface MAC address
122 * @list: list pointer
123 */ 109 */
124struct nfp_tun_mac_offload_entry { 110struct nfp_tun_mac_addr_offload {
111 __be16 flags;
112 __be16 count;
125 __be16 index; 113 __be16 index;
126 u8 addr[ETH_ALEN]; 114 u8 addr[ETH_ALEN];
127 struct list_head list; 115};
116
117enum nfp_flower_mac_offload_cmd {
118 NFP_TUNNEL_MAC_OFFLOAD_ADD = 0,
119 NFP_TUNNEL_MAC_OFFLOAD_DEL = 1,
120 NFP_TUNNEL_MAC_OFFLOAD_MOD = 2,
128}; 121};
129 122
130#define NFP_MAX_MAC_INDEX 0xff 123#define NFP_MAX_MAC_INDEX 0xff
131 124
132/** 125/**
133 * struct nfp_tun_mac_non_nfp_idx - converts non NFP netdev ifindex to 8-bit id 126 * struct nfp_tun_offloaded_mac - hashtable entry for an offloaded MAC
134 * @ifindex: netdev ifindex of the device 127 * @ht_node: Hashtable entry
135 * @index: index of netdevs mac on NFP 128 * @addr: Offloaded MAC address
136 * @list: list pointer 129 * @index: Offloaded index for given MAC address
130 * @ref_count: Number of devs using this MAC address
131 * @repr_list: List of reprs sharing this MAC address
137 */ 132 */
138struct nfp_tun_mac_non_nfp_idx { 133struct nfp_tun_offloaded_mac {
139 int ifindex; 134 struct rhash_head ht_node;
140 u8 index; 135 u8 addr[ETH_ALEN];
141 struct list_head list; 136 u16 index;
137 int ref_count;
138 struct list_head repr_list;
139};
140
141static const struct rhashtable_params offloaded_macs_params = {
142 .key_offset = offsetof(struct nfp_tun_offloaded_mac, addr),
143 .head_offset = offsetof(struct nfp_tun_offloaded_mac, ht_node),
144 .key_len = ETH_ALEN,
145 .automatic_shrinking = true,
142}; 146};
143 147
144void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb) 148void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb)
@@ -205,15 +209,15 @@ static bool nfp_tun_has_route(struct nfp_app *app, __be32 ipv4_addr)
205 struct nfp_ipv4_route_entry *entry; 209 struct nfp_ipv4_route_entry *entry;
206 struct list_head *ptr, *storage; 210 struct list_head *ptr, *storage;
207 211
208 spin_lock_bh(&priv->nfp_neigh_off_lock); 212 spin_lock_bh(&priv->tun.neigh_off_lock);
209 list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { 213 list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) {
210 entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); 214 entry = list_entry(ptr, struct nfp_ipv4_route_entry, list);
211 if (entry->ipv4_addr == ipv4_addr) { 215 if (entry->ipv4_addr == ipv4_addr) {
212 spin_unlock_bh(&priv->nfp_neigh_off_lock); 216 spin_unlock_bh(&priv->tun.neigh_off_lock);
213 return true; 217 return true;
214 } 218 }
215 } 219 }
216 spin_unlock_bh(&priv->nfp_neigh_off_lock); 220 spin_unlock_bh(&priv->tun.neigh_off_lock);
217 return false; 221 return false;
218} 222}
219 223
@@ -223,24 +227,24 @@ static void nfp_tun_add_route_to_cache(struct nfp_app *app, __be32 ipv4_addr)
223 struct nfp_ipv4_route_entry *entry; 227 struct nfp_ipv4_route_entry *entry;
224 struct list_head *ptr, *storage; 228 struct list_head *ptr, *storage;
225 229
226 spin_lock_bh(&priv->nfp_neigh_off_lock); 230 spin_lock_bh(&priv->tun.neigh_off_lock);
227 list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { 231 list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) {
228 entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); 232 entry = list_entry(ptr, struct nfp_ipv4_route_entry, list);
229 if (entry->ipv4_addr == ipv4_addr) { 233 if (entry->ipv4_addr == ipv4_addr) {
230 spin_unlock_bh(&priv->nfp_neigh_off_lock); 234 spin_unlock_bh(&priv->tun.neigh_off_lock);
231 return; 235 return;
232 } 236 }
233 } 237 }
234 entry = kmalloc(sizeof(*entry), GFP_ATOMIC); 238 entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
235 if (!entry) { 239 if (!entry) {
236 spin_unlock_bh(&priv->nfp_neigh_off_lock); 240 spin_unlock_bh(&priv->tun.neigh_off_lock);
237 nfp_flower_cmsg_warn(app, "Mem error when storing new route.\n"); 241 nfp_flower_cmsg_warn(app, "Mem error when storing new route.\n");
238 return; 242 return;
239 } 243 }
240 244
241 entry->ipv4_addr = ipv4_addr; 245 entry->ipv4_addr = ipv4_addr;
242 list_add_tail(&entry->list, &priv->nfp_neigh_off_list); 246 list_add_tail(&entry->list, &priv->tun.neigh_off_list);
243 spin_unlock_bh(&priv->nfp_neigh_off_lock); 247 spin_unlock_bh(&priv->tun.neigh_off_lock);
244} 248}
245 249
246static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr) 250static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr)
@@ -249,8 +253,8 @@ static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr)
249 struct nfp_ipv4_route_entry *entry; 253 struct nfp_ipv4_route_entry *entry;
250 struct list_head *ptr, *storage; 254 struct list_head *ptr, *storage;
251 255
252 spin_lock_bh(&priv->nfp_neigh_off_lock); 256 spin_lock_bh(&priv->tun.neigh_off_lock);
253 list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { 257 list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) {
254 entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); 258 entry = list_entry(ptr, struct nfp_ipv4_route_entry, list);
255 if (entry->ipv4_addr == ipv4_addr) { 259 if (entry->ipv4_addr == ipv4_addr) {
256 list_del(&entry->list); 260 list_del(&entry->list);
@@ -258,7 +262,7 @@ static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr)
258 break; 262 break;
259 } 263 }
260 } 264 }
261 spin_unlock_bh(&priv->nfp_neigh_off_lock); 265 spin_unlock_bh(&priv->tun.neigh_off_lock);
262} 266}
263 267
264static void 268static void
@@ -326,7 +330,7 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event,
326 if (!nfp_netdev_is_nfp_repr(n->dev)) 330 if (!nfp_netdev_is_nfp_repr(n->dev))
327 return NOTIFY_DONE; 331 return NOTIFY_DONE;
328 332
329 app_priv = container_of(nb, struct nfp_flower_priv, nfp_tun_neigh_nb); 333 app_priv = container_of(nb, struct nfp_flower_priv, tun.neigh_nb);
330 app = app_priv->app; 334 app = app_priv->app;
331 335
332 /* Only concerned with changes to routes already added to NFP. */ 336 /* Only concerned with changes to routes already added to NFP. */
@@ -401,11 +405,11 @@ static void nfp_tun_write_ipv4_list(struct nfp_app *app)
401 int count; 405 int count;
402 406
403 memset(&payload, 0, sizeof(struct nfp_tun_ipv4_addr)); 407 memset(&payload, 0, sizeof(struct nfp_tun_ipv4_addr));
404 mutex_lock(&priv->nfp_ipv4_off_lock); 408 mutex_lock(&priv->tun.ipv4_off_lock);
405 count = 0; 409 count = 0;
406 list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) { 410 list_for_each_safe(ptr, storage, &priv->tun.ipv4_off_list) {
407 if (count >= NFP_FL_IPV4_ADDRS_MAX) { 411 if (count >= NFP_FL_IPV4_ADDRS_MAX) {
408 mutex_unlock(&priv->nfp_ipv4_off_lock); 412 mutex_unlock(&priv->tun.ipv4_off_lock);
409 nfp_flower_cmsg_warn(app, "IPv4 offload exceeds limit.\n"); 413 nfp_flower_cmsg_warn(app, "IPv4 offload exceeds limit.\n");
410 return; 414 return;
411 } 415 }
@@ -413,7 +417,7 @@ static void nfp_tun_write_ipv4_list(struct nfp_app *app)
413 payload.ipv4_addr[count++] = entry->ipv4_addr; 417 payload.ipv4_addr[count++] = entry->ipv4_addr;
414 } 418 }
415 payload.count = cpu_to_be32(count); 419 payload.count = cpu_to_be32(count);
416 mutex_unlock(&priv->nfp_ipv4_off_lock); 420 mutex_unlock(&priv->tun.ipv4_off_lock);
417 421
418 nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_IPS, 422 nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_IPS,
419 sizeof(struct nfp_tun_ipv4_addr), 423 sizeof(struct nfp_tun_ipv4_addr),
@@ -426,26 +430,26 @@ void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4)
426 struct nfp_ipv4_addr_entry *entry; 430 struct nfp_ipv4_addr_entry *entry;
427 struct list_head *ptr, *storage; 431 struct list_head *ptr, *storage;
428 432
429 mutex_lock(&priv->nfp_ipv4_off_lock); 433 mutex_lock(&priv->tun.ipv4_off_lock);
430 list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) { 434 list_for_each_safe(ptr, storage, &priv->tun.ipv4_off_list) {
431 entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); 435 entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list);
432 if (entry->ipv4_addr == ipv4) { 436 if (entry->ipv4_addr == ipv4) {
433 entry->ref_count++; 437 entry->ref_count++;
434 mutex_unlock(&priv->nfp_ipv4_off_lock); 438 mutex_unlock(&priv->tun.ipv4_off_lock);
435 return; 439 return;
436 } 440 }
437 } 441 }
438 442
439 entry = kmalloc(sizeof(*entry), GFP_KERNEL); 443 entry = kmalloc(sizeof(*entry), GFP_KERNEL);
440 if (!entry) { 444 if (!entry) {
441 mutex_unlock(&priv->nfp_ipv4_off_lock); 445 mutex_unlock(&priv->tun.ipv4_off_lock);
442 nfp_flower_cmsg_warn(app, "Mem error when offloading IP address.\n"); 446 nfp_flower_cmsg_warn(app, "Mem error when offloading IP address.\n");
443 return; 447 return;
444 } 448 }
445 entry->ipv4_addr = ipv4; 449 entry->ipv4_addr = ipv4;
446 entry->ref_count = 1; 450 entry->ref_count = 1;
447 list_add_tail(&entry->list, &priv->nfp_ipv4_off_list); 451 list_add_tail(&entry->list, &priv->tun.ipv4_off_list);
448 mutex_unlock(&priv->nfp_ipv4_off_lock); 452 mutex_unlock(&priv->tun.ipv4_off_lock);
449 453
450 nfp_tun_write_ipv4_list(app); 454 nfp_tun_write_ipv4_list(app);
451} 455}
@@ -456,8 +460,8 @@ void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4)
456 struct nfp_ipv4_addr_entry *entry; 460 struct nfp_ipv4_addr_entry *entry;
457 struct list_head *ptr, *storage; 461 struct list_head *ptr, *storage;
458 462
459 mutex_lock(&priv->nfp_ipv4_off_lock); 463 mutex_lock(&priv->tun.ipv4_off_lock);
460 list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) { 464 list_for_each_safe(ptr, storage, &priv->tun.ipv4_off_list) {
461 entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); 465 entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list);
462 if (entry->ipv4_addr == ipv4) { 466 if (entry->ipv4_addr == ipv4) {
463 entry->ref_count--; 467 entry->ref_count--;
@@ -468,191 +472,357 @@ void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4)
468 break; 472 break;
469 } 473 }
470 } 474 }
471 mutex_unlock(&priv->nfp_ipv4_off_lock); 475 mutex_unlock(&priv->tun.ipv4_off_lock);
472 476
473 nfp_tun_write_ipv4_list(app); 477 nfp_tun_write_ipv4_list(app);
474} 478}
475 479
476void nfp_tunnel_write_macs(struct nfp_app *app) 480static int
481__nfp_tunnel_offload_mac(struct nfp_app *app, u8 *mac, u16 idx, bool del)
477{ 482{
478 struct nfp_flower_priv *priv = app->priv; 483 struct nfp_tun_mac_addr_offload payload;
479 struct nfp_tun_mac_offload_entry *entry;
480 struct nfp_tun_mac_addr *payload;
481 struct list_head *ptr, *storage;
482 int mac_count, err, pay_size;
483 484
484 mutex_lock(&priv->nfp_mac_off_lock); 485 memset(&payload, 0, sizeof(payload));
485 if (!priv->nfp_mac_off_count) {
486 mutex_unlock(&priv->nfp_mac_off_lock);
487 return;
488 }
489 486
490 pay_size = sizeof(struct nfp_tun_mac_addr) + 487 if (del)
491 sizeof(struct index_mac_addr) * priv->nfp_mac_off_count; 488 payload.flags = cpu_to_be16(NFP_TUN_MAC_OFFLOAD_DEL_FLAG);
492 489
493 payload = kzalloc(pay_size, GFP_KERNEL); 490 /* FW supports multiple MACs per cmsg but restrict to single. */
494 if (!payload) { 491 payload.count = cpu_to_be16(1);
495 mutex_unlock(&priv->nfp_mac_off_lock); 492 payload.index = cpu_to_be16(idx);
496 return; 493 ether_addr_copy(payload.addr, mac);
497 }
498 494
499 payload->count = cpu_to_be16(priv->nfp_mac_off_count); 495 return nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_MAC,
496 sizeof(struct nfp_tun_mac_addr_offload),
497 &payload, GFP_KERNEL);
498}
500 499
501 mac_count = 0; 500static bool nfp_tunnel_port_is_phy_repr(int port)
502 list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) { 501{
503 entry = list_entry(ptr, struct nfp_tun_mac_offload_entry, 502 if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) ==
504 list); 503 NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT)
505 payload->addresses[mac_count].index = entry->index; 504 return true;
506 ether_addr_copy(payload->addresses[mac_count].addr,
507 entry->addr);
508 mac_count++;
509 }
510 505
511 err = nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_MAC, 506 return false;
512 pay_size, payload, GFP_KERNEL); 507}
513 508
514 kfree(payload); 509static u16 nfp_tunnel_get_mac_idx_from_phy_port_id(int port)
510{
511 return port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT;
512}
515 513
516 if (err) { 514static u16 nfp_tunnel_get_global_mac_idx_from_ida(int id)
517 mutex_unlock(&priv->nfp_mac_off_lock); 515{
518 /* Write failed so retain list for future retry. */ 516 return id << 8 | NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT;
519 return; 517}
520 } 518
519static int nfp_tunnel_get_ida_from_global_mac_idx(u16 nfp_mac_idx)
520{
521 return nfp_mac_idx >> 8;
522}
523
524static bool nfp_tunnel_is_mac_idx_global(u16 nfp_mac_idx)
525{
526 return (nfp_mac_idx & 0xff) == NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT;
527}
528
529static struct nfp_tun_offloaded_mac *
530nfp_tunnel_lookup_offloaded_macs(struct nfp_app *app, u8 *mac)
531{
532 struct nfp_flower_priv *priv = app->priv;
533
534 return rhashtable_lookup_fast(&priv->tun.offloaded_macs, mac,
535 offloaded_macs_params);
536}
537
538static void
539nfp_tunnel_offloaded_macs_inc_ref_and_link(struct nfp_tun_offloaded_mac *entry,
540 struct net_device *netdev, bool mod)
541{
542 if (nfp_netdev_is_nfp_repr(netdev)) {
543 struct nfp_flower_repr_priv *repr_priv;
544 struct nfp_repr *repr;
521 545
522 /* If list was successfully offloaded, flush it. */ 546 repr = netdev_priv(netdev);
523 list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) { 547 repr_priv = repr->app_priv;
524 entry = list_entry(ptr, struct nfp_tun_mac_offload_entry, 548
525 list); 549 /* If modifing MAC, remove repr from old list first. */
526 list_del(&entry->list); 550 if (mod)
527 kfree(entry); 551 list_del(&repr_priv->mac_list);
552
553 list_add_tail(&repr_priv->mac_list, &entry->repr_list);
528 } 554 }
529 555
530 priv->nfp_mac_off_count = 0; 556 entry->ref_count++;
531 mutex_unlock(&priv->nfp_mac_off_lock);
532} 557}
533 558
534static int nfp_tun_get_mac_idx(struct nfp_app *app, int ifindex) 559static int
560nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev,
561 int port, bool mod)
535{ 562{
536 struct nfp_flower_priv *priv = app->priv; 563 struct nfp_flower_priv *priv = app->priv;
537 struct nfp_tun_mac_non_nfp_idx *entry; 564 int ida_idx = NFP_MAX_MAC_INDEX, err;
538 struct list_head *ptr, *storage; 565 struct nfp_tun_offloaded_mac *entry;
539 int idx; 566 u16 nfp_mac_idx = 0;
540 567
541 mutex_lock(&priv->nfp_mac_index_lock); 568 entry = nfp_tunnel_lookup_offloaded_macs(app, netdev->dev_addr);
542 list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) { 569 if (entry && nfp_tunnel_is_mac_idx_global(entry->index)) {
543 entry = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, list); 570 nfp_tunnel_offloaded_macs_inc_ref_and_link(entry, netdev, mod);
544 if (entry->ifindex == ifindex) { 571 return 0;
545 idx = entry->index;
546 mutex_unlock(&priv->nfp_mac_index_lock);
547 return idx;
548 }
549 } 572 }
550 573
551 idx = ida_simple_get(&priv->nfp_mac_off_ids, 0, 574 /* Assign a global index if non-repr or MAC address is now shared. */
552 NFP_MAX_MAC_INDEX, GFP_KERNEL); 575 if (entry || !port) {
553 if (idx < 0) { 576 ida_idx = ida_simple_get(&priv->tun.mac_off_ids, 0,
554 mutex_unlock(&priv->nfp_mac_index_lock); 577 NFP_MAX_MAC_INDEX, GFP_KERNEL);
555 return idx; 578 if (ida_idx < 0)
579 return ida_idx;
580
581 nfp_mac_idx = nfp_tunnel_get_global_mac_idx_from_ida(ida_idx);
582 } else {
583 nfp_mac_idx = nfp_tunnel_get_mac_idx_from_phy_port_id(port);
556 } 584 }
557 585
558 entry = kmalloc(sizeof(*entry), GFP_KERNEL);
559 if (!entry) { 586 if (!entry) {
560 mutex_unlock(&priv->nfp_mac_index_lock); 587 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
561 return -ENOMEM; 588 if (!entry) {
589 err = -ENOMEM;
590 goto err_free_ida;
591 }
592
593 ether_addr_copy(entry->addr, netdev->dev_addr);
594 INIT_LIST_HEAD(&entry->repr_list);
595
596 if (rhashtable_insert_fast(&priv->tun.offloaded_macs,
597 &entry->ht_node,
598 offloaded_macs_params)) {
599 err = -ENOMEM;
600 goto err_free_entry;
601 }
602 }
603
604 err = __nfp_tunnel_offload_mac(app, netdev->dev_addr,
605 nfp_mac_idx, false);
606 if (err) {
607 /* If not shared then free. */
608 if (!entry->ref_count)
609 goto err_remove_hash;
610 goto err_free_ida;
562 } 611 }
563 entry->ifindex = ifindex;
564 entry->index = idx;
565 list_add_tail(&entry->list, &priv->nfp_mac_index_list);
566 mutex_unlock(&priv->nfp_mac_index_lock);
567 612
568 return idx; 613 entry->index = nfp_mac_idx;
614 nfp_tunnel_offloaded_macs_inc_ref_and_link(entry, netdev, mod);
615
616 return 0;
617
618err_remove_hash:
619 rhashtable_remove_fast(&priv->tun.offloaded_macs, &entry->ht_node,
620 offloaded_macs_params);
621err_free_entry:
622 kfree(entry);
623err_free_ida:
624 if (ida_idx != NFP_MAX_MAC_INDEX)
625 ida_simple_remove(&priv->tun.mac_off_ids, ida_idx);
626
627 return err;
569} 628}
570 629
571static void nfp_tun_del_mac_idx(struct nfp_app *app, int ifindex) 630static int
631nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev,
632 u8 *mac, bool mod)
572{ 633{
573 struct nfp_flower_priv *priv = app->priv; 634 struct nfp_flower_priv *priv = app->priv;
574 struct nfp_tun_mac_non_nfp_idx *entry; 635 struct nfp_flower_repr_priv *repr_priv;
575 struct list_head *ptr, *storage; 636 struct nfp_tun_offloaded_mac *entry;
637 struct nfp_repr *repr;
638 int ida_idx;
639
640 entry = nfp_tunnel_lookup_offloaded_macs(app, mac);
641 if (!entry)
642 return 0;
643
644 entry->ref_count--;
 645 /* If del is part of a mod then mac_list is still in use elsewhere. */
646 if (nfp_netdev_is_nfp_repr(netdev) && !mod) {
647 repr = netdev_priv(netdev);
648 repr_priv = repr->app_priv;
649 list_del(&repr_priv->mac_list);
650 }
576 651
577 mutex_lock(&priv->nfp_mac_index_lock); 652 /* If MAC is now used by 1 repr set the offloaded MAC index to port. */
578 list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) { 653 if (entry->ref_count == 1 && list_is_singular(&entry->repr_list)) {
579 entry = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, list); 654 u16 nfp_mac_idx;
580 if (entry->ifindex == ifindex) { 655 int port, err;
581 ida_simple_remove(&priv->nfp_mac_off_ids, 656
582 entry->index); 657 repr_priv = list_first_entry(&entry->repr_list,
583 list_del(&entry->list); 658 struct nfp_flower_repr_priv,
584 kfree(entry); 659 mac_list);
585 break; 660 repr = repr_priv->nfp_repr;
661 port = nfp_repr_get_port_id(repr->netdev);
662 nfp_mac_idx = nfp_tunnel_get_mac_idx_from_phy_port_id(port);
663 err = __nfp_tunnel_offload_mac(app, mac, nfp_mac_idx, false);
664 if (err) {
665 nfp_flower_cmsg_warn(app, "MAC offload index revert failed on %s.\n",
666 netdev_name(netdev));
667 return 0;
586 } 668 }
669
670 ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index);
671 ida_simple_remove(&priv->tun.mac_off_ids, ida_idx);
672 entry->index = nfp_mac_idx;
673 return 0;
587 } 674 }
588 mutex_unlock(&priv->nfp_mac_index_lock);
589}
590 675
591static void nfp_tun_add_to_mac_offload_list(struct net_device *netdev, 676 if (entry->ref_count)
592 struct nfp_app *app) 677 return 0;
593{
594 struct nfp_flower_priv *priv = app->priv;
595 struct nfp_tun_mac_offload_entry *entry;
596 u16 nfp_mac_idx;
597 int port = 0;
598 678
599 /* Check if MAC should be offloaded. */ 679 WARN_ON_ONCE(rhashtable_remove_fast(&priv->tun.offloaded_macs,
600 if (!is_valid_ether_addr(netdev->dev_addr)) 680 &entry->ht_node,
601 return; 681 offloaded_macs_params));
682 /* If MAC has global ID then extract and free the ida entry. */
683 if (nfp_tunnel_is_mac_idx_global(entry->index)) {
684 ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index);
685 ida_simple_remove(&priv->tun.mac_off_ids, ida_idx);
686 }
602 687
603 if (nfp_netdev_is_nfp_repr(netdev)) 688 kfree(entry);
689
690 return __nfp_tunnel_offload_mac(app, mac, 0, true);
691}
692
693static int
694nfp_tunnel_offload_mac(struct nfp_app *app, struct net_device *netdev,
695 enum nfp_flower_mac_offload_cmd cmd)
696{
697 struct nfp_flower_non_repr_priv *nr_priv = NULL;
698 bool non_repr = false, *mac_offloaded;
699 u8 *off_mac = NULL;
700 int err, port = 0;
701
702 if (nfp_netdev_is_nfp_repr(netdev)) {
703 struct nfp_flower_repr_priv *repr_priv;
704 struct nfp_repr *repr;
705
706 repr = netdev_priv(netdev);
707 if (repr->app != app)
708 return 0;
709
710 repr_priv = repr->app_priv;
711 mac_offloaded = &repr_priv->mac_offloaded;
712 off_mac = &repr_priv->offloaded_mac_addr[0];
604 port = nfp_repr_get_port_id(netdev); 713 port = nfp_repr_get_port_id(netdev);
605 else if (!nfp_fl_is_netdev_to_offload(netdev)) 714 if (!nfp_tunnel_port_is_phy_repr(port))
606 return; 715 return 0;
716 } else if (nfp_fl_is_netdev_to_offload(netdev)) {
717 nr_priv = nfp_flower_non_repr_priv_get(app, netdev);
718 if (!nr_priv)
719 return -ENOMEM;
720
721 mac_offloaded = &nr_priv->mac_offloaded;
722 off_mac = &nr_priv->offloaded_mac_addr[0];
723 non_repr = true;
724 } else {
725 return 0;
726 }
607 727
608 entry = kmalloc(sizeof(*entry), GFP_KERNEL); 728 if (!is_valid_ether_addr(netdev->dev_addr)) {
609 if (!entry) { 729 err = -EINVAL;
610 nfp_flower_cmsg_warn(app, "Mem fail when offloading MAC.\n"); 730 goto err_put_non_repr_priv;
611 return;
612 } 731 }
613 732
614 if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) == 733 if (cmd == NFP_TUNNEL_MAC_OFFLOAD_MOD && !*mac_offloaded)
615 NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT) { 734 cmd = NFP_TUNNEL_MAC_OFFLOAD_ADD;
616 nfp_mac_idx = port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT;
617 } else if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) ==
618 NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT) {
619 port = FIELD_GET(NFP_FLOWER_CMSG_PORT_VNIC, port);
620 nfp_mac_idx = port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT;
621 } else {
622 /* Must assign our own unique 8-bit index. */
623 int idx = nfp_tun_get_mac_idx(app, netdev->ifindex);
624 735
625 if (idx < 0) { 736 switch (cmd) {
626 nfp_flower_cmsg_warn(app, "Can't assign non-repr MAC index.\n"); 737 case NFP_TUNNEL_MAC_OFFLOAD_ADD:
627 kfree(entry); 738 err = nfp_tunnel_add_shared_mac(app, netdev, port, false);
628 return; 739 if (err)
629 } 740 goto err_put_non_repr_priv;
630 nfp_mac_idx = idx << 8 | NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT; 741
742 if (non_repr)
743 __nfp_flower_non_repr_priv_get(nr_priv);
744
745 *mac_offloaded = true;
746 ether_addr_copy(off_mac, netdev->dev_addr);
747 break;
748 case NFP_TUNNEL_MAC_OFFLOAD_DEL:
749 /* Only attempt delete if add was successful. */
750 if (!*mac_offloaded)
751 break;
752
753 if (non_repr)
754 __nfp_flower_non_repr_priv_put(nr_priv);
755
756 *mac_offloaded = false;
757
758 err = nfp_tunnel_del_shared_mac(app, netdev, netdev->dev_addr,
759 false);
760 if (err)
761 goto err_put_non_repr_priv;
762
763 break;
764 case NFP_TUNNEL_MAC_OFFLOAD_MOD:
765 /* Ignore if changing to the same address. */
766 if (ether_addr_equal(netdev->dev_addr, off_mac))
767 break;
768
769 err = nfp_tunnel_add_shared_mac(app, netdev, port, true);
770 if (err)
771 goto err_put_non_repr_priv;
772
773 /* Delete the previous MAC address. */
774 err = nfp_tunnel_del_shared_mac(app, netdev, off_mac, true);
775 if (err)
776 nfp_flower_cmsg_warn(app, "Failed to remove offload of replaced MAC addr on %s.\n",
777 netdev_name(netdev));
778
779 ether_addr_copy(off_mac, netdev->dev_addr);
780 break;
781 default:
782 err = -EINVAL;
783 goto err_put_non_repr_priv;
631 } 784 }
632 785
633 entry->index = cpu_to_be16(nfp_mac_idx); 786 if (non_repr)
634 ether_addr_copy(entry->addr, netdev->dev_addr); 787 __nfp_flower_non_repr_priv_put(nr_priv);
788
789 return 0;
790
791err_put_non_repr_priv:
792 if (non_repr)
793 __nfp_flower_non_repr_priv_put(nr_priv);
635 794
636 mutex_lock(&priv->nfp_mac_off_lock); 795 return err;
637 priv->nfp_mac_off_count++;
638 list_add_tail(&entry->list, &priv->nfp_mac_off_list);
639 mutex_unlock(&priv->nfp_mac_off_lock);
640} 796}
641 797
642int nfp_tunnel_mac_event_handler(struct nfp_app *app, 798int nfp_tunnel_mac_event_handler(struct nfp_app *app,
643 struct net_device *netdev, 799 struct net_device *netdev,
644 unsigned long event, void *ptr) 800 unsigned long event, void *ptr)
645{ 801{
646 if (event == NETDEV_DOWN || event == NETDEV_UNREGISTER) { 802 int err;
647 /* If non-nfp netdev then free its offload index. */ 803
648 if (nfp_fl_is_netdev_to_offload(netdev)) 804 if (event == NETDEV_DOWN) {
649 nfp_tun_del_mac_idx(app, netdev->ifindex); 805 err = nfp_tunnel_offload_mac(app, netdev,
650 } else if (event == NETDEV_UP || event == NETDEV_CHANGEADDR || 806 NFP_TUNNEL_MAC_OFFLOAD_DEL);
651 event == NETDEV_REGISTER) { 807 if (err)
652 nfp_tun_add_to_mac_offload_list(netdev, app); 808 nfp_flower_cmsg_warn(app, "Failed to delete offload MAC on %s.\n",
653 809 netdev_name(netdev));
654 /* Force a list write to keep NFP up to date. */ 810 } else if (event == NETDEV_UP) {
655 nfp_tunnel_write_macs(app); 811 err = nfp_tunnel_offload_mac(app, netdev,
812 NFP_TUNNEL_MAC_OFFLOAD_ADD);
813 if (err)
814 nfp_flower_cmsg_warn(app, "Failed to offload MAC on %s.\n",
815 netdev_name(netdev));
816 } else if (event == NETDEV_CHANGEADDR) {
817 /* Only offload addr change if netdev is already up. */
818 if (!(netdev->flags & IFF_UP))
819 return NOTIFY_OK;
820
821 err = nfp_tunnel_offload_mac(app, netdev,
822 NFP_TUNNEL_MAC_OFFLOAD_MOD);
823 if (err)
824 nfp_flower_cmsg_warn(app, "Failed to offload MAC change on %s.\n",
825 netdev_name(netdev));
656 } 826 }
657 return NOTIFY_OK; 827 return NOTIFY_OK;
658} 828}
@@ -660,68 +830,62 @@ int nfp_tunnel_mac_event_handler(struct nfp_app *app,
660int nfp_tunnel_config_start(struct nfp_app *app) 830int nfp_tunnel_config_start(struct nfp_app *app)
661{ 831{
662 struct nfp_flower_priv *priv = app->priv; 832 struct nfp_flower_priv *priv = app->priv;
833 int err;
834
835 /* Initialise rhash for MAC offload tracking. */
836 err = rhashtable_init(&priv->tun.offloaded_macs,
837 &offloaded_macs_params);
838 if (err)
839 return err;
663 840
664 /* Initialise priv data for MAC offloading. */ 841 ida_init(&priv->tun.mac_off_ids);
665 priv->nfp_mac_off_count = 0;
666 mutex_init(&priv->nfp_mac_off_lock);
667 INIT_LIST_HEAD(&priv->nfp_mac_off_list);
668 mutex_init(&priv->nfp_mac_index_lock);
669 INIT_LIST_HEAD(&priv->nfp_mac_index_list);
670 ida_init(&priv->nfp_mac_off_ids);
671 842
672 /* Initialise priv data for IPv4 offloading. */ 843 /* Initialise priv data for IPv4 offloading. */
673 mutex_init(&priv->nfp_ipv4_off_lock); 844 mutex_init(&priv->tun.ipv4_off_lock);
674 INIT_LIST_HEAD(&priv->nfp_ipv4_off_list); 845 INIT_LIST_HEAD(&priv->tun.ipv4_off_list);
675 846
676 /* Initialise priv data for neighbour offloading. */ 847 /* Initialise priv data for neighbour offloading. */
677 spin_lock_init(&priv->nfp_neigh_off_lock); 848 spin_lock_init(&priv->tun.neigh_off_lock);
678 INIT_LIST_HEAD(&priv->nfp_neigh_off_list); 849 INIT_LIST_HEAD(&priv->tun.neigh_off_list);
679 priv->nfp_tun_neigh_nb.notifier_call = nfp_tun_neigh_event_handler; 850 priv->tun.neigh_nb.notifier_call = nfp_tun_neigh_event_handler;
851
852 err = register_netevent_notifier(&priv->tun.neigh_nb);
853 if (err) {
854 rhashtable_free_and_destroy(&priv->tun.offloaded_macs,
855 nfp_check_rhashtable_empty, NULL);
856 return err;
857 }
680 858
681 return register_netevent_notifier(&priv->nfp_tun_neigh_nb); 859 return 0;
682} 860}
683 861
684void nfp_tunnel_config_stop(struct nfp_app *app) 862void nfp_tunnel_config_stop(struct nfp_app *app)
685{ 863{
686 struct nfp_tun_mac_offload_entry *mac_entry;
687 struct nfp_flower_priv *priv = app->priv; 864 struct nfp_flower_priv *priv = app->priv;
688 struct nfp_ipv4_route_entry *route_entry; 865 struct nfp_ipv4_route_entry *route_entry;
689 struct nfp_tun_mac_non_nfp_idx *mac_idx;
690 struct nfp_ipv4_addr_entry *ip_entry; 866 struct nfp_ipv4_addr_entry *ip_entry;
691 struct list_head *ptr, *storage; 867 struct list_head *ptr, *storage;
692 868
693 unregister_netevent_notifier(&priv->nfp_tun_neigh_nb); 869 unregister_netevent_notifier(&priv->tun.neigh_nb);
694
695 /* Free any memory that may be occupied by MAC list. */
696 list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) {
697 mac_entry = list_entry(ptr, struct nfp_tun_mac_offload_entry,
698 list);
699 list_del(&mac_entry->list);
700 kfree(mac_entry);
701 }
702
703 /* Free any memory that may be occupied by MAC index list. */
704 list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) {
705 mac_idx = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx,
706 list);
707 list_del(&mac_idx->list);
708 kfree(mac_idx);
709 }
710 870
711 ida_destroy(&priv->nfp_mac_off_ids); 871 ida_destroy(&priv->tun.mac_off_ids);
712 872
713 /* Free any memory that may be occupied by ipv4 list. */ 873 /* Free any memory that may be occupied by ipv4 list. */
714 list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) { 874 list_for_each_safe(ptr, storage, &priv->tun.ipv4_off_list) {
715 ip_entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); 875 ip_entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list);
716 list_del(&ip_entry->list); 876 list_del(&ip_entry->list);
717 kfree(ip_entry); 877 kfree(ip_entry);
718 } 878 }
719 879
720 /* Free any memory that may be occupied by the route list. */ 880 /* Free any memory that may be occupied by the route list. */
721 list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { 881 list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) {
722 route_entry = list_entry(ptr, struct nfp_ipv4_route_entry, 882 route_entry = list_entry(ptr, struct nfp_ipv4_route_entry,
723 list); 883 list);
724 list_del(&route_entry->list); 884 list_del(&route_entry->list);
725 kfree(route_entry); 885 kfree(route_entry);
726 } 886 }
887
888 /* Destroy rhash. Entries should be cleaned on netdev notifier unreg. */
889 rhashtable_free_and_destroy(&priv->tun.offloaded_macs,
890 nfp_check_rhashtable_empty, NULL);
727} 891}
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 16d0479f6891..7a873002e626 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -396,7 +396,8 @@ static int qlcnic_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
396 396
397static int qlcnic_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], 397static int qlcnic_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
398 struct net_device *netdev, 398 struct net_device *netdev,
399 const unsigned char *addr, u16 vid, u16 flags) 399 const unsigned char *addr, u16 vid, u16 flags,
400 struct netlink_ext_ack *extack)
400{ 401{
401 struct qlcnic_adapter *adapter = netdev_priv(netdev); 402 struct qlcnic_adapter *adapter = netdev_priv(netdev);
402 int err = 0; 403 int err = 0;
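
The extra argument matches the ndo_fdb_add() signature, which now carries a struct netlink_ext_ack so drivers can return a human-readable error alongside the errno. A hedged sketch of how a driver might use it (the check shown is purely illustrative):

    #include <linux/netdevice.h>
    #include <linux/netlink.h>

    static int demo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
    			struct net_device *netdev,
    			const unsigned char *addr, u16 vid, u16 flags,
    			struct netlink_ext_ack *extack)
    {
    	if (!(netdev->flags & IFF_UP)) {	/* illustrative check only */
    		NL_SET_ERR_MSG_MOD(extack, "device must be up for FDB add");
    		return -EOPNOTSUPP;
    	}
    	return 0;
    }
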
diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c
index 7e011c1c1e6e..cfb67b746595 100644
--- a/drivers/net/ethernet/realtek/atp.c
+++ b/drivers/net/ethernet/realtek/atp.c
@@ -454,14 +454,14 @@ static void hardware_init(struct net_device *dev)
454{ 454{
455 struct net_local *lp = netdev_priv(dev); 455 struct net_local *lp = netdev_priv(dev);
456 long ioaddr = dev->base_addr; 456 long ioaddr = dev->base_addr;
457 int i; 457 int i;
458 458
459 /* Turn off the printer multiplexer on the 8012. */ 459 /* Turn off the printer multiplexer on the 8012. */
460 for (i = 0; i < 8; i++) 460 for (i = 0; i < 8; i++)
461 outb(mux_8012[i], ioaddr + PAR_DATA); 461 outb(mux_8012[i], ioaddr + PAR_DATA);
462 write_reg_high(ioaddr, CMR1, CMR1h_RESET); 462 write_reg_high(ioaddr, CMR1, CMR1h_RESET);
463 463
464 for (i = 0; i < 6; i++) 464 for (i = 0; i < 6; i++)
465 write_reg_byte(ioaddr, PAR0 + i, dev->dev_addr[i]); 465 write_reg_byte(ioaddr, PAR0 + i, dev->dev_addr[i]);
466 466
467 write_reg_high(ioaddr, CMR2, lp->addr_mode); 467 write_reg_high(ioaddr, CMR2, lp->addr_mode);
@@ -471,15 +471,15 @@ static void hardware_init(struct net_device *dev)
471 (read_nibble(ioaddr, CMR2_h) >> 3) & 0x0f); 471 (read_nibble(ioaddr, CMR2_h) >> 3) & 0x0f);
472 } 472 }
473 473
474 write_reg(ioaddr, CMR2, CMR2_IRQOUT); 474 write_reg(ioaddr, CMR2, CMR2_IRQOUT);
475 write_reg_high(ioaddr, CMR1, CMR1h_RxENABLE | CMR1h_TxENABLE); 475 write_reg_high(ioaddr, CMR1, CMR1h_RxENABLE | CMR1h_TxENABLE);
476 476
477 /* Enable the interrupt line from the serial port. */ 477 /* Enable the interrupt line from the serial port. */
478 outb(Ctrl_SelData + Ctrl_IRQEN, ioaddr + PAR_CONTROL); 478 outb(Ctrl_SelData + Ctrl_IRQEN, ioaddr + PAR_CONTROL);
479 479
480 /* Unmask the interesting interrupts. */ 480 /* Unmask the interesting interrupts. */
481 write_reg(ioaddr, IMR, ISR_RxOK | ISR_TxErr | ISR_TxOK); 481 write_reg(ioaddr, IMR, ISR_RxOK | ISR_TxErr | ISR_TxOK);
482 write_reg_high(ioaddr, IMR, ISRh_RxErr); 482 write_reg_high(ioaddr, IMR, ISRh_RxErr);
483 483
484 lp->tx_unit_busy = 0; 484 lp->tx_unit_busy = 0;
485 lp->pac_cnt_in_tx_buf = 0; 485 lp->pac_cnt_in_tx_buf = 0;
@@ -610,10 +610,12 @@ static irqreturn_t atp_interrupt(int irq, void *dev_instance)
610 write_reg(ioaddr, CMR2, CMR2_NULL); 610 write_reg(ioaddr, CMR2, CMR2_NULL);
611 write_reg(ioaddr, IMR, 0); 611 write_reg(ioaddr, IMR, 0);
612 612
613 if (net_debug > 5) printk(KERN_DEBUG "%s: In interrupt ", dev->name); 613 if (net_debug > 5)
614 while (--boguscount > 0) { 614 printk(KERN_DEBUG "%s: In interrupt ", dev->name);
615 while (--boguscount > 0) {
615 int status = read_nibble(ioaddr, ISR); 616 int status = read_nibble(ioaddr, ISR);
616 if (net_debug > 5) printk("loop status %02x..", status); 617 if (net_debug > 5)
618 printk("loop status %02x..", status);
617 619
618 if (status & (ISR_RxOK<<3)) { 620 if (status & (ISR_RxOK<<3)) {
619 handled = 1; 621 handled = 1;
@@ -640,7 +642,8 @@ static irqreturn_t atp_interrupt(int irq, void *dev_instance)
640 } while (--boguscount > 0); 642 } while (--boguscount > 0);
641 } else if (status & ((ISR_TxErr + ISR_TxOK)<<3)) { 643 } else if (status & ((ISR_TxErr + ISR_TxOK)<<3)) {
642 handled = 1; 644 handled = 1;
643 if (net_debug > 6) printk("handling Tx done.."); 645 if (net_debug > 6)
646 printk("handling Tx done..");
644 /* Clear the Tx interrupt. We should check for too many failures 647 /* Clear the Tx interrupt. We should check for too many failures
645 and reinitialize the adapter. */ 648 and reinitialize the adapter. */
646 write_reg(ioaddr, ISR, ISR_TxErr + ISR_TxOK); 649 write_reg(ioaddr, ISR, ISR_TxErr + ISR_TxOK);
@@ -680,7 +683,7 @@ static irqreturn_t atp_interrupt(int irq, void *dev_instance)
680 break; 683 break;
681 } else 684 } else
682 break; 685 break;
683 } 686 }
684 687
685 /* This following code fixes a rare (and very difficult to track down) 688 /* This following code fixes a rare (and very difficult to track down)
686 problem where the adapter forgets its ethernet address. */ 689 problem where the adapter forgets its ethernet address. */
@@ -694,7 +697,7 @@ static irqreturn_t atp_interrupt(int irq, void *dev_instance)
694 } 697 }
695 698
696 /* Tell the adapter that it can go back to using the output line as IRQ. */ 699 /* Tell the adapter that it can go back to using the output line as IRQ. */
697 write_reg(ioaddr, CMR2, CMR2_IRQOUT); 700 write_reg(ioaddr, CMR2, CMR2_IRQOUT);
698 /* Enable the physical interrupt line, which is sure to be low until.. */ 701 /* Enable the physical interrupt line, which is sure to be low until.. */
699 outb(Ctrl_SelData + Ctrl_IRQEN, ioaddr + PAR_CONTROL); 702 outb(Ctrl_SelData + Ctrl_IRQEN, ioaddr + PAR_CONTROL);
700 /* .. we enable the interrupt sources. */ 703 /* .. we enable the interrupt sources. */
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index abb94c543aa2..e790a4116f1e 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -639,6 +639,7 @@ struct rtl8169_private {
639 void __iomem *mmio_addr; /* memory map physical address */ 639 void __iomem *mmio_addr; /* memory map physical address */
640 struct pci_dev *pci_dev; 640 struct pci_dev *pci_dev;
641 struct net_device *dev; 641 struct net_device *dev;
642 struct phy_device *phydev;
642 struct napi_struct napi; 643 struct napi_struct napi;
643 u32 msg_enable; 644 u32 msg_enable;
644 u16 mac_version; 645 u16 mac_version;
@@ -679,7 +680,6 @@ struct rtl8169_private {
679 } wk; 680 } wk;
680 681
681 unsigned supports_gmii:1; 682 unsigned supports_gmii:1;
682 struct mii_bus *mii_bus;
683 dma_addr_t counters_phys_addr; 683 dma_addr_t counters_phys_addr;
684 struct rtl8169_counters *counters; 684 struct rtl8169_counters *counters;
685 struct rtl8169_tc_offsets tc_offset; 685 struct rtl8169_tc_offsets tc_offset;
@@ -745,6 +745,16 @@ static void rtl_unlock_work(struct rtl8169_private *tp)
745 mutex_unlock(&tp->wk.mutex); 745 mutex_unlock(&tp->wk.mutex);
746} 746}
747 747
748static void rtl_lock_config_regs(struct rtl8169_private *tp)
749{
750 RTL_W8(tp, Cfg9346, Cfg9346_Lock);
751}
752
753static void rtl_unlock_config_regs(struct rtl8169_private *tp)
754{
755 RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
756}
757
748static void rtl_tx_performance_tweak(struct rtl8169_private *tp, u16 force) 758static void rtl_tx_performance_tweak(struct rtl8169_private *tp, u16 force)
749{ 759{
750 pcie_capability_clear_and_set_word(tp->pci_dev, PCI_EXP_DEVCTL, 760 pcie_capability_clear_and_set_word(tp->pci_dev, PCI_EXP_DEVCTL,
@@ -1278,11 +1288,6 @@ static u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr)
1278 RTL_R32(tp, EFUSEAR) & EFUSEAR_DATA_MASK : ~0; 1288 RTL_R32(tp, EFUSEAR) & EFUSEAR_DATA_MASK : ~0;
1279} 1289}
1280 1290
1281static u16 rtl_get_events(struct rtl8169_private *tp)
1282{
1283 return RTL_R16(tp, IntrStatus);
1284}
1285
1286static void rtl_ack_events(struct rtl8169_private *tp, u16 bits) 1291static void rtl_ack_events(struct rtl8169_private *tp, u16 bits)
1287{ 1292{
1288 RTL_W16(tp, IntrStatus, bits); 1293 RTL_W16(tp, IntrStatus, bits);
@@ -1313,7 +1318,7 @@ static void rtl8169_irq_mask_and_ack(struct rtl8169_private *tp)
1313static void rtl_link_chg_patch(struct rtl8169_private *tp) 1318static void rtl_link_chg_patch(struct rtl8169_private *tp)
1314{ 1319{
1315 struct net_device *dev = tp->dev; 1320 struct net_device *dev = tp->dev;
1316 struct phy_device *phydev = dev->phydev; 1321 struct phy_device *phydev = tp->phydev;
1317 1322
1318 if (!netif_running(dev)) 1323 if (!netif_running(dev))
1319 return; 1324 return;
@@ -1431,7 +1436,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
1431 }; 1436 };
1432 u8 options; 1437 u8 options;
1433 1438
1434 RTL_W8(tp, Cfg9346, Cfg9346_Unlock); 1439 rtl_unlock_config_regs(tp);
1435 1440
1436 switch (tp->mac_version) { 1441 switch (tp->mac_version) {
1437 case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38: 1442 case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38:
@@ -1479,7 +1484,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
1479 break; 1484 break;
1480 } 1485 }
1481 1486
1482 RTL_W8(tp, Cfg9346, Cfg9346_Lock); 1487 rtl_lock_config_regs(tp);
1483 1488
1484 device_set_wakeup_enable(tp_to_dev(tp), wolopts); 1489 device_set_wakeup_enable(tp_to_dev(tp), wolopts);
1485} 1490}
@@ -3994,24 +3999,24 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
3994 } 3999 }
3995 4000
3996 /* We may have called phy_speed_down before */ 4001 /* We may have called phy_speed_down before */
3997 phy_speed_up(dev->phydev); 4002 phy_speed_up(tp->phydev);
3998 4003
3999 genphy_soft_reset(dev->phydev); 4004 genphy_soft_reset(tp->phydev);
4000 4005
4001 /* It was reported that several chips end up with 10MBit/Half on a 4006 /* It was reported that several chips end up with 10MBit/Half on a
4002 * 1GBit link after resuming from S3. For whatever reason the PHY on 4007 * 1GBit link after resuming from S3. For whatever reason the PHY on
4003 * these chips doesn't properly start a renegotiation when soft-reset. 4008 * these chips doesn't properly start a renegotiation when soft-reset.
4004 * Explicitly requesting a renegotiation fixes this. 4009 * Explicitly requesting a renegotiation fixes this.
4005 */ 4010 */
4006 if (dev->phydev->autoneg == AUTONEG_ENABLE) 4011 if (tp->phydev->autoneg == AUTONEG_ENABLE)
4007 phy_restart_aneg(dev->phydev); 4012 phy_restart_aneg(tp->phydev);
4008} 4013}
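The comment above carries the reasoning: on the affected chips a soft reset alone does not restart autonegotiation, so resuming from S3 can leave a gigabit link stuck at 10MBit/Half. The fixup, condensed (phylib calls on the cached tp->phydev):

	genphy_soft_reset(tp->phydev);        /* reset alone may not renegotiate */
	if (tp->phydev->autoneg == AUTONEG_ENABLE)
		phy_restart_aneg(tp->phydev); /* force a fresh negotiation */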
4009 4014
4010static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr) 4015static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr)
4011{ 4016{
4012 rtl_lock_work(tp); 4017 rtl_lock_work(tp);
4013 4018
4014 RTL_W8(tp, Cfg9346, Cfg9346_Unlock); 4019 rtl_unlock_config_regs(tp);
4015 4020
4016 RTL_W32(tp, MAC4, addr[4] | addr[5] << 8); 4021 RTL_W32(tp, MAC4, addr[4] | addr[5] << 8);
4017 RTL_R32(tp, MAC4); 4022 RTL_R32(tp, MAC4);
@@ -4022,7 +4027,7 @@ static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr)
4022 if (tp->mac_version == RTL_GIGA_MAC_VER_34) 4027 if (tp->mac_version == RTL_GIGA_MAC_VER_34)
4023 rtl_rar_exgmac_set(tp, addr); 4028 rtl_rar_exgmac_set(tp, addr);
4024 4029
4025 RTL_W8(tp, Cfg9346, Cfg9346_Lock); 4030 rtl_lock_config_regs(tp);
4026 4031
4027 rtl_unlock_work(tp); 4032 rtl_unlock_work(tp);
4028} 4033}
@@ -4049,10 +4054,12 @@ static int rtl_set_mac_address(struct net_device *dev, void *p)
4049 4054
4050static int rtl8169_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) 4055static int rtl8169_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
4051{ 4056{
4057 struct rtl8169_private *tp = netdev_priv(dev);
4058
4052 if (!netif_running(dev)) 4059 if (!netif_running(dev))
4053 return -ENODEV; 4060 return -ENODEV;
4054 4061
4055 return phy_mii_ioctl(dev->phydev, ifr, cmd); 4062 return phy_mii_ioctl(tp->phydev, ifr, cmd);
4056} 4063}
4057 4064
4058static void rtl_init_mdio_ops(struct rtl8169_private *tp) 4065static void rtl_init_mdio_ops(struct rtl8169_private *tp)
@@ -4101,15 +4108,10 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
4101 4108
4102static bool rtl_wol_pll_power_down(struct rtl8169_private *tp) 4109static bool rtl_wol_pll_power_down(struct rtl8169_private *tp)
4103{ 4110{
4104 struct phy_device *phydev;
4105
4106 if (!__rtl8169_get_wol(tp)) 4111 if (!__rtl8169_get_wol(tp))
4107 return false; 4112 return false;
4108 4113
4109 /* phydev may not be attached to netdevice */ 4114 phy_speed_down(tp->phydev, false);
4110 phydev = mdiobus_get_phy(tp->mii_bus, 0);
4111
4112 phy_speed_down(phydev, false);
4113 rtl_wol_suspend_quirk(tp); 4115 rtl_wol_suspend_quirk(tp);
4114 4116
4115 return true; 4117 return true;
@@ -4178,7 +4180,7 @@ static void r8168_pll_power_up(struct rtl8169_private *tp)
4178 break; 4180 break;
4179 } 4181 }
4180 4182
4181 phy_resume(tp->dev->phydev); 4183 phy_resume(tp->phydev);
4182 /* give MAC/PHY some time to resume */ 4184 /* give MAC/PHY some time to resume */
4183 msleep(20); 4185 msleep(20);
4184} 4186}
@@ -4234,18 +4236,18 @@ static void rtl8169_init_ring_indexes(struct rtl8169_private *tp)
4234static void rtl_hw_jumbo_enable(struct rtl8169_private *tp) 4236static void rtl_hw_jumbo_enable(struct rtl8169_private *tp)
4235{ 4237{
4236 if (tp->jumbo_ops.enable) { 4238 if (tp->jumbo_ops.enable) {
4237 RTL_W8(tp, Cfg9346, Cfg9346_Unlock); 4239 rtl_unlock_config_regs(tp);
4238 tp->jumbo_ops.enable(tp); 4240 tp->jumbo_ops.enable(tp);
4239 RTL_W8(tp, Cfg9346, Cfg9346_Lock); 4241 rtl_lock_config_regs(tp);
4240 } 4242 }
4241} 4243}
4242 4244
4243static void rtl_hw_jumbo_disable(struct rtl8169_private *tp) 4245static void rtl_hw_jumbo_disable(struct rtl8169_private *tp)
4244{ 4246{
4245 if (tp->jumbo_ops.disable) { 4247 if (tp->jumbo_ops.disable) {
4246 RTL_W8(tp, Cfg9346, Cfg9346_Unlock); 4248 rtl_unlock_config_regs(tp);
4247 tp->jumbo_ops.disable(tp); 4249 tp->jumbo_ops.disable(tp);
4248 RTL_W8(tp, Cfg9346, Cfg9346_Lock); 4250 rtl_lock_config_regs(tp);
4249 } 4251 }
4250} 4252}
4251 4253
@@ -4566,13 +4568,13 @@ static void rtl_set_rx_mode(struct net_device *dev)
4566 4568
4567static void rtl_hw_start(struct rtl8169_private *tp) 4569static void rtl_hw_start(struct rtl8169_private *tp)
4568{ 4570{
4569 RTL_W8(tp, Cfg9346, Cfg9346_Unlock); 4571 rtl_unlock_config_regs(tp);
4570 4572
4571 tp->hw_start(tp); 4573 tp->hw_start(tp);
4572 4574
4573 rtl_set_rx_max_size(tp); 4575 rtl_set_rx_max_size(tp);
4574 rtl_set_rx_tx_desc_registers(tp); 4576 rtl_set_rx_tx_desc_registers(tp);
4575 RTL_W8(tp, Cfg9346, Cfg9346_Lock); 4577 rtl_lock_config_regs(tp);
4576 4578
4577 /* Initially a 10 us delay. Turned it into a PCI commit. - FR */ 4579 /* Initially a 10 us delay. Turned it into a PCI commit. - FR */
4578 RTL_R8(tp, IntrMask); 4580 RTL_R8(tp, IntrMask);
@@ -4696,18 +4698,10 @@ static void rtl_enable_clock_request(struct rtl8169_private *tp)
4696 PCI_EXP_LNKCTL_CLKREQ_EN); 4698 PCI_EXP_LNKCTL_CLKREQ_EN);
4697} 4699}
4698 4700
4699static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable) 4701static void rtl_pcie_state_l2l3_disable(struct rtl8169_private *tp)
4700{ 4702{
4701 u8 data; 4703 /* work around an issue when PCI reset occurs during L2/L3 state */
4702 4704 RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Rdy_to_L23);
4703 data = RTL_R8(tp, Config3);
4704
4705 if (enable)
4706 data |= Rdy_to_L23;
4707 else
4708 data &= ~Rdy_to_L23;
4709
4710 RTL_W8(tp, Config3, data);
4711} 4705}
4712 4706
4713static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) 4707static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)
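The bool parameter could go because every caller passed false; the renamed helper now performs one read-modify-write that clears the bit. Expanded, under the driver's accessors:

	u8 data = RTL_R8(tp, Config3);
	data &= ~Rdy_to_L23;           /* never signal L2/L3 readiness */
	RTL_W8(tp, Config3, data);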
@@ -5028,7 +5022,7 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp)
5028 }; 5022 };
5029 5023
5030 rtl_hw_start_8168f(tp); 5024 rtl_hw_start_8168f(tp);
5031 rtl_pcie_state_l2l3_enable(tp, false); 5025 rtl_pcie_state_l2l3_disable(tp);
5032 5026
5033 rtl_ephy_init(tp, e_info_8168f_1, ARRAY_SIZE(e_info_8168f_1)); 5027 rtl_ephy_init(tp, e_info_8168f_1, ARRAY_SIZE(e_info_8168f_1));
5034 5028
@@ -5062,7 +5056,7 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
5062 rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06, ERIAR_EXGMAC); 5056 rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06, ERIAR_EXGMAC);
5063 rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC); 5057 rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC);
5064 5058
5065 rtl_pcie_state_l2l3_enable(tp, false); 5059 rtl_pcie_state_l2l3_disable(tp);
5066} 5060}
5067 5061
5068static void rtl_hw_start_8168g_1(struct rtl8169_private *tp) 5062static void rtl_hw_start_8168g_1(struct rtl8169_private *tp)
@@ -5168,7 +5162,7 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
5168 5162
5169 rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC); 5163 rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC);
5170 5164
5171 rtl_pcie_state_l2l3_enable(tp, false); 5165 rtl_pcie_state_l2l3_disable(tp);
5172 5166
5173 rtl_writephy(tp, 0x1f, 0x0c42); 5167 rtl_writephy(tp, 0x1f, 0x0c42);
5174 rg_saw_cnt = (rtl_readphy(tp, 0x13) & 0x3fff); 5168 rg_saw_cnt = (rtl_readphy(tp, 0x13) & 0x3fff);
@@ -5245,7 +5239,7 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
5245 5239
5246 RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~TX_10M_PS_EN); 5240 RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~TX_10M_PS_EN);
5247 5241
5248 rtl_pcie_state_l2l3_enable(tp, false); 5242 rtl_pcie_state_l2l3_disable(tp);
5249} 5243}
5250 5244
5251static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp) 5245static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp)
@@ -5516,7 +5510,7 @@ static void rtl_hw_start_8105e_1(struct rtl8169_private *tp)
5516 5510
5517 rtl_ephy_init(tp, e_info_8105e_1, ARRAY_SIZE(e_info_8105e_1)); 5511 rtl_ephy_init(tp, e_info_8105e_1, ARRAY_SIZE(e_info_8105e_1));
5518 5512
5519 rtl_pcie_state_l2l3_enable(tp, false); 5513 rtl_pcie_state_l2l3_disable(tp);
5520} 5514}
5521 5515
5522static void rtl_hw_start_8105e_2(struct rtl8169_private *tp) 5516static void rtl_hw_start_8105e_2(struct rtl8169_private *tp)
@@ -5551,7 +5545,7 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
5551 rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC); 5545 rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
5552 rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0e00, 0xff00, ERIAR_EXGMAC); 5546 rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0e00, 0xff00, ERIAR_EXGMAC);
5553 5547
5554 rtl_pcie_state_l2l3_enable(tp, false); 5548 rtl_pcie_state_l2l3_disable(tp);
5555} 5549}
5556 5550
5557static void rtl_hw_start_8106(struct rtl8169_private *tp) 5551static void rtl_hw_start_8106(struct rtl8169_private *tp)
@@ -5565,7 +5559,7 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp)
5565 RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET); 5559 RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET);
5566 RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN); 5560 RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
5567 5561
5568 rtl_pcie_state_l2l3_enable(tp, false); 5562 rtl_pcie_state_l2l3_disable(tp);
5569 rtl_hw_aspm_clkreq_enable(tp, true); 5563 rtl_hw_aspm_clkreq_enable(tp, true);
5570} 5564}
5571 5565
@@ -6201,8 +6195,6 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev)
6201 dev->features &= ~NETIF_F_HIGHDMA; 6195 dev->features &= ~NETIF_F_HIGHDMA;
6202 } 6196 }
6203 6197
6204 rtl8169_hw_reset(tp);
6205
6206 rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING); 6198 rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);
6207} 6199}
6208 6200
@@ -6409,7 +6401,7 @@ release_descriptor:
6409static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) 6401static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
6410{ 6402{
6411 struct rtl8169_private *tp = dev_instance; 6403 struct rtl8169_private *tp = dev_instance;
6412 u16 status = rtl_get_events(tp); 6404 u16 status = RTL_R16(tp, IntrStatus);
6413 u16 irq_mask = RTL_R16(tp, IntrMask); 6405 u16 irq_mask = RTL_R16(tp, IntrMask);
6414 6406
6415 if (status == 0xffff || !(status & irq_mask)) 6407 if (status == 0xffff || !(status & irq_mask))
@@ -6420,8 +6412,8 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
6420 goto out; 6412 goto out;
6421 } 6413 }
6422 6414
6423 if (status & LinkChg && tp->dev->phydev) 6415 if (status & LinkChg)
6424 phy_mac_interrupt(tp->dev->phydev); 6416 phy_mac_interrupt(tp->phydev);
6425 6417
6426 if (unlikely(status & RxFIFOOver && 6418 if (unlikely(status & RxFIFOOver &&
6427 tp->mac_version == RTL_GIGA_MAC_VER_11)) { 6419 tp->mac_version == RTL_GIGA_MAC_VER_11)) {
@@ -6512,12 +6504,12 @@ static void r8169_phylink_handler(struct net_device *ndev)
6512 } 6504 }
6513 6505
6514 if (net_ratelimit()) 6506 if (net_ratelimit())
6515 phy_print_status(ndev->phydev); 6507 phy_print_status(tp->phydev);
6516} 6508}
6517 6509
6518static int r8169_phy_connect(struct rtl8169_private *tp) 6510static int r8169_phy_connect(struct rtl8169_private *tp)
6519{ 6511{
6520 struct phy_device *phydev = mdiobus_get_phy(tp->mii_bus, 0); 6512 struct phy_device *phydev = tp->phydev;
6521 phy_interface_t phy_mode; 6513 phy_interface_t phy_mode;
6522 int ret; 6514 int ret;
6523 6515
@@ -6544,7 +6536,7 @@ static void rtl8169_down(struct net_device *dev)
6544{ 6536{
6545 struct rtl8169_private *tp = netdev_priv(dev); 6537 struct rtl8169_private *tp = netdev_priv(dev);
6546 6538
6547 phy_stop(dev->phydev); 6539 phy_stop(tp->phydev);
6548 6540
6549 napi_disable(&tp->napi); 6541 napi_disable(&tp->napi);
6550 netif_stop_queue(dev); 6542 netif_stop_queue(dev);
@@ -6586,7 +6578,7 @@ static int rtl8169_close(struct net_device *dev)
6586 6578
6587 cancel_work_sync(&tp->wk.work); 6579 cancel_work_sync(&tp->wk.work);
6588 6580
6589 phy_disconnect(dev->phydev); 6581 phy_disconnect(tp->phydev);
6590 6582
6591 pci_free_irq(pdev, 0, tp); 6583 pci_free_irq(pdev, 0, tp);
6592 6584
@@ -6637,10 +6629,6 @@ static int rtl_open(struct net_device *dev)
6637 if (retval < 0) 6629 if (retval < 0)
6638 goto err_free_rx_1; 6630 goto err_free_rx_1;
6639 6631
6640 INIT_WORK(&tp->wk.work, rtl_task);
6641
6642 smp_mb();
6643
6644 rtl_request_firmware(tp); 6632 rtl_request_firmware(tp);
6645 6633
6646 retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, tp, 6634 retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, tp,
@@ -6667,7 +6655,7 @@ static int rtl_open(struct net_device *dev)
6667 if (!rtl8169_init_counter_offsets(tp)) 6655 if (!rtl8169_init_counter_offsets(tp))
6668 netif_warn(tp, hw, dev, "counter reset/update failed\n"); 6656 netif_warn(tp, hw, dev, "counter reset/update failed\n");
6669 6657
6670 phy_start(dev->phydev); 6658 phy_start(tp->phydev);
6671 netif_start_queue(dev); 6659 netif_start_queue(dev);
6672 6660
6673 rtl_unlock_work(tp); 6661 rtl_unlock_work(tp);
@@ -6756,7 +6744,7 @@ static void rtl8169_net_suspend(struct net_device *dev)
6756 if (!netif_running(dev)) 6744 if (!netif_running(dev))
6757 return; 6745 return;
6758 6746
6759 phy_stop(dev->phydev); 6747 phy_stop(tp->phydev);
6760 netif_device_detach(dev); 6748 netif_device_detach(dev);
6761 6749
6762 rtl_lock_work(tp); 6750 rtl_lock_work(tp);
@@ -6791,14 +6779,13 @@ static void __rtl8169_resume(struct net_device *dev)
6791 rtl_pll_power_up(tp); 6779 rtl_pll_power_up(tp);
6792 rtl8169_init_phy(dev, tp); 6780 rtl8169_init_phy(dev, tp);
6793 6781
6794 phy_start(tp->dev->phydev); 6782 phy_start(tp->phydev);
6795 6783
6796 rtl_lock_work(tp); 6784 rtl_lock_work(tp);
6797 napi_enable(&tp->napi); 6785 napi_enable(&tp->napi);
6798 set_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags); 6786 set_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags);
6787 rtl_reset_work(tp);
6799 rtl_unlock_work(tp); 6788 rtl_unlock_work(tp);
6800
6801 rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);
6802} 6789}
6803 6790
6804static int rtl8169_resume(struct device *device) 6791static int rtl8169_resume(struct device *device)
@@ -6935,7 +6922,7 @@ static void rtl_remove_one(struct pci_dev *pdev)
6935 netif_napi_del(&tp->napi); 6922 netif_napi_del(&tp->napi);
6936 6923
6937 unregister_netdev(dev); 6924 unregister_netdev(dev);
6938 mdiobus_unregister(tp->mii_bus); 6925 mdiobus_unregister(tp->phydev->mdio.bus);
6939 6926
6940 rtl_release_firmware(tp); 6927 rtl_release_firmware(tp);
6941 6928
@@ -6995,9 +6982,9 @@ static int rtl_alloc_irq(struct rtl8169_private *tp)
6995 unsigned int flags; 6982 unsigned int flags;
6996 6983
6997 if (tp->mac_version <= RTL_GIGA_MAC_VER_06) { 6984 if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
6998 RTL_W8(tp, Cfg9346, Cfg9346_Unlock); 6985 rtl_unlock_config_regs(tp);
6999 RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable); 6986 RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable);
7000 RTL_W8(tp, Cfg9346, Cfg9346_Lock); 6987 rtl_lock_config_regs(tp);
7001 flags = PCI_IRQ_LEGACY; 6988 flags = PCI_IRQ_LEGACY;
7002 } else { 6989 } else {
7003 flags = PCI_IRQ_ALL_TYPES; 6990 flags = PCI_IRQ_ALL_TYPES;
@@ -7042,7 +7029,6 @@ static int r8169_mdio_write_reg(struct mii_bus *mii_bus, int phyaddr,
7042static int r8169_mdio_register(struct rtl8169_private *tp) 7029static int r8169_mdio_register(struct rtl8169_private *tp)
7043{ 7030{
7044 struct pci_dev *pdev = tp->pci_dev; 7031 struct pci_dev *pdev = tp->pci_dev;
7045 struct phy_device *phydev;
7046 struct mii_bus *new_bus; 7032 struct mii_bus *new_bus;
7047 int ret; 7033 int ret;
7048 7034
@@ -7064,16 +7050,14 @@ static int r8169_mdio_register(struct rtl8169_private *tp)
7064 if (ret) 7050 if (ret)
7065 return ret; 7051 return ret;
7066 7052
7067 phydev = mdiobus_get_phy(new_bus, 0); 7053 tp->phydev = mdiobus_get_phy(new_bus, 0);
7068 if (!phydev) { 7054 if (!tp->phydev) {
7069 mdiobus_unregister(new_bus); 7055 mdiobus_unregister(new_bus);
7070 return -ENODEV; 7056 return -ENODEV;
7071 } 7057 }
7072 7058
7073 /* PHY will be woken up in rtl_open() */ 7059 /* PHY will be woken up in rtl_open() */
7074 phy_suspend(phydev); 7060 phy_suspend(tp->phydev);
7075
7076 tp->mii_bus = new_bus;
7077 7061
7078 return 0; 7062 return 0;
7079} 7063}
@@ -7171,6 +7155,32 @@ static void rtl_disable_clk(void *data)
7171 clk_disable_unprepare(data); 7155 clk_disable_unprepare(data);
7172} 7156}
7173 7157
7158static int rtl_get_ether_clk(struct rtl8169_private *tp)
7159{
7160 struct device *d = tp_to_dev(tp);
7161 struct clk *clk;
7162 int rc;
7163
7164 clk = devm_clk_get(d, "ether_clk");
7165 if (IS_ERR(clk)) {
7166 rc = PTR_ERR(clk);
7167 if (rc == -ENOENT)
7168 /* clk-core allows NULL (for suspend / resume) */
7169 rc = 0;
7170 else if (rc != -EPROBE_DEFER)
7171 dev_err(d, "failed to get clk: %d\n", rc);
7172 } else {
7173 tp->clk = clk;
7174 rc = clk_prepare_enable(clk);
7175 if (rc)
7176 dev_err(d, "failed to enable clk: %d\n", rc);
7177 else
7178 rc = devm_add_action_or_reset(d, rtl_disable_clk, clk);
7179 }
7180
7181 return rc;
7182}
7183
7174static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) 7184static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
7175{ 7185{
7176 const struct rtl_cfg_info *cfg = rtl_cfg_infos + ent->driver_data; 7186 const struct rtl_cfg_info *cfg = rtl_cfg_infos + ent->driver_data;
@@ -7192,30 +7202,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
7192 tp->supports_gmii = cfg->has_gmii; 7202 tp->supports_gmii = cfg->has_gmii;
7193 7203
7194 /* Get the *optional* external "ether_clk" used on some boards */ 7204 /* Get the *optional* external "ether_clk" used on some boards */
7195 tp->clk = devm_clk_get(&pdev->dev, "ether_clk"); 7205 rc = rtl_get_ether_clk(tp);
7196 if (IS_ERR(tp->clk)) { 7206 if (rc)
7197 rc = PTR_ERR(tp->clk); 7207 return rc;
7198 if (rc == -ENOENT) {
7199 /* clk-core allows NULL (for suspend / resume) */
7200 tp->clk = NULL;
7201 } else if (rc == -EPROBE_DEFER) {
7202 return rc;
7203 } else {
7204 dev_err(&pdev->dev, "failed to get clk: %d\n", rc);
7205 return rc;
7206 }
7207 } else {
7208 rc = clk_prepare_enable(tp->clk);
7209 if (rc) {
7210 dev_err(&pdev->dev, "failed to enable clk: %d\n", rc);
7211 return rc;
7212 }
7213
7214 rc = devm_add_action_or_reset(&pdev->dev, rtl_disable_clk,
7215 tp->clk);
7216 if (rc)
7217 return rc;
7218 }
7219 7208
7220 /* enable device (incl. PCI PM wakeup and hotplug setup) */ 7209 /* enable device (incl. PCI PM wakeup and hotplug setup) */
7221 rc = pcim_enable_device(pdev); 7210 rc = pcim_enable_device(pdev);
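The -ENOENT special case in the new helper implements "optional clock" semantics by hand: a missing ether_clk leaves tp->clk NULL, which the clk core accepts on suspend/resume. A sketch of the core of the pattern; later kernels add devm_clk_get_optional(), which folds the same logic into one call:

	clk = devm_clk_get(d, "ether_clk");
	if (IS_ERR(clk)) {
		if (PTR_ERR(clk) == -ENOENT)
			clk = NULL;          /* board simply has no ether_clk */
		else
			return PTR_ERR(clk); /* real error, incl. -EPROBE_DEFER */
	}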
@@ -7300,6 +7289,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
7300 tp->saved_wolopts = __rtl8169_get_wol(tp); 7289 tp->saved_wolopts = __rtl8169_get_wol(tp);
7301 7290
7302 mutex_init(&tp->wk.mutex); 7291 mutex_init(&tp->wk.mutex);
7292 INIT_WORK(&tp->wk.work, rtl_task);
7303 u64_stats_init(&tp->rx_stats.syncp); 7293 u64_stats_init(&tp->rx_stats.syncp);
7304 u64_stats_init(&tp->tx_stats.syncp); 7294 u64_stats_init(&tp->tx_stats.syncp);
7305 7295
@@ -7406,7 +7396,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
7406 return 0; 7396 return 0;
7407 7397
7408err_mdio_unregister: 7398err_mdio_unregister:
7409 mdiobus_unregister(tp->mii_bus); 7399 mdiobus_unregister(tp->phydev->mdio.bus);
7410 return rc; 7400 return rc;
7411} 7401}
7412 7402
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index 6213827e3956..62a205eba9f7 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -2725,7 +2725,7 @@ rocker_fdb_offload_notify(struct rocker_port *rocker_port,
2725 info.vid = recv_info->vid; 2725 info.vid = recv_info->vid;
2726 info.offloaded = true; 2726 info.offloaded = true;
2727 call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, 2727 call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED,
2728 rocker_port->dev, &info.info); 2728 rocker_port->dev, &info.info, NULL);
2729} 2729}
2730 2730
2731static void rocker_switchdev_event_work(struct work_struct *work) 2731static void rocker_switchdev_event_work(struct work_struct *work)
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
index 6473cc68c2d5..bea7895930f6 100644
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -1833,10 +1833,10 @@ static void ofdpa_port_fdb_learn_work(struct work_struct *work)
1833 rtnl_lock(); 1833 rtnl_lock();
1834 if (learned && removing) 1834 if (learned && removing)
1835 call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, 1835 call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
1836 lw->ofdpa_port->dev, &info.info); 1836 lw->ofdpa_port->dev, &info.info, NULL);
1837 else if (learned && !removing) 1837 else if (learned && !removing)
1838 call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE, 1838 call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE,
1839 lw->ofdpa_port->dev, &info.info); 1839 lw->ofdpa_port->dev, &info.info, NULL);
1840 rtnl_unlock(); 1840 rtnl_unlock();
1841 1841
1842 kfree(work); 1842 kfree(work);
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index b6a50058bb8d..ee42d4a887d7 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -6041,10 +6041,11 @@ static const struct efx_ef10_nvram_type_info efx_ef10_nvram_types[] = {
6041 { NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT3, 0, 3, "sfc_exp_rom_cfg" }, 6041 { NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT3, 0, 3, "sfc_exp_rom_cfg" },
6042 { NVRAM_PARTITION_TYPE_LICENSE, 0, 0, "sfc_license" }, 6042 { NVRAM_PARTITION_TYPE_LICENSE, 0, 0, "sfc_license" },
6043 { NVRAM_PARTITION_TYPE_PHY_MIN, 0xff, 0, "sfc_phy_fw" }, 6043 { NVRAM_PARTITION_TYPE_PHY_MIN, 0xff, 0, "sfc_phy_fw" },
6044 /* MUM and SUC firmware share the same partition type */
6045 { NVRAM_PARTITION_TYPE_MUM_FIRMWARE, 0, 0, "sfc_mumfw" }, 6044 { NVRAM_PARTITION_TYPE_MUM_FIRMWARE, 0, 0, "sfc_mumfw" },
6046 { NVRAM_PARTITION_TYPE_EXPANSION_UEFI, 0, 0, "sfc_uefi" }, 6045 { NVRAM_PARTITION_TYPE_EXPANSION_UEFI, 0, 0, "sfc_uefi" },
6047 { NVRAM_PARTITION_TYPE_STATUS, 0, 0, "sfc_status" } 6046 { NVRAM_PARTITION_TYPE_DYNCONFIG_DEFAULTS, 0, 0, "sfc_dynamic_cfg_dflt" },
6047 { NVRAM_PARTITION_TYPE_ROMCONFIG_DEFAULTS, 0, 0, "sfc_exp_rom_cfg_dflt" },
6048 { NVRAM_PARTITION_TYPE_STATUS, 0, 0, "sfc_status" },
6048}; 6049};
6049 6050
6050static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, 6051static int efx_ef10_mtd_probe_partition(struct efx_nic *efx,
@@ -6071,8 +6072,15 @@ static int efx_ef10_mtd_probe_partition(struct efx_nic *efx,
6071 rc = efx_mcdi_nvram_info(efx, type, &size, &erase_size, &protected); 6072 rc = efx_mcdi_nvram_info(efx, type, &size, &erase_size, &protected);
6072 if (rc) 6073 if (rc)
6073 return rc; 6074 return rc;
6075 if (protected &&
6076 (type != NVRAM_PARTITION_TYPE_DYNCONFIG_DEFAULTS &&
6077 type != NVRAM_PARTITION_TYPE_ROMCONFIG_DEFAULTS))
6078 /* Hide protected partitions that don't provide defaults. */
6079 return -ENODEV;
6080
6074 if (protected) 6081 if (protected)
6075 return -ENODEV; /* hide it */ 6082 /* Protected partitions are read only. */
6083 erase_size = 0;
6076 6084
6077 part->nvram_type = type; 6085 part->nvram_type = type;
6078 6086
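The decision implemented above, as a table (sketch; partition types as in this patch). The mtd.c change later in this patch appears to be the other half: a partition registered without erase geometry is not marked writable.

	/* protected && not a *_DEFAULTS partition -> hide (-ENODEV)
	 * protected && a *_DEFAULTS partition     -> expose, erase_size = 0
	 * unprotected                             -> expose as before
	 */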
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index dfad93fca0a6..295ec1787b9f 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -2074,22 +2074,26 @@ fail:
2074 2074
2075static int efx_mcdi_nvram_update_start(struct efx_nic *efx, unsigned int type) 2075static int efx_mcdi_nvram_update_start(struct efx_nic *efx, unsigned int type)
2076{ 2076{
2077 MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_START_IN_LEN); 2077 MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_START_V2_IN_LEN);
2078 int rc; 2078 int rc;
2079 2079
2080 MCDI_SET_DWORD(inbuf, NVRAM_UPDATE_START_IN_TYPE, type); 2080 MCDI_SET_DWORD(inbuf, NVRAM_UPDATE_START_IN_TYPE, type);
2081 MCDI_POPULATE_DWORD_1(inbuf, NVRAM_UPDATE_START_V2_IN_FLAGS,
2082 NVRAM_UPDATE_START_V2_IN_FLAG_REPORT_VERIFY_RESULT,
2083 1);
2081 2084
2082 BUILD_BUG_ON(MC_CMD_NVRAM_UPDATE_START_OUT_LEN != 0); 2085 BUILD_BUG_ON(MC_CMD_NVRAM_UPDATE_START_OUT_LEN != 0);
2083 2086
2084 rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_UPDATE_START, inbuf, sizeof(inbuf), 2087 rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_UPDATE_START, inbuf, sizeof(inbuf),
2085 NULL, 0, NULL); 2088 NULL, 0, NULL);
2089
2086 return rc; 2090 return rc;
2087} 2091}
2088 2092
2089static int efx_mcdi_nvram_read(struct efx_nic *efx, unsigned int type, 2093static int efx_mcdi_nvram_read(struct efx_nic *efx, unsigned int type,
2090 loff_t offset, u8 *buffer, size_t length) 2094 loff_t offset, u8 *buffer, size_t length)
2091{ 2095{
2092 MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_READ_IN_LEN); 2096 MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_READ_IN_V2_LEN);
2093 MCDI_DECLARE_BUF(outbuf, 2097 MCDI_DECLARE_BUF(outbuf,
2094 MC_CMD_NVRAM_READ_OUT_LEN(EFX_MCDI_NVRAM_LEN_MAX)); 2098 MC_CMD_NVRAM_READ_OUT_LEN(EFX_MCDI_NVRAM_LEN_MAX));
2095 size_t outlen; 2099 size_t outlen;
@@ -2098,6 +2102,8 @@ static int efx_mcdi_nvram_read(struct efx_nic *efx, unsigned int type,
2098 MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_TYPE, type); 2102 MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_TYPE, type);
2099 MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_OFFSET, offset); 2103 MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_OFFSET, offset);
2100 MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_LENGTH, length); 2104 MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_LENGTH, length);
2105 MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_V2_MODE,
2106 MC_CMD_NVRAM_READ_IN_V2_DEFAULT);
2101 2107
2102 rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_READ, inbuf, sizeof(inbuf), 2108 rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_READ, inbuf, sizeof(inbuf),
2103 outbuf, sizeof(outbuf), &outlen); 2109 outbuf, sizeof(outbuf), &outlen);
@@ -2147,15 +2153,51 @@ static int efx_mcdi_nvram_erase(struct efx_nic *efx, unsigned int type,
2147 2153
2148static int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type) 2154static int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type)
2149{ 2155{
2150 MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_FINISH_IN_LEN); 2156 MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_LEN);
2151 int rc; 2157 MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_LEN);
2158 size_t outlen;
2159 int rc, rc2;
2152 2160
2153 MCDI_SET_DWORD(inbuf, NVRAM_UPDATE_FINISH_IN_TYPE, type); 2161 MCDI_SET_DWORD(inbuf, NVRAM_UPDATE_FINISH_IN_TYPE, type);
2154 2162 /* Always set this flag. Old firmware ignores it */
2155 BUILD_BUG_ON(MC_CMD_NVRAM_UPDATE_FINISH_OUT_LEN != 0); 2163 MCDI_POPULATE_DWORD_1(inbuf, NVRAM_UPDATE_FINISH_V2_IN_FLAGS,
2164 NVRAM_UPDATE_FINISH_V2_IN_FLAG_REPORT_VERIFY_RESULT,
2165 1);
2156 2166
2157 rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_UPDATE_FINISH, inbuf, sizeof(inbuf), 2167 rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_UPDATE_FINISH, inbuf, sizeof(inbuf),
2158 NULL, 0, NULL); 2168 outbuf, sizeof(outbuf), &outlen);
2169 if (!rc && outlen >= MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_LEN) {
2170 rc2 = MCDI_DWORD(outbuf, NVRAM_UPDATE_FINISH_V2_OUT_RESULT_CODE);
2171 if (rc2 != MC_CMD_NVRAM_VERIFY_RC_SUCCESS)
2172 netif_err(efx, drv, efx->net_dev,
2173 "NVRAM update failed verification with code 0x%x\n",
2174 rc2);
2175 switch (rc2) {
2176 case MC_CMD_NVRAM_VERIFY_RC_SUCCESS:
2177 break;
2178 case MC_CMD_NVRAM_VERIFY_RC_CMS_CHECK_FAILED:
2179 case MC_CMD_NVRAM_VERIFY_RC_MESSAGE_DIGEST_CHECK_FAILED:
2180 case MC_CMD_NVRAM_VERIFY_RC_SIGNATURE_CHECK_FAILED:
2181 case MC_CMD_NVRAM_VERIFY_RC_TRUSTED_APPROVERS_CHECK_FAILED:
2182 case MC_CMD_NVRAM_VERIFY_RC_SIGNATURE_CHAIN_CHECK_FAILED:
2183 rc = -EIO;
2184 break;
2185 case MC_CMD_NVRAM_VERIFY_RC_INVALID_CMS_FORMAT:
2186 case MC_CMD_NVRAM_VERIFY_RC_BAD_MESSAGE_DIGEST:
2187 rc = -EINVAL;
2188 break;
2189 case MC_CMD_NVRAM_VERIFY_RC_NO_VALID_SIGNATURES:
2190 case MC_CMD_NVRAM_VERIFY_RC_NO_TRUSTED_APPROVERS:
2191 case MC_CMD_NVRAM_VERIFY_RC_NO_SIGNATURE_MATCH:
2192 rc = -EPERM;
2193 break;
2194 default:
2195 netif_err(efx, drv, efx->net_dev,
2196 "Unknown response to NVRAM_UPDATE_FINISH\n");
2197 rc = -EIO;
2198 }
2199 }
2200
2159 return rc; 2201 return rc;
2160} 2202}
2161 2203
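The verify result is firmware's verdict, not the transport's, so it arrives in the response payload and needs its own errno mapping. The grouping above, restated as a standalone helper (hypothetical name; constants as in this patch):

	static int efx_nvram_verify_rc_to_errno(u32 rc2)
	{
		switch (rc2) {
		case MC_CMD_NVRAM_VERIFY_RC_SUCCESS:
			return 0;
		case MC_CMD_NVRAM_VERIFY_RC_INVALID_CMS_FORMAT:
		case MC_CMD_NVRAM_VERIFY_RC_BAD_MESSAGE_DIGEST:
			return -EINVAL; /* image is malformed */
		case MC_CMD_NVRAM_VERIFY_RC_NO_VALID_SIGNATURES:
		case MC_CMD_NVRAM_VERIFY_RC_NO_TRUSTED_APPROVERS:
		case MC_CMD_NVRAM_VERIFY_RC_NO_SIGNATURE_MATCH:
			return -EPERM;  /* not signed by a trusted party */
		default:
			return -EIO;    /* any failed or unrecognised check */
		}
	}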
diff --git a/drivers/net/ethernet/sfc/mtd.c b/drivers/net/ethernet/sfc/mtd.c
index 4ac30b6e5dab..0d03e0577d85 100644
--- a/drivers/net/ethernet/sfc/mtd.c
+++ b/drivers/net/ethernet/sfc/mtd.c
@@ -66,6 +66,9 @@ int efx_mtd_add(struct efx_nic *efx, struct efx_mtd_partition *parts,
66 66
67 part->mtd.writesize = 1; 67 part->mtd.writesize = 1;
68 68
69 if (!(part->mtd.flags & MTD_NO_ERASE))
70 part->mtd.flags |= MTD_WRITEABLE;
71
69 part->mtd.owner = THIS_MODULE; 72 part->mtd.owner = THIS_MODULE;
70 part->mtd.priv = efx; 73 part->mtd.priv = efx;
71 part->mtd.name = part->name; 74 part->mtd.name = part->name;
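A one-line reading of the new flag logic: partitions registered without erase support stay read-only, everything else is now explicitly writable. Sketch:

	if (!(part->mtd.flags & MTD_NO_ERASE))
		part->mtd.flags |= MTD_WRITEABLE; /* erasable implies writable here */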
diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c
index b9221fc1674d..3e7631160384 100644
--- a/drivers/net/ethernet/sun/sungem.c
+++ b/drivers/net/ethernet/sun/sungem.c
@@ -2760,7 +2760,7 @@ static void get_gem_mac_nonobp(struct pci_dev *pdev, unsigned char *dev_addr)
2760 void __iomem *p = pci_map_rom(pdev, &size); 2760 void __iomem *p = pci_map_rom(pdev, &size);
2761 2761
2762 if (p) { 2762 if (p) {
2763 int found; 2763 int found;
2764 2764
2765 found = readb(p) == 0x55 && 2765 found = readb(p) == 0x55 &&
2766 readb(p + 1) == 0xaa && 2766 readb(p + 1) == 0xaa &&
diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c
index 396e1cd10667..fec275e2208d 100644
--- a/drivers/net/ethernet/ti/cpsw-phy-sel.c
+++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c
@@ -78,7 +78,7 @@ static void cpsw_gmii_sel_am3352(struct cpsw_phy_sel_priv *priv,
78 case PHY_INTERFACE_MODE_MII: 78 case PHY_INTERFACE_MODE_MII:
79 mode = AM33XX_GMII_SEL_MODE_MII; 79 mode = AM33XX_GMII_SEL_MODE_MII;
80 break; 80 break;
81 }; 81 }
82 82
83 mask = GMII_SEL_MODE_MASK << (slave * 2) | BIT(slave + 6); 83 mask = GMII_SEL_MODE_MASK << (slave * 2) | BIT(slave + 6);
84 mask |= BIT(slave + 4); 84 mask |= BIT(slave + 4);
@@ -133,7 +133,7 @@ static void cpsw_gmii_sel_dra7xx(struct cpsw_phy_sel_priv *priv,
133 case PHY_INTERFACE_MODE_MII: 133 case PHY_INTERFACE_MODE_MII:
134 mode = AM33XX_GMII_SEL_MODE_MII; 134 mode = AM33XX_GMII_SEL_MODE_MII;
135 break; 135 break;
136 }; 136 }
137 137
138 switch (slave) { 138 switch (slave) {
139 case 0: 139 case 0:
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 6d067176320f..cf22a79af66b 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -963,7 +963,8 @@ static int macvlan_vlan_rx_kill_vid(struct net_device *dev,
963static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], 963static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
964 struct net_device *dev, 964 struct net_device *dev,
965 const unsigned char *addr, u16 vid, 965 const unsigned char *addr, u16 vid,
966 u16 flags) 966 u16 flags,
967 struct netlink_ext_ack *extack)
967{ 968{
968 struct macvlan_dev *vlan = netdev_priv(dev); 969 struct macvlan_dev *vlan = netdev_priv(dev);
969 int err = -EINVAL; 970 int err = -EINVAL;
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 66b9cfe692fc..2e53ba3fa2e7 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -39,8 +39,6 @@
39#include <linux/io.h> 39#include <linux/io.h>
40#include <linux/uaccess.h> 40#include <linux/uaccess.h>
41 41
42#include <asm/irq.h>
43
44#define CREATE_TRACE_POINTS 42#define CREATE_TRACE_POINTS
45#include <trace/events/mdio.h> 43#include <trace/events/mdio.h>
46 44
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index b1f959935f50..0d62b548ab39 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -426,9 +426,6 @@ static int ksz9021_config_init(struct phy_device *phydev)
426 return 0; 426 return 0;
427} 427}
428 428
429#define MII_KSZ9031RN_MMD_CTRL_REG 0x0d
430#define MII_KSZ9031RN_MMD_REGDATA_REG 0x0e
431#define OP_DATA 1
432#define KSZ9031_PS_TO_REG 60 429#define KSZ9031_PS_TO_REG 60
433 430
434/* Extended registers */ 431/* Extended registers */
@@ -446,24 +443,6 @@ static int ksz9021_config_init(struct phy_device *phydev)
446#define MII_KSZ9031RN_EDPD 0x23 443#define MII_KSZ9031RN_EDPD 0x23
447#define MII_KSZ9031RN_EDPD_ENABLE BIT(0) 444#define MII_KSZ9031RN_EDPD_ENABLE BIT(0)
448 445
449static int ksz9031_extended_write(struct phy_device *phydev,
450 u8 mode, u32 dev_addr, u32 regnum, u16 val)
451{
452 phy_write(phydev, MII_KSZ9031RN_MMD_CTRL_REG, dev_addr);
453 phy_write(phydev, MII_KSZ9031RN_MMD_REGDATA_REG, regnum);
454 phy_write(phydev, MII_KSZ9031RN_MMD_CTRL_REG, (mode << 14) | dev_addr);
455 return phy_write(phydev, MII_KSZ9031RN_MMD_REGDATA_REG, val);
456}
457
458static int ksz9031_extended_read(struct phy_device *phydev,
459 u8 mode, u32 dev_addr, u32 regnum)
460{
461 phy_write(phydev, MII_KSZ9031RN_MMD_CTRL_REG, dev_addr);
462 phy_write(phydev, MII_KSZ9031RN_MMD_REGDATA_REG, regnum);
463 phy_write(phydev, MII_KSZ9031RN_MMD_CTRL_REG, (mode << 14) | dev_addr);
464 return phy_read(phydev, MII_KSZ9031RN_MMD_REGDATA_REG);
465}
466
467static int ksz9031_of_load_skew_values(struct phy_device *phydev, 446static int ksz9031_of_load_skew_values(struct phy_device *phydev,
468 const struct device_node *of_node, 447 const struct device_node *of_node,
469 u16 reg, size_t field_sz, 448 u16 reg, size_t field_sz,
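The removed helpers open-coded the standard Clause 22 indirect MMD access that phylib's phy_read_mmd()/phy_write_mmd() already provide, via the 0x0d/0x0e register pair. What the old read path did, step by step (names from the removed code):

	phy_write(phydev, MII_KSZ9031RN_MMD_CTRL_REG, dev_addr);   /* select MMD device */
	phy_write(phydev, MII_KSZ9031RN_MMD_REGDATA_REG, regnum);  /* select register */
	phy_write(phydev, MII_KSZ9031RN_MMD_CTRL_REG,
		  (OP_DATA << 14) | dev_addr);                     /* switch to data mode */
	val = phy_read(phydev, MII_KSZ9031RN_MMD_REGDATA_REG);     /* fetch the value */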
@@ -484,7 +463,7 @@ static int ksz9031_of_load_skew_values(struct phy_device *phydev,
484 return 0; 463 return 0;
485 464
486 if (matches < numfields) 465 if (matches < numfields)
487 newval = ksz9031_extended_read(phydev, OP_DATA, 2, reg); 466 newval = phy_read_mmd(phydev, 2, reg);
488 else 467 else
489 newval = 0; 468 newval = 0;
490 469
@@ -498,7 +477,7 @@ static int ksz9031_of_load_skew_values(struct phy_device *phydev,
498 << (field_sz * i)); 477 << (field_sz * i));
499 } 478 }
500 479
501 return ksz9031_extended_write(phydev, OP_DATA, 2, reg, newval); 480 return phy_write_mmd(phydev, 2, reg, newval);
502} 481}
503 482
504/* Center KSZ9031RNX FLP timing at 16ms. */ 483/* Center KSZ9031RNX FLP timing at 16ms. */
@@ -506,13 +485,13 @@ static int ksz9031_center_flp_timing(struct phy_device *phydev)
506{ 485{
507 int result; 486 int result;
508 487
509 result = ksz9031_extended_write(phydev, OP_DATA, 0, 488 result = phy_write_mmd(phydev, 0, MII_KSZ9031RN_FLP_BURST_TX_HI,
510 MII_KSZ9031RN_FLP_BURST_TX_HI, 0x0006); 489 0x0006);
511 if (result) 490 if (result)
512 return result; 491 return result;
513 492
514 result = ksz9031_extended_write(phydev, OP_DATA, 0, 493 result = phy_write_mmd(phydev, 0, MII_KSZ9031RN_FLP_BURST_TX_LO,
515 MII_KSZ9031RN_FLP_BURST_TX_LO, 0x1A80); 494 0x1A80);
516 if (result) 495 if (result)
517 return result; 496 return result;
518 497
@@ -524,11 +503,11 @@ static int ksz9031_enable_edpd(struct phy_device *phydev)
524{ 503{
525 int reg; 504 int reg;
526 505
527 reg = ksz9031_extended_read(phydev, OP_DATA, 0x1C, MII_KSZ9031RN_EDPD); 506 reg = phy_read_mmd(phydev, 0x1C, MII_KSZ9031RN_EDPD);
528 if (reg < 0) 507 if (reg < 0)
529 return reg; 508 return reg;
530 return ksz9031_extended_write(phydev, OP_DATA, 0x1C, MII_KSZ9031RN_EDPD, 509 return phy_write_mmd(phydev, 0x1C, MII_KSZ9031RN_EDPD,
531 reg | MII_KSZ9031RN_EDPD_ENABLE); 510 reg | MII_KSZ9031RN_EDPD_ENABLE);
532} 511}
533 512
534static int ksz9031_config_init(struct phy_device *phydev) 513static int ksz9031_config_init(struct phy_device *phydev)
@@ -654,7 +633,7 @@ static int ksz9131_of_load_skew_values(struct phy_device *phydev,
654 return 0; 633 return 0;
655 634
656 if (matches < numfields) 635 if (matches < numfields)
657 newval = ksz9031_extended_read(phydev, OP_DATA, 2, reg); 636 newval = phy_read_mmd(phydev, 2, reg);
658 else 637 else
659 newval = 0; 638 newval = 0;
660 639
@@ -668,7 +647,7 @@ static int ksz9131_of_load_skew_values(struct phy_device *phydev,
668 << (field_sz * i)); 647 << (field_sz * i));
669 } 648 }
670 649
671 return ksz9031_extended_write(phydev, OP_DATA, 2, reg, newval); 650 return phy_write_mmd(phydev, 2, reg, newval);
672} 651}
673 652
674static int ksz9131_config_init(struct phy_device *phydev) 653static int ksz9131_config_init(struct phy_device *phydev)
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 189cd2048c3a..745a705a505a 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -13,8 +13,6 @@
13 * 13 *
14 */ 14 */
15 15
16#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
17
18#include <linux/kernel.h> 16#include <linux/kernel.h>
19#include <linux/string.h> 17#include <linux/string.h>
20#include <linux/errno.h> 18#include <linux/errno.h>
@@ -36,8 +34,6 @@
36#include <linux/uaccess.h> 34#include <linux/uaccess.h>
37#include <linux/atomic.h> 35#include <linux/atomic.h>
38 36
39#include <asm/irq.h>
40
41#define PHY_STATE_STR(_state) \ 37#define PHY_STATE_STR(_state) \
42 case PHY_##_state: \ 38 case PHY_##_state: \
43 return __stringify(_state); \ 39 return __stringify(_state); \
@@ -51,7 +47,6 @@ static const char *phy_state_to_str(enum phy_state st)
51 PHY_STATE_STR(RUNNING) 47 PHY_STATE_STR(RUNNING)
52 PHY_STATE_STR(NOLINK) 48 PHY_STATE_STR(NOLINK)
53 PHY_STATE_STR(FORCING) 49 PHY_STATE_STR(FORCING)
54 PHY_STATE_STR(CHANGELINK)
55 PHY_STATE_STR(HALTED) 50 PHY_STATE_STR(HALTED)
56 PHY_STATE_STR(RESUMING) 51 PHY_STATE_STR(RESUMING)
57 } 52 }
@@ -809,8 +804,7 @@ int phy_start_interrupts(struct phy_device *phydev)
809 if (request_threaded_irq(phydev->irq, NULL, phy_interrupt, 804 if (request_threaded_irq(phydev->irq, NULL, phy_interrupt,
810 IRQF_ONESHOT | IRQF_SHARED, 805 IRQF_ONESHOT | IRQF_SHARED,
811 phydev_name(phydev), phydev) < 0) { 806 phydev_name(phydev), phydev) < 0) {
812 pr_warn("%s: Can't get IRQ %d (PHY)\n", 807 phydev_warn(phydev, "Can't get IRQ %d\n", phydev->irq);
813 phydev->mdio.bus->name, phydev->irq);
814 phydev->irq = PHY_POLL; 808 phydev->irq = PHY_POLL;
815 return 0; 809 return 0;
816 } 810 }
@@ -820,23 +814,6 @@ int phy_start_interrupts(struct phy_device *phydev)
820EXPORT_SYMBOL(phy_start_interrupts); 814EXPORT_SYMBOL(phy_start_interrupts);
821 815
822/** 816/**
823 * phy_stop_interrupts - disable interrupts from a PHY device
824 * @phydev: target phy_device struct
825 */
826int phy_stop_interrupts(struct phy_device *phydev)
827{
828 int err = phy_disable_interrupts(phydev);
829
830 if (err)
831 phy_error(phydev);
832
833 free_irq(phydev->irq, phydev);
834
835 return err;
836}
837EXPORT_SYMBOL(phy_stop_interrupts);
838
839/**
840 * phy_stop - Bring down the PHY link, and stop checking the status 817 * phy_stop - Bring down the PHY link, and stop checking the status
841 * @phydev: target phy_device struct 818 * @phydev: target phy_device struct
842 */ 819 */
@@ -859,6 +836,7 @@ void phy_stop(struct phy_device *phydev)
859 mutex_unlock(&phydev->lock); 836 mutex_unlock(&phydev->lock);
860 837
861 phy_state_machine(&phydev->state_queue.work); 838 phy_state_machine(&phydev->state_queue.work);
839 phy_stop_machine(phydev);
862 840
863 /* Cannot call flush_scheduled_work() here as desired because 841 /* Cannot call flush_scheduled_work() here as desired because
864 * of rtnl_lock(), but PHY_HALTED shall guarantee irq handler 842 * of rtnl_lock(), but PHY_HALTED shall guarantee irq handler
@@ -939,7 +917,6 @@ void phy_state_machine(struct work_struct *work)
939 break; 917 break;
940 case PHY_NOLINK: 918 case PHY_NOLINK:
941 case PHY_RUNNING: 919 case PHY_RUNNING:
942 case PHY_CHANGELINK:
943 case PHY_RESUMING: 920 case PHY_RESUMING:
944 err = phy_check_link_status(phydev); 921 err = phy_check_link_status(phydev);
945 break; 922 break;
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 46c86725a693..b61db0a5ba3a 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -37,8 +37,6 @@
37#include <linux/uaccess.h> 37#include <linux/uaccess.h>
38#include <linux/of.h> 38#include <linux/of.h>
39 39
40#include <asm/irq.h>
41
42MODULE_DESCRIPTION("PHY library"); 40MODULE_DESCRIPTION("PHY library");
43MODULE_AUTHOR("Andy Fleming"); 41MODULE_AUTHOR("Andy Fleming");
44MODULE_LICENSE("GPL"); 42MODULE_LICENSE("GPL");
@@ -560,12 +558,31 @@ static const struct device_type mdio_bus_phy_type = {
560 .pm = MDIO_BUS_PHY_PM_OPS, 558 .pm = MDIO_BUS_PHY_PM_OPS,
561}; 559};
562 560
561static int phy_request_driver_module(struct phy_device *dev, int phy_id)
562{
563 int ret;
564
565 ret = request_module(MDIO_MODULE_PREFIX MDIO_ID_FMT,
566 MDIO_ID_ARGS(phy_id));
567 /* we only check for failures in executing the usermode binary,
568 * not whether a PHY driver module exists for the PHY ID
569 */
570 if (IS_ENABLED(CONFIG_MODULES) && ret < 0) {
571 phydev_err(dev, "error %d loading PHY driver module for ID 0x%08x\n",
572 ret, phy_id);
573 return ret;
574 }
575
576 return 0;
577}
578
563struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, 579struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id,
564 bool is_c45, 580 bool is_c45,
565 struct phy_c45_device_ids *c45_ids) 581 struct phy_c45_device_ids *c45_ids)
566{ 582{
567 struct phy_device *dev; 583 struct phy_device *dev;
568 struct mdio_device *mdiodev; 584 struct mdio_device *mdiodev;
585 int ret = 0;
569 586
570 /* We allocate the device, and initialize the default values */ 587 /* We allocate the device, and initialize the default values */
571 dev = kzalloc(sizeof(*dev), GFP_KERNEL); 588 dev = kzalloc(sizeof(*dev), GFP_KERNEL);
@@ -622,15 +639,21 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id,
622 if (!(c45_ids->devices_in_package & (1 << i))) 639 if (!(c45_ids->devices_in_package & (1 << i)))
623 continue; 640 continue;
624 641
625 request_module(MDIO_MODULE_PREFIX MDIO_ID_FMT, 642 ret = phy_request_driver_module(dev,
626 MDIO_ID_ARGS(c45_ids->device_ids[i])); 643 c45_ids->device_ids[i]);
644 if (ret)
645 break;
627 } 646 }
628 } else { 647 } else {
629 request_module(MDIO_MODULE_PREFIX MDIO_ID_FMT, 648 ret = phy_request_driver_module(dev, phy_id);
630 MDIO_ID_ARGS(phy_id));
631 } 649 }
632 650
633 device_initialize(&mdiodev->dev); 651 if (!ret) {
652 device_initialize(&mdiodev->dev);
653 } else {
654 kfree(dev);
655 dev = ERR_PTR(ret);
656 }
634 657
635 return dev; 658 return dev;
636} 659}
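With this change a failed modprobe is fatal: the half-initialised phy_device is freed and an ERR_PTR returned rather than silently continuing without a driver. Caller-side view (sketch, surrounding names assumed):

	phydev = phy_device_create(bus, addr, phy_id, is_c45, &c45_ids);
	if (IS_ERR(phydev))
		return ERR_CAST(phydev); /* propagate the module-load failure */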
@@ -762,15 +785,8 @@ static int get_phy_id(struct mii_bus *bus, int addr, u32 *phy_id,
762 /* Grab the bits from PHYIR1, and put them in the upper half */ 785 /* Grab the bits from PHYIR1, and put them in the upper half */
763 phy_reg = mdiobus_read(bus, addr, MII_PHYSID1); 786 phy_reg = mdiobus_read(bus, addr, MII_PHYSID1);
764 if (phy_reg < 0) { 787 if (phy_reg < 0) {
765 /* if there is no device, return without an error so scanning 788 /* returning -ENODEV doesn't stop bus scanning */
766 * the bus works properly 789 return (phy_reg == -EIO || phy_reg == -ENODEV) ? -ENODEV : -EIO;
767 */
768 if (phy_reg == -EIO || phy_reg == -ENODEV) {
769 *phy_id = 0xffffffff;
770 return 0;
771 }
772
773 return -EIO;
774 } 790 }
775 791
776 *phy_id = (phy_reg & 0xffff) << 16; 792 *phy_id = (phy_reg & 0xffff) << 16;
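The old code swallowed bus errors by reporting phy_id 0xffffffff ("no device"); the new contract distinguishes them, and -ENODEV is chosen precisely because, per the comment, it does not abort the bus scan. The contract, spelled out:

	phy_reg = mdiobus_read(bus, addr, MII_PHYSID1);
	if (phy_reg < 0)
		/* -ENODEV: nothing at this address, keep scanning the bus;
		 * -EIO: the bus itself failed, stop the scan.
		 */
		return (phy_reg == -EIO || phy_reg == -ENODEV) ? -ENODEV : -EIO;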
@@ -831,13 +847,13 @@ int phy_device_register(struct phy_device *phydev)
831 /* Run all of the fixups for this PHY */ 847 /* Run all of the fixups for this PHY */
832 err = phy_scan_fixups(phydev); 848 err = phy_scan_fixups(phydev);
833 if (err) { 849 if (err) {
834 pr_err("PHY %d failed to initialize\n", phydev->mdio.addr); 850 phydev_err(phydev, "failed to initialize\n");
835 goto out; 851 goto out;
836 } 852 }
837 853
838 err = device_add(&phydev->mdio.dev); 854 err = device_add(&phydev->mdio.dev);
839 if (err) { 855 if (err) {
840 pr_err("PHY %d failed to add\n", phydev->mdio.addr); 856 phydev_err(phydev, "failed to add\n");
841 goto out; 857 goto out;
842 } 858 }
843 859
@@ -995,10 +1011,11 @@ EXPORT_SYMBOL(phy_connect);
995 */ 1011 */
996void phy_disconnect(struct phy_device *phydev) 1012void phy_disconnect(struct phy_device *phydev)
997{ 1013{
998 if (phydev->irq > 0) 1014 if (phy_is_started(phydev))
999 phy_stop_interrupts(phydev); 1015 phy_stop(phydev);
1000 1016
1001 phy_stop_machine(phydev); 1017 if (phy_interrupt_is_valid(phydev))
1018 free_irq(phydev->irq, phydev);
1002 1019
1003 phydev->adjust_link = NULL; 1020 phydev->adjust_link = NULL;
1004 1021
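phy_disconnect() now takes over what phy_stop_interrupts() used to do: stop the PHY (which also halts the state machine) if it was started, then release the IRQ if one was requested. Condensed:

	if (phy_is_started(phydev))
		phy_stop(phydev);              /* also stops the state machine */
	if (phy_interrupt_is_valid(phydev))
		free_irq(phydev->irq, phydev); /* undo request_threaded_irq() */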
@@ -1291,6 +1308,36 @@ struct phy_device *phy_attach(struct net_device *dev, const char *bus_id,
1291} 1308}
1292EXPORT_SYMBOL(phy_attach); 1309EXPORT_SYMBOL(phy_attach);
1293 1310
1311static bool phy_driver_is_genphy_kind(struct phy_device *phydev,
1312 struct device_driver *driver)
1313{
1314 struct device *d = &phydev->mdio.dev;
1315 bool ret = false;
1316
1317 if (!phydev->drv)
1318 return ret;
1319
1320 get_device(d);
1321 ret = d->driver == driver;
1322 put_device(d);
1323
1324 return ret;
1325}
1326
1327bool phy_driver_is_genphy(struct phy_device *phydev)
1328{
1329 return phy_driver_is_genphy_kind(phydev,
1330 &genphy_driver.mdiodrv.driver);
1331}
1332EXPORT_SYMBOL_GPL(phy_driver_is_genphy);
1333
1334bool phy_driver_is_genphy_10g(struct phy_device *phydev)
1335{
1336 return phy_driver_is_genphy_kind(phydev,
1337 &genphy_10g_driver.mdiodrv.driver);
1338}
1339EXPORT_SYMBOL_GPL(phy_driver_is_genphy_10g);
1340
1294/** 1341/**
1295 * phy_detach - detach a PHY device from its network device 1342 * phy_detach - detach a PHY device from its network device
1296 * @phydev: target phy_device struct 1343 * @phydev: target phy_device struct
@@ -1322,8 +1369,8 @@ void phy_detach(struct phy_device *phydev)
1322 * from the generic driver so that there's a chance a 1369 * from the generic driver so that there's a chance a
1323 * real driver could be loaded 1370 * real driver could be loaded
1324 */ 1371 */
1325 if (phydev->mdio.dev.driver == &genphy_10g_driver.mdiodrv.driver || 1372 if (phy_driver_is_genphy(phydev) ||
1326 phydev->mdio.dev.driver == &genphy_driver.mdiodrv.driver) 1373 phy_driver_is_genphy_10g(phydev))
1327 device_release_driver(&phydev->mdio.dev); 1374 device_release_driver(&phydev->mdio.dev);
1328 1375
1329 /* 1376 /*
@@ -2267,14 +2314,6 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner)
2267 new_driver->mdiodrv.driver.remove = phy_remove; 2314 new_driver->mdiodrv.driver.remove = phy_remove;
2268 new_driver->mdiodrv.driver.owner = owner; 2315 new_driver->mdiodrv.driver.owner = owner;
2269 2316
2270 /* The following works around an issue where the PHY driver doesn't bind
2271 * to the device, resulting in the genphy driver being used instead of
2272 * the dedicated driver. The root cause of the issue isn't known yet
2273 * and seems to be in the base driver core. Once this is fixed we may
2274 * remove this workaround.
2275 */
2276 new_driver->mdiodrv.driver.probe_type = PROBE_FORCE_SYNCHRONOUS;
2277
2278 retval = driver_register(&new_driver->mdiodrv.driver); 2317 retval = driver_register(&new_driver->mdiodrv.driver);
2279 if (retval) { 2318 if (retval) {
2280 pr_err("%s: Error %d in registering driver\n", 2319 pr_err("%s: Error %d in registering driver\n",
diff --git a/drivers/net/sb1000.c b/drivers/net/sb1000.c
index 7820fced33f6..941cfa8f1c2a 100644
--- a/drivers/net/sb1000.c
+++ b/drivers/net/sb1000.c
@@ -535,17 +535,20 @@ sb1000_activate(const int ioaddr[], const char* name)
535 int status; 535 int status;
536 536
537 ssleep(1); 537 ssleep(1);
538 if ((status = card_send_command(ioaddr, name, Command0, st))) 538 status = card_send_command(ioaddr, name, Command0, st);
539 if (status)
539 return status; 540 return status;
540 if ((status = card_send_command(ioaddr, name, Command1, st))) 541 status = card_send_command(ioaddr, name, Command1, st);
542 if (status)
541 return status; 543 return status;
542 if (st[3] != 0xf1) { 544 if (st[3] != 0xf1) {
543 if ((status = sb1000_start_get_set_command(ioaddr, name))) 545 status = sb1000_start_get_set_command(ioaddr, name);
546 if (status)
544 return status; 547 return status;
545 return -EIO; 548 return -EIO;
546 } 549 }
547 udelay(1000); 550 udelay(1000);
548 return sb1000_start_get_set_command(ioaddr, name); 551 return sb1000_start_get_set_command(ioaddr, name);
549} 552}
550 553
551/* get SB1000 firmware version */ 554/* get SB1000 firmware version */
diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c
index 80373a9171dd..3f145e4c6c08 100644
--- a/drivers/net/usb/rtl8150.c
+++ b/drivers/net/usb/rtl8150.c
@@ -388,7 +388,6 @@ static void read_bulk_callback(struct urb *urb)
388 unsigned pkt_len, res; 388 unsigned pkt_len, res;
389 struct sk_buff *skb; 389 struct sk_buff *skb;
390 struct net_device *netdev; 390 struct net_device *netdev;
391 u16 rx_stat;
392 int status = urb->status; 391 int status = urb->status;
393 int result; 392 int result;
394 unsigned long flags; 393 unsigned long flags;
@@ -424,7 +423,6 @@ static void read_bulk_callback(struct urb *urb)
424 goto goon; 423 goto goon;
425 424
426 res = urb->actual_length; 425 res = urb->actual_length;
427 rx_stat = le16_to_cpu(*(__le16 *)(urb->transfer_buffer + res - 4));
428 pkt_len = res - 4; 426 pkt_len = res - 4;
429 427
430 skb_put(dev->rx_skb, pkt_len); 428 skb_put(dev->rx_skb, pkt_len);
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 8fadd8eaf601..2a0edd4653e3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1035,6 +1035,7 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
1035 goto frame_err; 1035 goto frame_err;
1036 } 1036 }
1037 1037
1038 skb_record_rx_queue(skb, vq2rxq(rq->vq));
1038 skb->protocol = eth_type_trans(skb, dev); 1039 skb->protocol = eth_type_trans(skb, dev);
1039 pr_debug("Receiving skb proto 0x%04x len %i type %i\n", 1040 pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
1040 ntohs(skb->protocol), skb->len, skb->pkt_type); 1041 ntohs(skb->protocol), skb->len, skb->pkt_type);
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 5209ee9aac47..ef45c3c925be 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -361,10 +361,11 @@ errout:
361static void vxlan_fdb_switchdev_notifier_info(const struct vxlan_dev *vxlan, 361static void vxlan_fdb_switchdev_notifier_info(const struct vxlan_dev *vxlan,
362 const struct vxlan_fdb *fdb, 362 const struct vxlan_fdb *fdb,
363 const struct vxlan_rdst *rd, 363 const struct vxlan_rdst *rd,
364 struct netlink_ext_ack *extack,
364 struct switchdev_notifier_vxlan_fdb_info *fdb_info) 365 struct switchdev_notifier_vxlan_fdb_info *fdb_info)
365{ 366{
366 fdb_info->info.dev = vxlan->dev; 367 fdb_info->info.dev = vxlan->dev;
367 fdb_info->info.extack = NULL; 368 fdb_info->info.extack = extack;
368 fdb_info->remote_ip = rd->remote_ip; 369 fdb_info->remote_ip = rd->remote_ip;
369 fdb_info->remote_port = rd->remote_port; 370 fdb_info->remote_port = rd->remote_port;
370 fdb_info->remote_vni = rd->remote_vni; 371 fdb_info->remote_vni = rd->remote_vni;
@@ -375,41 +376,50 @@ static void vxlan_fdb_switchdev_notifier_info(const struct vxlan_dev *vxlan,
375 fdb_info->added_by_user = fdb->flags & NTF_VXLAN_ADDED_BY_USER; 376 fdb_info->added_by_user = fdb->flags & NTF_VXLAN_ADDED_BY_USER;
376} 377}
377 378
378static void vxlan_fdb_switchdev_call_notifiers(struct vxlan_dev *vxlan, 379static int vxlan_fdb_switchdev_call_notifiers(struct vxlan_dev *vxlan,
379 struct vxlan_fdb *fdb, 380 struct vxlan_fdb *fdb,
380 struct vxlan_rdst *rd, 381 struct vxlan_rdst *rd,
381 bool adding) 382 bool adding,
383 struct netlink_ext_ack *extack)
382{ 384{
383 struct switchdev_notifier_vxlan_fdb_info info; 385 struct switchdev_notifier_vxlan_fdb_info info;
384 enum switchdev_notifier_type notifier_type; 386 enum switchdev_notifier_type notifier_type;
387 int ret;
385 388
386 if (WARN_ON(!rd)) 389 if (WARN_ON(!rd))
387 return; 390 return 0;
388 391
389 notifier_type = adding ? SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE 392 notifier_type = adding ? SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE
390 : SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE; 393 : SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE;
391 vxlan_fdb_switchdev_notifier_info(vxlan, fdb, rd, &info); 394 vxlan_fdb_switchdev_notifier_info(vxlan, fdb, rd, NULL, &info);
392 call_switchdev_notifiers(notifier_type, vxlan->dev, 395 ret = call_switchdev_notifiers(notifier_type, vxlan->dev,
393 &info.info); 396 &info.info, extack);
397 return notifier_to_errno(ret);
394} 398}
395 399
396static void vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb, 400static int vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
397 struct vxlan_rdst *rd, int type, bool swdev_notify) 401 struct vxlan_rdst *rd, int type, bool swdev_notify,
402 struct netlink_ext_ack *extack)
398{ 403{
404 int err;
405
399 if (swdev_notify) { 406 if (swdev_notify) {
400 switch (type) { 407 switch (type) {
401 case RTM_NEWNEIGH: 408 case RTM_NEWNEIGH:
402 vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd, 409 err = vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
403 true); 410 true, extack);
411 if (err)
412 return err;
404 break; 413 break;
405 case RTM_DELNEIGH: 414 case RTM_DELNEIGH:
406 vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd, 415 vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
407 false); 416 false, extack);
408 break; 417 break;
409 } 418 }
410 } 419 }
411 420
412 __vxlan_fdb_notify(vxlan, fdb, rd, type); 421 __vxlan_fdb_notify(vxlan, fdb, rd, type);
422 return 0;
413} 423}
414 424
415static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa) 425static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa)
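The add/del asymmetry above is deliberate: only SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE can be vetoed by a driver, so only the add path converts the notifier result to an errno and propagates it, with extack carrying the reason back to userspace; deletes stay best-effort. The essential shape:

	err = vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd, true, extack);
	if (err)
		return err; /* a driver refused the offload: abort the add */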
@@ -423,7 +433,7 @@ static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa)
423 .remote_vni = cpu_to_be32(VXLAN_N_VID), 433 .remote_vni = cpu_to_be32(VXLAN_N_VID),
424 }; 434 };
425 435
426 vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true); 436 vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL);
427} 437}
428 438
429static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN]) 439static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN])
@@ -435,7 +445,7 @@ static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN])
435 445
436 memcpy(f.eth_addr, eth_addr, ETH_ALEN); 446 memcpy(f.eth_addr, eth_addr, ETH_ALEN);
437 447
438 vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true); 448 vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL);
439} 449}
440 450
441/* Hash Ethernet address */ 451/* Hash Ethernet address */
@@ -545,7 +555,7 @@ int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
545 } 555 }
546 556
547 rdst = first_remote_rcu(f); 557 rdst = first_remote_rcu(f);
548 vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, fdb_info); 558 vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, NULL, fdb_info);
549 559
550out: 560out:
551 rcu_read_unlock(); 561 rcu_read_unlock();
@@ -556,19 +566,21 @@ EXPORT_SYMBOL_GPL(vxlan_fdb_find_uc);
556static int vxlan_fdb_notify_one(struct notifier_block *nb, 566static int vxlan_fdb_notify_one(struct notifier_block *nb,
557 const struct vxlan_dev *vxlan, 567 const struct vxlan_dev *vxlan,
558 const struct vxlan_fdb *f, 568 const struct vxlan_fdb *f,
559 const struct vxlan_rdst *rdst) 569 const struct vxlan_rdst *rdst,
570 struct netlink_ext_ack *extack)
560{ 571{
561 struct switchdev_notifier_vxlan_fdb_info fdb_info; 572 struct switchdev_notifier_vxlan_fdb_info fdb_info;
562 int rc; 573 int rc;
563 574
564 vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, &fdb_info); 575 vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, extack, &fdb_info);
565 rc = nb->notifier_call(nb, SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE, 576 rc = nb->notifier_call(nb, SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
566 &fdb_info); 577 &fdb_info);
567 return notifier_to_errno(rc); 578 return notifier_to_errno(rc);
568} 579}
569 580
570int vxlan_fdb_replay(const struct net_device *dev, __be32 vni, 581int vxlan_fdb_replay(const struct net_device *dev, __be32 vni,
571 struct notifier_block *nb) 582 struct notifier_block *nb,
583 struct netlink_ext_ack *extack)
572{ 584{
573 struct vxlan_dev *vxlan; 585 struct vxlan_dev *vxlan;
574 struct vxlan_rdst *rdst; 586 struct vxlan_rdst *rdst;
@@ -586,7 +598,8 @@ int vxlan_fdb_replay(const struct net_device *dev, __be32 vni,
586 if (f->vni == vni) { 598 if (f->vni == vni) {
587 list_for_each_entry(rdst, &f->remotes, list) { 599 list_for_each_entry(rdst, &f->remotes, list) {
588 rc = vxlan_fdb_notify_one(nb, vxlan, 600 rc = vxlan_fdb_notify_one(nb, vxlan,
589 f, rdst); 601 f, rdst,
602 extack);
590 if (rc) 603 if (rc)
591 goto out; 604 goto out;
592 } 605 }
@@ -625,7 +638,7 @@ EXPORT_SYMBOL_GPL(vxlan_fdb_clear_offload);
625/* Replace destination of unicast mac */ 638/* Replace destination of unicast mac */
626static int vxlan_fdb_replace(struct vxlan_fdb *f, 639static int vxlan_fdb_replace(struct vxlan_fdb *f,
627 union vxlan_addr *ip, __be16 port, __be32 vni, 640 union vxlan_addr *ip, __be16 port, __be32 vni,
628 __u32 ifindex) 641 __u32 ifindex, struct vxlan_rdst *oldrd)
629{ 642{
630 struct vxlan_rdst *rd; 643 struct vxlan_rdst *rd;
631 644
@@ -637,6 +650,7 @@ static int vxlan_fdb_replace(struct vxlan_fdb *f,
637 if (!rd) 650 if (!rd)
638 return 0; 651 return 0;
639 652
653 *oldrd = *rd;
640 dst_cache_reset(&rd->dst_cache); 654 dst_cache_reset(&rd->dst_cache);
641 rd->remote_ip = *ip; 655 rd->remote_ip = *ip;
642 rd->remote_port = port; 656 rd->remote_port = port;
@@ -826,120 +840,177 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
826 return 0; 840 return 0;
827} 841}
828 842
829/* Add new entry to forwarding table -- assumes lock held */ 843static void vxlan_fdb_free(struct rcu_head *head)
830static int vxlan_fdb_update(struct vxlan_dev *vxlan, 844{
831 const u8 *mac, union vxlan_addr *ip, 845 struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
832 __u16 state, __u16 flags, 846 struct vxlan_rdst *rd, *nd;
833 __be16 port, __be32 src_vni, __be32 vni, 847
834 __u32 ifindex, __u16 ndm_flags, 848 list_for_each_entry_safe(rd, nd, &f->remotes, list) {
835 bool swdev_notify) 849 dst_cache_destroy(&rd->dst_cache);
850 kfree(rd);
851 }
852 kfree(f);
853}
854
855static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
856 bool do_notify, bool swdev_notify)
857{
858 struct vxlan_rdst *rd;
859
860 netdev_dbg(vxlan->dev, "delete %pM\n", f->eth_addr);
861
862 --vxlan->addrcnt;
863 if (do_notify)
864 list_for_each_entry(rd, &f->remotes, list)
865 vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH,
866 swdev_notify, NULL);
867
868 hlist_del_rcu(&f->hlist);
869 call_rcu(&f->rcu, vxlan_fdb_free);
870}
871
872static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
873 union vxlan_addr *ip,
874 __u16 state, __u16 flags,
875 __be16 port, __be32 vni,
876 __u32 ifindex, __u16 ndm_flags,
877 struct vxlan_fdb *f,
878 bool swdev_notify,
879 struct netlink_ext_ack *extack)
836{ 880{
837 __u16 fdb_flags = (ndm_flags & ~NTF_USE); 881 __u16 fdb_flags = (ndm_flags & ~NTF_USE);
838 struct vxlan_rdst *rd = NULL; 882 struct vxlan_rdst *rd = NULL;
839 struct vxlan_fdb *f; 883 struct vxlan_rdst oldrd;
840 int notify = 0; 884 int notify = 0;
841 int rc; 885 int rc = 0;
842 886 int err;
843 f = __vxlan_find_mac(vxlan, mac, src_vni);
844 if (f) {
845 if (flags & NLM_F_EXCL) {
846 netdev_dbg(vxlan->dev,
847 "lost race to create %pM\n", mac);
848 return -EEXIST;
849 }
850 887
851 /* Do not allow an externally learned entry to take over an 888 /* Do not allow an externally learned entry to take over an entry added
852 * entry added by the user. 889 * by the user.
853 */ 890 */
854 if (!(fdb_flags & NTF_EXT_LEARNED) || 891 if (!(fdb_flags & NTF_EXT_LEARNED) ||
855 !(f->flags & NTF_VXLAN_ADDED_BY_USER)) { 892 !(f->flags & NTF_VXLAN_ADDED_BY_USER)) {
856 if (f->state != state) { 893 if (f->state != state) {
857 f->state = state; 894 f->state = state;
858 f->updated = jiffies; 895 f->updated = jiffies;
859 notify = 1; 896 notify = 1;
860 }
861 if (f->flags != fdb_flags) {
862 f->flags = fdb_flags;
863 f->updated = jiffies;
864 notify = 1;
865 }
866 } 897 }
867 898 if (f->flags != fdb_flags) {
868 if ((flags & NLM_F_REPLACE)) { 899 f->flags = fdb_flags;
869 /* Only change unicasts */ 900 f->updated = jiffies;
870 if (!(is_multicast_ether_addr(f->eth_addr) || 901 notify = 1;
871 is_zero_ether_addr(f->eth_addr))) {
872 notify |= vxlan_fdb_replace(f, ip, port, vni,
873 ifindex);
874 } else
875 return -EOPNOTSUPP;
876 } 902 }
877 if ((flags & NLM_F_APPEND) && 903 }
878 (is_multicast_ether_addr(f->eth_addr) ||
879 is_zero_ether_addr(f->eth_addr))) {
880 rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
881 904
882 if (rc < 0) 905 if ((flags & NLM_F_REPLACE)) {
883 return rc; 906 /* Only change unicasts */
907 if (!(is_multicast_ether_addr(f->eth_addr) ||
908 is_zero_ether_addr(f->eth_addr))) {
909 rc = vxlan_fdb_replace(f, ip, port, vni,
910 ifindex, &oldrd);
884 notify |= rc; 911 notify |= rc;
885 } 912 } else {
886
887 if (ndm_flags & NTF_USE)
888 f->used = jiffies;
889 } else {
890 if (!(flags & NLM_F_CREATE))
891 return -ENOENT;
892
893 /* Disallow replace to add a multicast entry */
894 if ((flags & NLM_F_REPLACE) &&
895 (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
896 return -EOPNOTSUPP; 913 return -EOPNOTSUPP;
914 }
915 }
916 if ((flags & NLM_F_APPEND) &&
917 (is_multicast_ether_addr(f->eth_addr) ||
918 is_zero_ether_addr(f->eth_addr))) {
919 rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
897 920
898 netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
899 rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni,
900 vni, ifindex, fdb_flags, &f);
901 if (rc < 0) 921 if (rc < 0)
902 return rc; 922 return rc;
903 notify = 1; 923 notify |= rc;
904 } 924 }
905 925
926 if (ndm_flags & NTF_USE)
927 f->used = jiffies;
928
906 if (notify) { 929 if (notify) {
907 if (rd == NULL) 930 if (rd == NULL)
908 rd = first_remote_rtnl(f); 931 rd = first_remote_rtnl(f);
909 vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH, swdev_notify); 932
933 err = vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH,
934 swdev_notify, extack);
935 if (err)
936 goto err_notify;
910 } 937 }
911 938
912 return 0; 939 return 0;
940
941err_notify:
942 if ((flags & NLM_F_REPLACE) && rc)
943 *rd = oldrd;
944 else if ((flags & NLM_F_APPEND) && rc)
945 list_del_rcu(&rd->list);
946 return err;
913} 947}
914 948
915static void vxlan_fdb_free(struct rcu_head *head) 949static int vxlan_fdb_update_create(struct vxlan_dev *vxlan,
950 const u8 *mac, union vxlan_addr *ip,
951 __u16 state, __u16 flags,
952 __be16 port, __be32 src_vni, __be32 vni,
953 __u32 ifindex, __u16 ndm_flags,
954 bool swdev_notify,
955 struct netlink_ext_ack *extack)
916{ 956{
917 struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu); 957 __u16 fdb_flags = (ndm_flags & ~NTF_USE);
918 struct vxlan_rdst *rd, *nd; 958 struct vxlan_fdb *f;
959 int rc;
919 960
920 list_for_each_entry_safe(rd, nd, &f->remotes, list) { 961 /* Disallow replace to add a multicast entry */
921 dst_cache_destroy(&rd->dst_cache); 962 if ((flags & NLM_F_REPLACE) &&
922 kfree(rd); 963 (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
923 } 964 return -EOPNOTSUPP;
924 kfree(f); 965
966 netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
967 rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni,
968 vni, ifindex, fdb_flags, &f);
969 if (rc < 0)
970 return rc;
971
972 rc = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH,
973 swdev_notify, extack);
974 if (rc)
975 goto err_notify;
976
977 return 0;
978
979err_notify:
980 vxlan_fdb_destroy(vxlan, f, false, false);
981 return rc;
925} 982}
926 983
927static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f, 984/* Add new entry to forwarding table -- assumes lock held */
928 bool do_notify, bool swdev_notify) 985static int vxlan_fdb_update(struct vxlan_dev *vxlan,
986 const u8 *mac, union vxlan_addr *ip,
987 __u16 state, __u16 flags,
988 __be16 port, __be32 src_vni, __be32 vni,
989 __u32 ifindex, __u16 ndm_flags,
990 bool swdev_notify,
991 struct netlink_ext_ack *extack)
929{ 992{
930 struct vxlan_rdst *rd; 993 struct vxlan_fdb *f;
931 994
932 netdev_dbg(vxlan->dev, 995 f = __vxlan_find_mac(vxlan, mac, src_vni);
933 "delete %pM\n", f->eth_addr); 996 if (f) {
997 if (flags & NLM_F_EXCL) {
998 netdev_dbg(vxlan->dev,
999 "lost race to create %pM\n", mac);
1000 return -EEXIST;
1001 }
934 1002
935 --vxlan->addrcnt; 1003 return vxlan_fdb_update_existing(vxlan, ip, state, flags, port,
936 if (do_notify) 1004 vni, ifindex, ndm_flags, f,
937 list_for_each_entry(rd, &f->remotes, list) 1005 swdev_notify, extack);
938 vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH, 1006 } else {
939 swdev_notify); 1007 if (!(flags & NLM_F_CREATE))
1008 return -ENOENT;
940 1009
941 hlist_del_rcu(&f->hlist); 1010 return vxlan_fdb_update_create(vxlan, mac, ip, state, flags,
942 call_rcu(&f->rcu, vxlan_fdb_free); 1011 port, src_vni, vni, ifindex,
1012 ndm_flags, swdev_notify, extack);
1013 }
943} 1014}
944 1015
945static void vxlan_dst_free(struct rcu_head *head) 1016static void vxlan_dst_free(struct rcu_head *head)
@@ -954,7 +1025,7 @@ static void vxlan_fdb_dst_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
954 struct vxlan_rdst *rd, bool swdev_notify) 1025 struct vxlan_rdst *rd, bool swdev_notify)
955{ 1026{
956 list_del_rcu(&rd->list); 1027 list_del_rcu(&rd->list);
957 vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH, swdev_notify); 1028 vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH, swdev_notify, NULL);
958 call_rcu(&rd->rcu, vxlan_dst_free); 1029 call_rcu(&rd->rcu, vxlan_dst_free);
959} 1030}
960 1031
@@ -1025,7 +1096,8 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
1025/* Add static entry (via netlink) */ 1096/* Add static entry (via netlink) */
1026static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], 1097static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
1027 struct net_device *dev, 1098 struct net_device *dev,
1028 const unsigned char *addr, u16 vid, u16 flags) 1099 const unsigned char *addr, u16 vid, u16 flags,
1100 struct netlink_ext_ack *extack)
1029{ 1101{
1030 struct vxlan_dev *vxlan = netdev_priv(dev); 1102 struct vxlan_dev *vxlan = netdev_priv(dev);
1031 /* struct net *net = dev_net(vxlan->dev); */ 1103 /* struct net *net = dev_net(vxlan->dev); */
@@ -1055,7 +1127,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
1055 err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags, 1127 err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
1056 port, src_vni, vni, ifindex, 1128 port, src_vni, vni, ifindex,
1057 ndm->ndm_flags | NTF_VXLAN_ADDED_BY_USER, 1129 ndm->ndm_flags | NTF_VXLAN_ADDED_BY_USER,
1058 true); 1130 true, extack);
1059 spin_unlock_bh(&vxlan->hash_lock); 1131 spin_unlock_bh(&vxlan->hash_lock);
1060 1132
1061 return err; 1133 return err;
@@ -1223,7 +1295,7 @@ static bool vxlan_snoop(struct net_device *dev,
1223 1295
1224 rdst->remote_ip = *src_ip; 1296 rdst->remote_ip = *src_ip;
1225 f->updated = jiffies; 1297 f->updated = jiffies;
1226 vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true); 1298 vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL);
1227 } else { 1299 } else {
1228 /* learned new entry */ 1300 /* learned new entry */
1229 spin_lock(&vxlan->hash_lock); 1301 spin_lock(&vxlan->hash_lock);
@@ -1236,7 +1308,7 @@ static bool vxlan_snoop(struct net_device *dev,
1236 vxlan->cfg.dst_port, 1308 vxlan->cfg.dst_port,
1237 vni, 1309 vni,
1238 vxlan->default_dst.remote_vni, 1310 vxlan->default_dst.remote_vni,
1239 ifindex, NTF_SELF, true); 1311 ifindex, NTF_SELF, true, NULL);
1240 spin_unlock(&vxlan->hash_lock); 1312 spin_unlock(&vxlan->hash_lock);
1241 } 1313 }
1242 1314
@@ -3478,9 +3550,12 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
3478 goto errout; 3550 goto errout;
3479 3551
3480 /* notify default fdb entry */ 3552 /* notify default fdb entry */
3481 if (f) 3553 if (f) {
3482 vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH, 3554 err = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f),
3483 true); 3555 RTM_NEWNEIGH, true, extack);
3556 if (err)
3557 goto errout;
3558 }
3484 3559
3485 list_add(&vxlan->next, &vn->vxlan_list); 3560 list_add(&vxlan->next, &vn->vxlan_list);
3486 return 0; 3561 return 0;
@@ -3727,8 +3802,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
3727{ 3802{
3728 struct vxlan_dev *vxlan = netdev_priv(dev); 3803 struct vxlan_dev *vxlan = netdev_priv(dev);
3729 struct vxlan_rdst *dst = &vxlan->default_dst; 3804 struct vxlan_rdst *dst = &vxlan->default_dst;
3730 unsigned long old_age_interval; 3805 struct net_device *lowerdev;
3731 struct vxlan_rdst old_dst;
3732 struct vxlan_config conf; 3806 struct vxlan_config conf;
3733 int err; 3807 int err;
3734 3808
@@ -3737,46 +3811,43 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
3737 if (err) 3811 if (err)
3738 return err; 3812 return err;
3739 3813
3740 old_age_interval = vxlan->cfg.age_interval; 3814 err = vxlan_config_validate(vxlan->net, &conf, &lowerdev,
3741 memcpy(&old_dst, dst, sizeof(struct vxlan_rdst)); 3815 vxlan, extack);
3742
3743 err = vxlan_dev_configure(vxlan->net, dev, &conf, true, extack);
3744 if (err) 3816 if (err)
3745 return err; 3817 return err;
3746 3818
3747 if (old_age_interval != vxlan->cfg.age_interval)
3748 mod_timer(&vxlan->age_timer, jiffies);
3749
3750 /* handle default dst entry */ 3819 /* handle default dst entry */
3751 if (!vxlan_addr_equal(&dst->remote_ip, &old_dst.remote_ip)) { 3820 if (!vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip)) {
3752 spin_lock_bh(&vxlan->hash_lock); 3821 spin_lock_bh(&vxlan->hash_lock);
3753 if (!vxlan_addr_any(&old_dst.remote_ip)) 3822 if (!vxlan_addr_any(&conf.remote_ip)) {
3754 __vxlan_fdb_delete(vxlan, all_zeros_mac,
3755 old_dst.remote_ip,
3756 vxlan->cfg.dst_port,
3757 old_dst.remote_vni,
3758 old_dst.remote_vni,
3759 old_dst.remote_ifindex,
3760 true);
3761
3762 if (!vxlan_addr_any(&dst->remote_ip)) {
3763 err = vxlan_fdb_update(vxlan, all_zeros_mac, 3823 err = vxlan_fdb_update(vxlan, all_zeros_mac,
3764 &dst->remote_ip, 3824 &conf.remote_ip,
3765 NUD_REACHABLE | NUD_PERMANENT, 3825 NUD_REACHABLE | NUD_PERMANENT,
3766 NLM_F_APPEND | NLM_F_CREATE, 3826 NLM_F_APPEND | NLM_F_CREATE,
3767 vxlan->cfg.dst_port, 3827 vxlan->cfg.dst_port,
3768 dst->remote_vni, 3828 conf.vni, conf.vni,
3769 dst->remote_vni, 3829 conf.remote_ifindex,
3770 dst->remote_ifindex, 3830 NTF_SELF, true, extack);
3771 NTF_SELF, true);
3772 if (err) { 3831 if (err) {
3773 spin_unlock_bh(&vxlan->hash_lock); 3832 spin_unlock_bh(&vxlan->hash_lock);
3774 return err; 3833 return err;
3775 } 3834 }
3776 } 3835 }
3836 if (!vxlan_addr_any(&dst->remote_ip))
3837 __vxlan_fdb_delete(vxlan, all_zeros_mac,
3838 dst->remote_ip,
3839 vxlan->cfg.dst_port,
3840 dst->remote_vni,
3841 dst->remote_vni,
3842 dst->remote_ifindex,
3843 true);
3777 spin_unlock_bh(&vxlan->hash_lock); 3844 spin_unlock_bh(&vxlan->hash_lock);
3778 } 3845 }
3779 3846
3847 if (conf.age_interval != vxlan->cfg.age_interval)
3848 mod_timer(&vxlan->age_timer, jiffies);
3849
3850 vxlan_config_apply(dev, &conf, lowerdev, vxlan->net, true);
3780 return 0; 3851 return 0;
3781} 3852}
3782 3853
@@ -4051,8 +4122,11 @@ vxlan_fdb_external_learn_add(struct net_device *dev,
4051 struct switchdev_notifier_vxlan_fdb_info *fdb_info) 4122 struct switchdev_notifier_vxlan_fdb_info *fdb_info)
4052{ 4123{
4053 struct vxlan_dev *vxlan = netdev_priv(dev); 4124 struct vxlan_dev *vxlan = netdev_priv(dev);
4125 struct netlink_ext_ack *extack;
4054 int err; 4126 int err;
4055 4127
4128 extack = switchdev_notifier_info_to_extack(&fdb_info->info);
4129
4056 spin_lock_bh(&vxlan->hash_lock); 4130 spin_lock_bh(&vxlan->hash_lock);
4057 err = vxlan_fdb_update(vxlan, fdb_info->eth_addr, &fdb_info->remote_ip, 4131 err = vxlan_fdb_update(vxlan, fdb_info->eth_addr, &fdb_info->remote_ip,
4058 NUD_REACHABLE, 4132 NUD_REACHABLE,
@@ -4062,7 +4136,7 @@ vxlan_fdb_external_learn_add(struct net_device *dev,
4062 fdb_info->remote_vni, 4136 fdb_info->remote_vni,
4063 fdb_info->remote_ifindex, 4137 fdb_info->remote_ifindex,
4064 NTF_USE | NTF_SELF | NTF_EXT_LEARNED, 4138 NTF_USE | NTF_SELF | NTF_EXT_LEARNED,
4065 false); 4139 false, extack);
4066 spin_unlock_bh(&vxlan->hash_lock); 4140 spin_unlock_bh(&vxlan->hash_lock);
4067 4141
4068 return err; 4142 return err;
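
The reshaped vxlan update path above is the heart of this patch: vxlan_fdb_notify() turns from void into int because a switchdev listener may now veto the operation, so every caller that mutates state before notifying grows an err_notify unwind (NLM_F_REPLACE restores the saved vxlan_rdst, NLM_F_APPEND unlinks the remote it just appended). A condensed, hypothetical sketch of the same mutate/notify/rollback shape; all my_* names are invented:

        #include <linux/netlink.h>

        struct my_entry { int val; };

        static void my_mutate(struct my_entry *e) { e->val++; }

        /* stub standing in for a notifier chain; a real listener may veto
         * with e.g. -EOPNOTSUPP and set a message on the extack */
        static int my_notify(struct my_entry *e, struct netlink_ext_ack *extack)
        {
                return 0;
        }

        static int my_update(struct my_entry *e, struct netlink_ext_ack *extack)
        {
                struct my_entry old = *e;       /* snapshot for rollback */
                int err;

                my_mutate(e);                   /* apply the change first */
                err = my_notify(e, extack);     /* listeners may veto */
                if (err) {
                        *e = old;               /* undo before reporting */
                        return err;
                }
                return 0;
        }
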
diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c
index c0b0f525c87c..d5dc823f781e 100644
--- a/drivers/net/wan/dscc4.c
+++ b/drivers/net/wan/dscc4.c
@@ -456,16 +456,16 @@ static int state_check(u32 state, struct dscc4_dev_priv *dpriv,
456 int ret = 0; 456 int ret = 0;
457 457
458 if (debug > 1) { 458 if (debug > 1) {
459 if (SOURCE_ID(state) != dpriv->dev_id) { 459 if (SOURCE_ID(state) != dpriv->dev_id) {
460 printk(KERN_DEBUG "%s (%s): Source Id=%d, state=%08x\n", 460 printk(KERN_DEBUG "%s (%s): Source Id=%d, state=%08x\n",
461 dev->name, msg, SOURCE_ID(state), state ); 461 dev->name, msg, SOURCE_ID(state), state);
462 ret = -1; 462 ret = -1;
463 } 463 }
464 if (state & 0x0df80c00) { 464 if (state & 0x0df80c00) {
465 printk(KERN_DEBUG "%s (%s): state=%08x (UFO alert)\n", 465 printk(KERN_DEBUG "%s (%s): state=%08x (UFO alert)\n",
466 dev->name, msg, state); 466 dev->name, msg, state);
467 ret = -1; 467 ret = -1;
468 } 468 }
469 } 469 }
470 return ret; 470 return ret;
471} 471}
@@ -1760,25 +1760,25 @@ try:
1760 } else { /* SccEvt */ 1760 } else { /* SccEvt */
1761 if (debug > 1) { 1761 if (debug > 1) {
1762 //FIXME: verifier la presence de tous les evenements 1762 //FIXME: verifier la presence de tous les evenements
1763 static struct { 1763 static struct {
1764 u32 mask; 1764 u32 mask;
1765 const char *irq_name; 1765 const char *irq_name;
1766 } evts[] = { 1766 } evts[] = {
1767 { 0x00008000, "TIN"}, 1767 { 0x00008000, "TIN"},
1768 { 0x00000020, "RSC"}, 1768 { 0x00000020, "RSC"},
1769 { 0x00000010, "PCE"}, 1769 { 0x00000010, "PCE"},
1770 { 0x00000008, "PLLA"}, 1770 { 0x00000008, "PLLA"},
1771 { 0, NULL} 1771 { 0, NULL}
1772 }, *evt; 1772 }, *evt;
1773 1773
1774 for (evt = evts; evt->irq_name; evt++) { 1774 for (evt = evts; evt->irq_name; evt++) {
1775 if (state & evt->mask) { 1775 if (state & evt->mask) {
1776 printk(KERN_DEBUG "%s: %s\n", 1776 printk(KERN_DEBUG "%s: %s\n",
1777 dev->name, evt->irq_name); 1777 dev->name, evt->irq_name);
1778 if (!(state &= ~evt->mask)) 1778 if (!(state &= ~evt->mask))
1779 goto try; 1779 goto try;
1780 }
1780 } 1781 }
1781 }
1782 } else { 1782 } else {
1783 if (!(state &= ~0x0000c03c)) 1783 if (!(state &= ~0x0000c03c))
1784 goto try; 1784 goto try;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1377d085ef99..a57b9a853aab 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1152,7 +1152,8 @@ struct dev_ifalias {
1152 * 1152 *
1153 * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], 1153 * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[],
1154 * struct net_device *dev, 1154 * struct net_device *dev,
1155 * const unsigned char *addr, u16 vid, u16 flags) 1155 * const unsigned char *addr, u16 vid, u16 flags,
1156 * struct netlink_ext_ack *extack);
1156 * Adds an FDB entry to dev for addr. 1157 * Adds an FDB entry to dev for addr.
1157 * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], 1158 * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[],
1158 * struct net_device *dev, 1159 * struct net_device *dev,
@@ -1376,7 +1377,8 @@ struct net_device_ops {
1376 struct net_device *dev, 1377 struct net_device *dev,
1377 const unsigned char *addr, 1378 const unsigned char *addr,
1378 u16 vid, 1379 u16 vid,
1379 u16 flags); 1380 u16 flags,
1381 struct netlink_ext_ack *extack);
1380 int (*ndo_fdb_del)(struct ndmsg *ndm, 1382 int (*ndo_fdb_del)(struct ndmsg *ndm,
1381 struct nlattr *tb[], 1383 struct nlattr *tb[],
1382 struct net_device *dev, 1384 struct net_device *dev,
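
With the extra parameter, every ->ndo_fdb_add() implementation in the tree gains access to the sender's extack and can attach a textual reason to a rejected FDB add. A minimal, hypothetical driver-side implementation under the new prototype (the my_* helpers are invented):

        static int my_dev_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
                                  struct net_device *dev,
                                  const unsigned char *addr, u16 vid,
                                  u16 flags, struct netlink_ext_ack *extack)
        {
                if (my_fdb_table_full(dev)) {
                        NL_SET_ERR_MSG_MOD(extack, "FDB table is full");
                        return -ENOSPC;
                }
                return my_fdb_insert(dev, addr, vid);
        }
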
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 4e8add270200..593d1b9c33a8 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -126,6 +126,7 @@ void __netlink_clear_multicast_users(struct sock *sk, unsigned int group);
126void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, 126void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
127 const struct netlink_ext_ack *extack); 127 const struct netlink_ext_ack *extack);
128int netlink_has_listeners(struct sock *sk, unsigned int group); 128int netlink_has_listeners(struct sock *sk, unsigned int group);
129bool netlink_strict_get_check(struct sk_buff *skb);
129 130
130int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); 131int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock);
131int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid, 132int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid,
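
netlink_strict_get_check() lets a GET doit handler ask whether the requesting socket enabled NETLINK_GET_STRICT_CHK, so strict sockets get full header and attribute validation while legacy requests stay permissive. A hedged sketch of the intended call site (the my_parse_* helpers are invented):

        static int my_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                               struct netlink_ext_ack *extack)
        {
                if (netlink_strict_get_check(skb))
                        return my_parse_strict(nlh, extack); /* reject junk */
                return my_parse_legacy(nlh, extack);         /* be lenient */
        }
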
diff --git a/include/linux/phy.h b/include/linux/phy.h
index ef20aeea10cc..1f3873a2ff29 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -306,11 +306,6 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr);
306 * - irq or timer will set NOLINK if link goes down 306 * - irq or timer will set NOLINK if link goes down
307 * - phy_stop moves to HALTED 307 * - phy_stop moves to HALTED
308 * 308 *
309 * CHANGELINK: PHY experienced a change in link state
310 * - timer moves to RUNNING if link
311 * - timer moves to NOLINK if the link is down
312 * - phy_stop moves to HALTED
313 *
314 * HALTED: PHY is up, but no polling or interrupts are done. Or 309 * HALTED: PHY is up, but no polling or interrupts are done. Or
315 * PHY is in an error state. 310 * PHY is in an error state.
316 * 311 *
@@ -329,7 +324,6 @@ enum phy_state {
329 PHY_RUNNING, 324 PHY_RUNNING,
330 PHY_NOLINK, 325 PHY_NOLINK,
331 PHY_FORCING, 326 PHY_FORCING,
332 PHY_CHANGELINK,
333 PHY_RESUMING 327 PHY_RESUMING
334}; 328};
335 329
@@ -959,7 +953,6 @@ int phy_aneg_done(struct phy_device *phydev);
959int phy_speed_down(struct phy_device *phydev, bool sync); 953int phy_speed_down(struct phy_device *phydev, bool sync);
960int phy_speed_up(struct phy_device *phydev); 954int phy_speed_up(struct phy_device *phydev);
961 955
962int phy_stop_interrupts(struct phy_device *phydev);
963int phy_restart_aneg(struct phy_device *phydev); 956int phy_restart_aneg(struct phy_device *phydev);
964int phy_reset_after_clk_enable(struct phy_device *phydev); 957int phy_reset_after_clk_enable(struct phy_device *phydev);
965 958
@@ -1185,4 +1178,7 @@ module_exit(phy_module_exit)
1185#define module_phy_driver(__phy_drivers) \ 1178#define module_phy_driver(__phy_drivers) \
1186 phy_module_driver(__phy_drivers, ARRAY_SIZE(__phy_drivers)) 1179 phy_module_driver(__phy_drivers, ARRAY_SIZE(__phy_drivers))
1187 1180
1181bool phy_driver_is_genphy(struct phy_device *phydev);
1182bool phy_driver_is_genphy_10g(struct phy_device *phydev);
1183
1188#endif /* __PHY_H */ 1184#endif /* __PHY_H */
diff --git a/include/linux/platform_data/b53.h b/include/linux/platform_data/b53.h
index 8eaef2f2b691..c3b61ead41f2 100644
--- a/include/linux/platform_data/b53.h
+++ b/include/linux/platform_data/b53.h
@@ -20,7 +20,7 @@
20#define __B53_H 20#define __B53_H
21 21
22#include <linux/kernel.h> 22#include <linux/kernel.h>
23#include <net/dsa.h> 23#include <linux/platform_data/dsa.h>
24 24
25struct b53_platform_data { 25struct b53_platform_data {
26 /* Must be first such that dsa_register_switch() can access it */ 26 /* Must be first such that dsa_register_switch() can access it */
diff --git a/include/linux/platform_data/dsa.h b/include/linux/platform_data/dsa.h
new file mode 100644
index 000000000000..d4d9bf2060a6
--- /dev/null
+++ b/include/linux/platform_data/dsa.h
@@ -0,0 +1,68 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __DSA_PDATA_H
3#define __DSA_PDATA_H
4
5struct device;
6struct net_device;
7
8#define DSA_MAX_SWITCHES 4
9#define DSA_MAX_PORTS 12
10#define DSA_RTABLE_NONE -1
11
12struct dsa_chip_data {
13 /*
14 * How to access the switch configuration registers.
15 */
16 struct device *host_dev;
17 int sw_addr;
18
19 /*
20 * Reference to network devices
21 */
22 struct device *netdev[DSA_MAX_PORTS];
23
24 /* set to size of eeprom if supported by the switch */
25 int eeprom_len;
26
27 /* Device tree node pointer for this specific switch chip
28 * used during switch setup in case additional properties
29 * and resources needs to be used
30 */
31 struct device_node *of_node;
32
33 /*
34 * The names of the switch's ports. Use "cpu" to
35 * designate the switch port that the cpu is connected to,
36 * "dsa" to indicate that this port is a DSA link to
37 * another switch, NULL to indicate the port is unused,
38 * or any other string to indicate this is a physical port.
39 */
40 char *port_names[DSA_MAX_PORTS];
41 struct device_node *port_dn[DSA_MAX_PORTS];
42
43 /*
 44 * An array in which element [a] indicates which port on this
 45 * switch should be used to send packets that are destined
 46 * for switch a. Can be NULL if there is only one switch chip.
47 */
48 s8 rtable[DSA_MAX_SWITCHES];
49};
50
51struct dsa_platform_data {
52 /*
53 * Reference to a Linux network interface that connects
54 * to the root switch chip of the tree.
55 */
56 struct device *netdev;
57 struct net_device *of_netdev;
58
59 /*
60 * Info structs describing each of the switch chips
61 * connected via this network interface.
62 */
63 int nr_chips;
64 struct dsa_chip_data *chip;
65};
66
67
68#endif /* __DSA_PDATA_H */
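
The new header exists so that board files can describe a switch tree without pulling in all of <net/dsa.h>. A hedged sketch of the kind of platform data a board file would declare with it (the port layout and MDIO address are invented):

        #include <linux/platform_data/dsa.h>

        static struct dsa_chip_data my_switch_chip_data = {
                .sw_addr        = 0x10,
                .port_names[0]  = "lan1",
                .port_names[1]  = "lan2",
                .port_names[4]  = "wan",
                .port_names[5]  = "cpu",
        };

        static struct dsa_platform_data my_switch_pdata = {
                .nr_chips       = 1,
                .chip           = &my_switch_chip_data,
        };
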
diff --git a/include/linux/platform_data/mv88e6xxx.h b/include/linux/platform_data/mv88e6xxx.h
index f63af2955ea0..963730b44aea 100644
--- a/include/linux/platform_data/mv88e6xxx.h
+++ b/include/linux/platform_data/mv88e6xxx.h
@@ -2,7 +2,7 @@
2#ifndef __DSA_MV88E6XXX_H 2#ifndef __DSA_MV88E6XXX_H
3#define __DSA_MV88E6XXX_H 3#define __DSA_MV88E6XXX_H
4 4
5#include <net/dsa.h> 5#include <linux/platform_data/dsa.h>
6 6
7struct dsa_mv88e6xxx_pdata { 7struct dsa_mv88e6xxx_pdata {
8 /* Must be first, such that dsa_register_switch() can access this 8 /* Must be first, such that dsa_register_switch() can access this
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 91c536a01b56..5f818fda96bd 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -38,7 +38,6 @@
38#include <linux/netdevice.h> 38#include <linux/netdevice.h>
39#include <linux/pci.h> 39#include <linux/pci.h>
40#include <linux/skbuff.h> 40#include <linux/skbuff.h>
41#include <linux/types.h>
42#include <asm/byteorder.h> 41#include <asm/byteorder.h>
43#include <linux/io.h> 42#include <linux/io.h>
44#include <linux/compiler.h> 43#include <linux/compiler.h>
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 67f4293bc970..a81a1b7a67d7 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -30,6 +30,7 @@ struct devlink {
30 struct list_head param_list; 30 struct list_head param_list;
31 struct list_head region_list; 31 struct list_head region_list;
32 u32 snapshot_id; 32 u32 snapshot_id;
33 struct list_head reporter_list;
33 struct devlink_dpipe_headers *dpipe_headers; 34 struct devlink_dpipe_headers *dpipe_headers;
34 const struct devlink_ops *ops; 35 const struct devlink_ops *ops;
35 struct device *dev; 36 struct device *dev;
@@ -423,6 +424,36 @@ struct devlink_region;
423 424
424typedef void devlink_snapshot_data_dest_t(const void *data); 425typedef void devlink_snapshot_data_dest_t(const void *data);
425 426
427struct devlink_health_buffer;
428struct devlink_health_reporter;
429
430/**
431 * struct devlink_health_reporter_ops - Reporter operations
432 * @name: reporter name
 433 * @dump_size: dump buffer size allocated by devlink
 434 * @diagnose_size: diagnose buffer size allocated by devlink
 435 * @recover: callback to recover from reported error
 436 * if priv_ctx is NULL, run a full recovery
 437 * @dump: callback to dump an object
 438 * if priv_ctx is NULL, run a full dump
 439 * @diagnose: callback to diagnose the current status
440 */
441
442struct devlink_health_reporter_ops {
443 char *name;
444 unsigned int dump_size;
445 unsigned int diagnose_size;
446 int (*recover)(struct devlink_health_reporter *reporter,
447 void *priv_ctx);
448 int (*dump)(struct devlink_health_reporter *reporter,
449 struct devlink_health_buffer **buffers_array,
450 unsigned int buffer_size, unsigned int num_buffers,
451 void *priv_ctx);
452 int (*diagnose)(struct devlink_health_reporter *reporter,
453 struct devlink_health_buffer **buffers_array,
454 unsigned int buffer_size, unsigned int num_buffers);
455};
456
426struct devlink_ops { 457struct devlink_ops {
427 int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack); 458 int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
428 int (*port_type_set)(struct devlink_port *devlink_port, 459 int (*port_type_set)(struct devlink_port *devlink_port,
@@ -584,6 +615,34 @@ int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
584 u8 *data, u32 snapshot_id, 615 u8 *data, u32 snapshot_id,
585 devlink_snapshot_data_dest_t *data_destructor); 616 devlink_snapshot_data_dest_t *data_destructor);
586 617
618int devlink_health_buffer_nest_start(struct devlink_health_buffer *buffer,
619 int attrtype);
620void devlink_health_buffer_nest_end(struct devlink_health_buffer *buffer);
621void devlink_health_buffer_nest_cancel(struct devlink_health_buffer *buffer);
622int devlink_health_buffer_put_object_name(struct devlink_health_buffer *buffer,
623 char *name);
624int devlink_health_buffer_put_value_u8(struct devlink_health_buffer *buffer,
625 u8 value);
626int devlink_health_buffer_put_value_u32(struct devlink_health_buffer *buffer,
627 u32 value);
628int devlink_health_buffer_put_value_u64(struct devlink_health_buffer *buffer,
629 u64 value);
630int devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer,
631 char *name);
632int devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
633 void *data, int len);
634struct devlink_health_reporter *
635devlink_health_reporter_create(struct devlink *devlink,
636 const struct devlink_health_reporter_ops *ops,
637 u64 graceful_period, bool auto_recover,
638 void *priv);
639void
640devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
641
642void *
643devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
644int devlink_health_report(struct devlink_health_reporter *reporter,
645 const char *msg, void *priv_ctx);
587#else 646#else
588 647
589static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, 648static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
@@ -844,6 +903,91 @@ devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
844 return 0; 903 return 0;
845} 904}
846 905
906static inline int
907devlink_health_buffer_nest_start(struct devlink_health_buffer *buffer,
908 int attrtype)
909{
910 return 0;
911}
912
913static inline void
914devlink_health_buffer_nest_end(struct devlink_health_buffer *buffer)
915{
916}
917
918static inline void
919devlink_health_buffer_nest_cancel(struct devlink_health_buffer *buffer)
920{
921}
922
923static inline int
924devlink_health_buffer_put_object_name(struct devlink_health_buffer *buffer,
925 char *name)
926{
927 return 0;
928}
929
930static inline int
931devlink_health_buffer_put_value_u8(struct devlink_health_buffer *buffer,
932 u8 value)
933{
934 return 0;
935}
936
937static inline int
938devlink_health_buffer_put_value_u32(struct devlink_health_buffer *buffer,
939 u32 value)
940{
941 return 0;
942}
943
944static inline int
945devlink_health_buffer_put_value_u64(struct devlink_health_buffer *buffer,
946 u64 value)
947{
948 return 0;
949}
950
951static inline int
952devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer,
953 char *name)
954{
955 return 0;
956}
957
958static inline int
959devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
960 void *data, int len)
961{
962 return 0;
963}
964
965static inline struct devlink_health_reporter *
966devlink_health_reporter_create(struct devlink *devlink,
967 const struct devlink_health_reporter_ops *ops,
968 u64 graceful_period, bool auto_recover,
969 void *priv)
970{
971 return NULL;
972}
973
974static inline void
975devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
976{
977}
978
979static inline void *
980devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
981{
982 return NULL;
983}
984
985static inline int
986devlink_health_report(struct devlink_health_reporter *reporter,
987 const char *msg, void *priv_ctx)
988{
989 return 0;
990}
847#endif 991#endif
848 992
849#endif /* _NET_DEVLINK_H_ */ 993#endif /* _NET_DEVLINK_H_ */
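
Taken together, these hooks give a driver a small health state machine: register a reporter with a set of ops, report errors as they are detected, and let the devlink core drive recover/dump/diagnose. A hedged registration sketch; the my_* names, buffer size, and graceful period are invented:

        static int my_tx_recover(struct devlink_health_reporter *reporter,
                                 void *priv_ctx)
        {
                struct my_priv *priv = devlink_health_reporter_priv(reporter);

                return my_reset_tx_queue(priv); /* 0 == recovered */
        }

        static const struct devlink_health_reporter_ops my_tx_reporter_ops = {
                .name           = "tx",
                .dump_size      = 4096,
                .recover        = my_tx_recover,
        };

        /* at probe time, after devlink_register(): */
        priv->tx_reporter =
                devlink_health_reporter_create(devlink, &my_tx_reporter_ops,
                                               5000, true, priv);

        /* when a TX timeout is detected: */
        devlink_health_report(priv->tx_reporter, "TX timeout", &err_ctx);
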
diff --git a/include/net/dsa.h b/include/net/dsa.h
index b3eefe8e18fd..7f2a668ef2cc 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -21,6 +21,7 @@
21#include <linux/ethtool.h> 21#include <linux/ethtool.h>
22#include <linux/net_tstamp.h> 22#include <linux/net_tstamp.h>
23#include <linux/phy.h> 23#include <linux/phy.h>
24#include <linux/platform_data/dsa.h>
24#include <net/devlink.h> 25#include <net/devlink.h>
25#include <net/switchdev.h> 26#include <net/switchdev.h>
26 27
@@ -44,66 +45,6 @@ enum dsa_tag_protocol {
44 DSA_TAG_LAST, /* MUST BE LAST */ 45 DSA_TAG_LAST, /* MUST BE LAST */
45}; 46};
46 47
47#define DSA_MAX_SWITCHES 4
48#define DSA_MAX_PORTS 12
49
50#define DSA_RTABLE_NONE -1
51
52struct dsa_chip_data {
53 /*
54 * How to access the switch configuration registers.
55 */
56 struct device *host_dev;
57 int sw_addr;
58
59 /*
60 * Reference to network devices
61 */
62 struct device *netdev[DSA_MAX_PORTS];
63
64 /* set to size of eeprom if supported by the switch */
65 int eeprom_len;
66
67 /* Device tree node pointer for this specific switch chip
68 * used during switch setup in case additional properties
69 * and resources needs to be used
70 */
71 struct device_node *of_node;
72
73 /*
74 * The names of the switch's ports. Use "cpu" to
75 * designate the switch port that the cpu is connected to,
76 * "dsa" to indicate that this port is a DSA link to
77 * another switch, NULL to indicate the port is unused,
78 * or any other string to indicate this is a physical port.
79 */
80 char *port_names[DSA_MAX_PORTS];
81 struct device_node *port_dn[DSA_MAX_PORTS];
82
83 /*
84 * An array of which element [a] indicates which port on this
85 * switch should be used to send packets to that are destined
86 * for switch a. Can be NULL if there is only one switch chip.
87 */
88 s8 rtable[DSA_MAX_SWITCHES];
89};
90
91struct dsa_platform_data {
92 /*
93 * Reference to a Linux network interface that connects
94 * to the root switch chip of the tree.
95 */
96 struct device *netdev;
97 struct net_device *of_netdev;
98
99 /*
100 * Info structs describing each of the switch chips
101 * connected via this network interface.
102 */
103 int nr_chips;
104 struct dsa_chip_data *chip;
105};
106
107struct packet_type; 48struct packet_type;
108struct dsa_switch; 49struct dsa_switch;
109 50
@@ -208,6 +149,11 @@ struct dsa_port {
208 * Original copy of the master netdev ethtool_ops 149 * Original copy of the master netdev ethtool_ops
209 */ 150 */
210 const struct ethtool_ops *orig_ethtool_ops; 151 const struct ethtool_ops *orig_ethtool_ops;
152
153 /*
154 * Original copy of the master netdev net_device_ops
155 */
156 const struct net_device_ops *orig_ndo_ops;
211}; 157};
212 158
213struct dsa_switch { 159struct dsa_switch {
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 9481f2c142e2..7a4957599874 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -580,8 +580,7 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
580void qdisc_reset(struct Qdisc *qdisc); 580void qdisc_reset(struct Qdisc *qdisc);
581void qdisc_put(struct Qdisc *qdisc); 581void qdisc_put(struct Qdisc *qdisc);
582void qdisc_put_unlocked(struct Qdisc *qdisc); 582void qdisc_put_unlocked(struct Qdisc *qdisc);
583void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n, 583void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, int n, int len);
584 unsigned int len);
585#ifdef CONFIG_NET_SCHED 584#ifdef CONFIG_NET_SCHED
586int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type, 585int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type,
587 void *type_data); 586 void *type_data);
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index a7fdab5ee6c3..63843ae5dc81 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -195,7 +195,8 @@ int switchdev_port_obj_del(struct net_device *dev,
195int register_switchdev_notifier(struct notifier_block *nb); 195int register_switchdev_notifier(struct notifier_block *nb);
196int unregister_switchdev_notifier(struct notifier_block *nb); 196int unregister_switchdev_notifier(struct notifier_block *nb);
197int call_switchdev_notifiers(unsigned long val, struct net_device *dev, 197int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
198 struct switchdev_notifier_info *info); 198 struct switchdev_notifier_info *info,
199 struct netlink_ext_ack *extack);
199 200
200int register_switchdev_blocking_notifier(struct notifier_block *nb); 201int register_switchdev_blocking_notifier(struct notifier_block *nb);
201int unregister_switchdev_blocking_notifier(struct notifier_block *nb); 202int unregister_switchdev_blocking_notifier(struct notifier_block *nb);
@@ -267,7 +268,8 @@ static inline int unregister_switchdev_notifier(struct notifier_block *nb)
267 268
268static inline int call_switchdev_notifiers(unsigned long val, 269static inline int call_switchdev_notifiers(unsigned long val,
269 struct net_device *dev, 270 struct net_device *dev,
270 struct switchdev_notifier_info *info) 271 struct switchdev_notifier_info *info,
272 struct netlink_ext_ack *extack)
271{ 273{
272 return NOTIFY_DONE; 274 return NOTIFY_DONE;
273} 275}
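
On the receiving side, a listener pulls the extack out of the notifier info rather than getting a new callback argument. A hedged sketch of a switchdev notifier using it to veto an operation with a readable reason (my_can_offload() is invented):

        static int my_switchdev_event(struct notifier_block *nb,
                                      unsigned long event, void *ptr)
        {
                struct switchdev_notifier_info *info = ptr;
                struct netlink_ext_ack *extack;

                extack = switchdev_notifier_info_to_extack(info);
                if (event == SWITCHDEV_FDB_ADD_TO_DEVICE &&
                    !my_can_offload(info)) {
                        NL_SET_ERR_MSG_MOD(extack, "Cannot offload this FDB entry");
                        return notifier_from_errno(-EOPNOTSUPP);
                }
                return NOTIFY_DONE;
        }
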
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e0a65c067662..5c950180d61b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -406,8 +406,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
406 int flags, int *addr_len); 406 int flags, int *addr_len);
407int tcp_set_rcvlowat(struct sock *sk, int val); 407int tcp_set_rcvlowat(struct sock *sk, int val);
408void tcp_data_ready(struct sock *sk); 408void tcp_data_ready(struct sock *sk);
409#ifdef CONFIG_MMU
409int tcp_mmap(struct file *file, struct socket *sock, 410int tcp_mmap(struct file *file, struct socket *sock,
410 struct vm_area_struct *vma); 411 struct vm_area_struct *vma);
412#endif
411void tcp_parse_options(const struct net *net, const struct sk_buff *skb, 413void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
412 struct tcp_options_received *opt_rx, 414 struct tcp_options_received *opt_rx,
413 int estab, struct tcp_fastopen_cookie *foc); 415 int estab, struct tcp_fastopen_cookie *foc);
diff --git a/include/net/tls.h b/include/net/tls.h
index 2a6ac8d642af..90bf52db573e 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -145,12 +145,13 @@ struct tls_sw_context_tx {
145struct tls_sw_context_rx { 145struct tls_sw_context_rx {
146 struct crypto_aead *aead_recv; 146 struct crypto_aead *aead_recv;
147 struct crypto_wait async_wait; 147 struct crypto_wait async_wait;
148
149 struct strparser strp; 148 struct strparser strp;
149 struct sk_buff_head rx_list; /* list of decrypted 'data' records */
150 void (*saved_data_ready)(struct sock *sk); 150 void (*saved_data_ready)(struct sock *sk);
151 151
152 struct sk_buff *recv_pkt; 152 struct sk_buff *recv_pkt;
153 u8 control; 153 u8 control;
154 int async_capable;
154 bool decrypted; 155 bool decrypted;
155 atomic_t decrypt_pending; 156 atomic_t decrypt_pending;
156 bool async_notify; 157 bool async_notify;
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 236403eb5ba6..09767819c3d4 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -428,7 +428,8 @@ struct switchdev_notifier_vxlan_fdb_info {
428int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, 428int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
429 struct switchdev_notifier_vxlan_fdb_info *fdb_info); 429 struct switchdev_notifier_vxlan_fdb_info *fdb_info);
430int vxlan_fdb_replay(const struct net_device *dev, __be32 vni, 430int vxlan_fdb_replay(const struct net_device *dev, __be32 vni,
431 struct notifier_block *nb); 431 struct notifier_block *nb,
432 struct netlink_ext_ack *extack);
432void vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni); 433void vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni);
433 434
434#else 435#else
@@ -440,7 +441,8 @@ vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
440} 441}
441 442
442static inline int vxlan_fdb_replay(const struct net_device *dev, __be32 vni, 443static inline int vxlan_fdb_replay(const struct net_device *dev, __be32 vni,
443 struct notifier_block *nb) 444 struct notifier_block *nb,
445 struct netlink_ext_ack *extack)
444{ 446{
445 return -EOPNOTSUPP; 447 return -EOPNOTSUPP;
446} 448}
diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h
index 44acfbca1266..7e39d2fc7c75 100644
--- a/include/trace/events/devlink.h
+++ b/include/trace/events/devlink.h
@@ -46,6 +46,65 @@ TRACE_EVENT(devlink_hwmsg,
46 (int) __entry->len, __get_dynamic_array(buf), __entry->len) 46 (int) __entry->len, __get_dynamic_array(buf), __entry->len)
47); 47);
48 48
49TRACE_EVENT(devlink_health_report,
50 TP_PROTO(const struct devlink *devlink, const char *reporter_name,
51 const char *msg),
52
53 TP_ARGS(devlink, reporter_name, msg),
54
55 TP_STRUCT__entry(
56 __string(bus_name, devlink->dev->bus->name)
57 __string(dev_name, dev_name(devlink->dev))
58 __string(driver_name, devlink->dev->driver->name)
 59 __string(reporter_name, reporter_name)
60 __string(msg, msg)
61 ),
62
63 TP_fast_assign(
64 __assign_str(bus_name, devlink->dev->bus->name);
65 __assign_str(dev_name, dev_name(devlink->dev));
66 __assign_str(driver_name, devlink->dev->driver->name);
67 __assign_str(reporter_name, reporter_name);
68 __assign_str(msg, msg);
69 ),
70
71 TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: %s",
72 __get_str(bus_name), __get_str(dev_name),
73 __get_str(driver_name), __get_str(reporter_name),
74 __get_str(msg))
75);
76
77TRACE_EVENT(devlink_health_recover_aborted,
78 TP_PROTO(const struct devlink *devlink, const char *reporter_name,
79 bool health_state, u64 time_since_last_recover),
80
81 TP_ARGS(devlink, reporter_name, health_state, time_since_last_recover),
82
83 TP_STRUCT__entry(
84 __string(bus_name, devlink->dev->bus->name)
85 __string(dev_name, dev_name(devlink->dev))
86 __string(driver_name, devlink->dev->driver->name)
87 __string(reporter_name, reporter_name)
88 __field(bool, health_state)
89 __field(u64, time_since_last_recover)
90 ),
91
92 TP_fast_assign(
93 __assign_str(bus_name, devlink->dev->bus->name);
94 __assign_str(dev_name, dev_name(devlink->dev));
95 __assign_str(driver_name, devlink->dev->driver->name);
96 __assign_str(reporter_name, reporter_name);
97 __entry->health_state = health_state;
98 __entry->time_since_last_recover = time_since_last_recover;
99 ),
100
 101 TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: health_state=%d time_since_last_recover=%llu recover aborted",
102 __get_str(bus_name), __get_str(dev_name),
103 __get_str(driver_name), __get_str(reporter_name),
104 __entry->health_state,
105 __entry->time_since_last_recover)
106);
107
49#endif /* _TRACE_DEVLINK_H */ 108#endif /* _TRACE_DEVLINK_H */
50 109
51/* This part must be outside protection */ 110/* This part must be outside protection */
@@ -64,6 +123,9 @@ static inline void trace_devlink_hwmsg(const struct devlink *devlink,
64{ 123{
65} 124}
66 125
126static inline void trace_devlink_health_report(const struct devlink *devlink, const char *reporter_name, const char *msg)
127{
128}
67#endif /* _TRACE_DEVLINK_H */ 129#endif /* _TRACE_DEVLINK_H */
68 130
69#endif 131#endif
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index a12692e5f7a8..3066ab3853a8 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -110,4 +110,6 @@
110#define SO_TXTIME 61 110#define SO_TXTIME 61
111#define SCM_TXTIME SO_TXTIME 111#define SCM_TXTIME SO_TXTIME
112 112
113#define SO_BINDTOIFINDEX 62
114
113#endif /* __ASM_GENERIC_SOCKET_H */ 115#endif /* __ASM_GENERIC_SOCKET_H */
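
SO_BINDTOIFINDEX is the numeric sibling of SO_BINDTODEVICE: it binds a socket to an interface by ifindex instead of by name, avoiding the name lookup and its races. A minimal userspace sketch (guards the constant in case the libc headers predate it):

        #include <sys/socket.h>
        #include <stdio.h>

        #ifndef SO_BINDTOIFINDEX
        #define SO_BINDTOIFINDEX 62
        #endif

        int main(void)
        {
                int fd = socket(AF_INET, SOCK_DGRAM, 0);
                int ifindex = 2; /* e.g. from if_nametoindex("eth0") */

                if (setsockopt(fd, SOL_SOCKET, SO_BINDTOIFINDEX,
                               &ifindex, sizeof(ifindex)) < 0)
                        perror("setsockopt(SO_BINDTOIFINDEX)");
                return 0;
        }
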
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 6e52d3660654..6b26bb2ce4dc 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -89,6 +89,13 @@ enum devlink_command {
89 DEVLINK_CMD_REGION_DEL, 89 DEVLINK_CMD_REGION_DEL,
90 DEVLINK_CMD_REGION_READ, 90 DEVLINK_CMD_REGION_READ,
91 91
92 DEVLINK_CMD_HEALTH_REPORTER_GET,
93 DEVLINK_CMD_HEALTH_REPORTER_SET,
94 DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
95 DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
96 DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
97 DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
98
92 /* add new commands above here */ 99 /* add new commands above here */
93 __DEVLINK_CMD_MAX, 100 __DEVLINK_CMD_MAX,
94 DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 101 DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -285,6 +292,24 @@ enum devlink_attr {
285 DEVLINK_ATTR_REGION_CHUNK_ADDR, /* u64 */ 292 DEVLINK_ATTR_REGION_CHUNK_ADDR, /* u64 */
286 DEVLINK_ATTR_REGION_CHUNK_LEN, /* u64 */ 293 DEVLINK_ATTR_REGION_CHUNK_LEN, /* u64 */
287 294
295 DEVLINK_ATTR_HEALTH_BUFFER_OBJECT, /* nested */
296 DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR, /* nested */
297 DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME, /* string */
298 DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE, /* nested */
299 DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY, /* nested */
300 DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, /* u8 */
301 DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, /* dynamic */
302
303 DEVLINK_ATTR_HEALTH_REPORTER, /* nested */
304 DEVLINK_ATTR_HEALTH_REPORTER_NAME, /* string */
305 DEVLINK_ATTR_HEALTH_REPORTER_STATE, /* u8 */
306 DEVLINK_ATTR_HEALTH_REPORTER_ERR, /* u64 */
307 DEVLINK_ATTR_HEALTH_REPORTER_RECOVER, /* u64 */
308 DEVLINK_ATTR_HEALTH_REPORTER_DUMP_AVAIL, /* u8 */
309 DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, /* u64 */
310 DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, /* u64 */
311 DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, /* u8 */
312
288 /* add new attributes above here, update the policy in devlink.c */ 313 /* add new attributes above here, update the policy in devlink.c */
289 314
290 __DEVLINK_ATTR_MAX, 315 __DEVLINK_ATTR_MAX,
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 95d0db2a8350..02ac251be8c4 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -333,12 +333,19 @@ enum {
333 333
334/* Basic filter */ 334/* Basic filter */
335 335
336struct tc_basic_pcnt {
337 __u64 rcnt;
338 __u64 rhit;
339};
340
336enum { 341enum {
337 TCA_BASIC_UNSPEC, 342 TCA_BASIC_UNSPEC,
338 TCA_BASIC_CLASSID, 343 TCA_BASIC_CLASSID,
339 TCA_BASIC_EMATCHES, 344 TCA_BASIC_EMATCHES,
340 TCA_BASIC_ACT, 345 TCA_BASIC_ACT,
341 TCA_BASIC_POLICE, 346 TCA_BASIC_POLICE,
347 TCA_BASIC_PCNT,
348 TCA_BASIC_PAD,
342 __TCA_BASIC_MAX 349 __TCA_BASIC_MAX
343}; 350};
344 351
@@ -527,11 +534,17 @@ enum {
527 534
528/* Match-all classifier */ 535/* Match-all classifier */
529 536
537struct tc_matchall_pcnt {
538 __u64 rhit;
539};
540
530enum { 541enum {
531 TCA_MATCHALL_UNSPEC, 542 TCA_MATCHALL_UNSPEC,
532 TCA_MATCHALL_CLASSID, 543 TCA_MATCHALL_CLASSID,
533 TCA_MATCHALL_ACT, 544 TCA_MATCHALL_ACT,
534 TCA_MATCHALL_FLAGS, 545 TCA_MATCHALL_FLAGS,
546 TCA_MATCHALL_PCNT,
547 TCA_MATCHALL_PAD,
535 __TCA_MATCHALL_MAX, 548 __TCA_MATCHALL_MAX,
536}; 549};
537 550
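
Both new structs carry software hit counters that the basic and matchall classifiers export in their netlink dumps via TCA_BASIC_PCNT and TCA_MATCHALL_PCNT, with the *_PAD attributes keeping the 64-bit payloads aligned. A hedged userspace sketch of reading the matchall counter out of a dump attribute ('attr' is assumed to be already located by the caller):

        #include <linux/netlink.h>
        #include <linux/pkt_cls.h>
        #include <string.h>

        static __u64 matchall_hits(const struct nlattr *attr)
        {
                struct tc_matchall_pcnt pcnt;

                /* copy out: the 64-bit payload may be unaligned on the wire */
                memcpy(&pcnt, (const char *)attr + NLA_HDRLEN, sizeof(pcnt));
                return pcnt.rhit;
        }
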
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 9e14767500ea..00573cc46c98 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -915,7 +915,8 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
915/* Add new permanent fdb entry with RTM_NEWNEIGH */ 915/* Add new permanent fdb entry with RTM_NEWNEIGH */
916int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], 916int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
917 struct net_device *dev, 917 struct net_device *dev,
918 const unsigned char *addr, u16 vid, u16 nlh_flags) 918 const unsigned char *addr, u16 vid, u16 nlh_flags,
919 struct netlink_ext_ack *extack)
919{ 920{
920 struct net_bridge_vlan_group *vg; 921 struct net_bridge_vlan_group *vg;
921 struct net_bridge_port *p = NULL; 922 struct net_bridge_port *p = NULL;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index eabf8bf28a3f..00deef7fc1f3 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -573,7 +573,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
573int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], 573int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
574 struct net_device *dev, const unsigned char *addr, u16 vid); 574 struct net_device *dev, const unsigned char *addr, u16 vid);
575int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, 575int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev,
576 const unsigned char *addr, u16 vid, u16 nlh_flags); 576 const unsigned char *addr, u16 vid, u16 nlh_flags,
577 struct netlink_ext_ack *extack);
577int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, 578int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
578 struct net_device *dev, struct net_device *fdev, int *idx); 579 struct net_device *dev, struct net_device *fdev, int *idx);
579int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev, 580int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev,
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 035ff59d9cbd..4d2b9eb7604a 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -113,7 +113,7 @@ br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac,
113 info.added_by_user = added_by_user; 113 info.added_by_user = added_by_user;
114 info.offloaded = offloaded; 114 info.offloaded = offloaded;
115 notifier_type = adding ? SWITCHDEV_FDB_ADD_TO_DEVICE : SWITCHDEV_FDB_DEL_TO_DEVICE; 115 notifier_type = adding ? SWITCHDEV_FDB_ADD_TO_DEVICE : SWITCHDEV_FDB_DEL_TO_DEVICE;
116 call_switchdev_notifiers(notifier_type, dev, &info.info); 116 call_switchdev_notifiers(notifier_type, dev, &info.info, NULL);
117} 117}
118 118
119void 119void
diff --git a/net/core/devlink.c b/net/core/devlink.c
index abb0da9d7b4b..60248a53c0ad 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3597,6 +3597,1012 @@ out:
3597 return 0; 3597 return 0;
3598} 3598}
3599 3599
3600#define DEVLINK_HEALTH_BUFFER_SIZE (4096 - GENL_HDRLEN)
3601#define DEVLINK_HEALTH_BUFFER_DATA_SIZE (DEVLINK_HEALTH_BUFFER_SIZE / 2)
3602#define DEVLINK_HEALTH_SIZE_TO_BUFFERS(size) DIV_ROUND_UP(size, DEVLINK_HEALTH_BUFFER_DATA_SIZE)
3603#define DEVLINK_HEALTH_BUFFER_MAX_CHUNK 1024
3604
3605struct devlink_health_buffer {
3606 void *data;
3607 u64 offset;
3608 u64 bytes_left;
3609 u64 bytes_left_metadata;
3610 u64 max_nested_depth;
3611 u64 curr_nest;
3612};
3613
3614struct devlink_health_buffer_desc {
3615 int attrtype;
3616 u16 len;
3617 u8 nla_type;
3618 u8 nest_end;
3619 int value[0];
3620};
3621
3622static void
3623devlink_health_buffers_reset(struct devlink_health_buffer **buffers_list,
3624 u64 num_of_buffers)
3625{
3626 u64 i;
3627
3628 for (i = 0; i < num_of_buffers; i++) {
3629 memset(buffers_list[i]->data, 0, DEVLINK_HEALTH_BUFFER_SIZE);
3630 buffers_list[i]->offset = 0;
3631 buffers_list[i]->bytes_left = DEVLINK_HEALTH_BUFFER_DATA_SIZE;
3632 buffers_list[i]->bytes_left_metadata =
3633 DEVLINK_HEALTH_BUFFER_DATA_SIZE;
3634 buffers_list[i]->max_nested_depth = 0;
3635 buffers_list[i]->curr_nest = 0;
3636 }
3637}
3638
3639static void
3640devlink_health_buffers_destroy(struct devlink_health_buffer **buffers_list,
3641 u64 size);
3642
3643static struct devlink_health_buffer **
3644devlink_health_buffers_create(u64 size)
3645{
3646 struct devlink_health_buffer **buffers_list;
3647 u64 num_of_buffers = DEVLINK_HEALTH_SIZE_TO_BUFFERS(size);
3648 u64 i;
3649
3650 buffers_list = kcalloc(num_of_buffers,
3651 sizeof(struct devlink_health_buffer *),
3652 GFP_KERNEL);
3653 if (!buffers_list)
3654 return NULL;
3655
3656 for (i = 0; i < num_of_buffers; i++) {
3657 struct devlink_health_buffer *buffer;
3658 void *data;
3659
3660 buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
3661 data = kzalloc(DEVLINK_HEALTH_BUFFER_SIZE, GFP_KERNEL);
3662 if (!buffer || !data) {
3663 kfree(buffer);
3664 kfree(data);
3665 goto buffers_cleanup;
3666 }
3667 buffers_list[i] = buffer;
3668 buffer->data = data;
3669 }
3670 devlink_health_buffers_reset(buffers_list, num_of_buffers);
3671
3672 return buffers_list;
3673
3674buffers_cleanup:
3675 devlink_health_buffers_destroy(buffers_list, i);
3676 kfree(buffers_list);
3677 return NULL;
3678}
3679
3680static void
3681devlink_health_buffers_destroy(struct devlink_health_buffer **buffers_list,
3682 u64 num_of_buffers)
3683{
3684 u64 i;
3685
3686 for (i = 0; i < num_of_buffers; i++) {
3687 kfree(buffers_list[i]->data);
3688 kfree(buffers_list[i]);
3689 }
3690}
3691
3692static void
3693devlink_health_buffer_offset_inc(struct devlink_health_buffer *buffer,
3694 int len)
3695{
3696 buffer->offset += len;
3697}
3698
3699/* In order to store a nest, need two descriptors, for start and end */
3700#define DEVLINK_HEALTH_BUFFER_NEST_SIZE (sizeof(struct devlink_health_buffer_desc) * 2)
3701
3702static int devlink_health_buffer_verify_len(struct devlink_health_buffer *buffer,
3703 int len, int metadata_len)
3704{
3705 if (len > DEVLINK_HEALTH_BUFFER_DATA_SIZE)
3706 return -EINVAL;
3707
3708 if (buffer->bytes_left < len ||
3709 buffer->bytes_left_metadata < metadata_len)
3710 return -ENOMEM;
3711
3712 return 0;
3713}
3714
3715static struct devlink_health_buffer_desc *
3716devlink_health_buffer_get_desc_from_offset(struct devlink_health_buffer *buffer)
3717{
3718 return buffer->data + buffer->offset;
3719}
3720
3721int
3722devlink_health_buffer_nest_start(struct devlink_health_buffer *buffer,
3723 int attrtype)
3724{
3725 struct devlink_health_buffer_desc *desc;
3726 int err;
3727
3728 err = devlink_health_buffer_verify_len(buffer, 0,
3729 DEVLINK_HEALTH_BUFFER_NEST_SIZE);
3730 if (err)
3731 return err;
3732
3733 if (attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT &&
3734 attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR &&
3735 attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE &&
3736 attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY)
3737 return -EINVAL;
3738
3739 desc = devlink_health_buffer_get_desc_from_offset(buffer);
3740
3741 desc->attrtype = attrtype;
3742 buffer->bytes_left_metadata -= DEVLINK_HEALTH_BUFFER_NEST_SIZE;
3743 devlink_health_buffer_offset_inc(buffer, sizeof(*desc));
3744
3745 buffer->curr_nest++;
3746 buffer->max_nested_depth = max(buffer->max_nested_depth,
3747 buffer->curr_nest);
3748
3749 return 0;
3750}
3751EXPORT_SYMBOL_GPL(devlink_health_buffer_nest_start);
3752
3753enum devlink_health_buffer_nest_end_cancel {
3754 DEVLINK_HEALTH_BUFFER_NEST_END = 1,
3755 DEVLINK_HEALTH_BUFFER_NEST_CANCEL,
3756};
3757
3758static void
3759devlink_health_buffer_nest_end_cancel(struct devlink_health_buffer *buffer,
3760 enum devlink_health_buffer_nest_end_cancel nest)
3761{
3762 struct devlink_health_buffer_desc *desc;
3763
3764 WARN_ON(!buffer->curr_nest);
3765 buffer->curr_nest--;
3766
3767 desc = devlink_health_buffer_get_desc_from_offset(buffer);
3768 desc->nest_end = nest;
3769 devlink_health_buffer_offset_inc(buffer, sizeof(*desc));
3770}
3771
3772void devlink_health_buffer_nest_end(struct devlink_health_buffer *buffer)
3773{
3774 devlink_health_buffer_nest_end_cancel(buffer,
3775 DEVLINK_HEALTH_BUFFER_NEST_END);
3776}
3777EXPORT_SYMBOL_GPL(devlink_health_buffer_nest_end);
3778
3779void devlink_health_buffer_nest_cancel(struct devlink_health_buffer *buffer)
3780{
3781 devlink_health_buffer_nest_end_cancel(buffer,
3782 DEVLINK_HEALTH_BUFFER_NEST_CANCEL);
3783}
3784EXPORT_SYMBOL_GPL(devlink_health_buffer_nest_cancel);
3785
3786int
3787devlink_health_buffer_put_object_name(struct devlink_health_buffer *buffer,
3788 char *name)
3789{
3790 struct devlink_health_buffer_desc *desc;
3791 int err;
3792
3793 err = devlink_health_buffer_verify_len(buffer, strlen(name) + 1,
3794 sizeof(*desc));
3795 if (err)
3796 return err;
3797
3798 desc = devlink_health_buffer_get_desc_from_offset(buffer);
3799 desc->attrtype = DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME;
3800 desc->nla_type = NLA_NUL_STRING;
3801 desc->len = strlen(name) + 1;
3802 memcpy(&desc->value, name, desc->len);
3803 devlink_health_buffer_offset_inc(buffer, sizeof(*desc) + desc->len);
3804
3805 buffer->bytes_left_metadata -= sizeof(*desc);
3806 buffer->bytes_left -= (strlen(name) + 1);
3807
3808 return 0;
3809}
3810EXPORT_SYMBOL_GPL(devlink_health_buffer_put_object_name);
3811
3812static int
3813devlink_health_buffer_put_value(struct devlink_health_buffer *buffer,
3814 u8 nla_type, void *value, int len)
3815{
3816 struct devlink_health_buffer_desc *desc;
3817 int err;
3818
3819 err = devlink_health_buffer_verify_len(buffer, len, sizeof(*desc));
3820 if (err)
3821 return err;
3822
3823 desc = devlink_health_buffer_get_desc_from_offset(buffer);
3824 desc->attrtype = DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA;
3825 desc->nla_type = nla_type;
3826 desc->len = len;
3827 memcpy(&desc->value, value, len);
3828 devlink_health_buffer_offset_inc(buffer, sizeof(*desc) + desc->len);
3829
3830 buffer->bytes_left_metadata -= sizeof(*desc);
3831 buffer->bytes_left -= len;
3832
3833 return 0;
3834}
3835
3836int
3837devlink_health_buffer_put_value_u8(struct devlink_health_buffer *buffer,
3838 u8 value)
3839{
3840	return devlink_health_buffer_put_value(buffer, NLA_U8, &value,
3841					       sizeof(value));
3848}
3849EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_u8);
3850
3851int
3852devlink_health_buffer_put_value_u32(struct devlink_health_buffer *buffer,
3853 u32 value)
3854{
3855	return devlink_health_buffer_put_value(buffer, NLA_U32, &value,
3856					       sizeof(value));
3863}
3864EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_u32);
3865
3866int
3867devlink_health_buffer_put_value_u64(struct devlink_health_buffer *buffer,
3868 u64 value)
3869{
3870	return devlink_health_buffer_put_value(buffer, NLA_U64, &value,
3871					       sizeof(value));
3878}
3879EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_u64);
3880
3881int
3882devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer,
3883 char *name)
3884{
3885	if (strlen(name) + 1 > DEVLINK_HEALTH_BUFFER_MAX_CHUNK)
3886		return -EINVAL;
3887
3888	return devlink_health_buffer_put_value(buffer, NLA_NUL_STRING, name,
3889					       strlen(name) + 1);
3896}
3897EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_string);
3898
3899int
3900devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
3901 void *data, int len)
3902{
3903	if (len > DEVLINK_HEALTH_BUFFER_MAX_CHUNK)
3904		return -EINVAL;
3905
3906	return devlink_health_buffer_put_value(buffer, NLA_BINARY, data, len);
3913}
3914EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_data);
3915
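
Taken together, a driver-side fill callback chains these helpers. A minimal
sketch, assuming a hypothetical reporter that exports a single "dropped"
counter; the object/pair/value nesting mirrors what
devlink_health_buffer_prepare_skb() below expects:

	/* Hypothetical fill routine: one object holding one name/value pair.
	 * Any failure after a nest was opened cancels the nests opened so
	 * far, leaving the buffer consistent.
	 */
	static int example_fill_dropped(struct devlink_health_buffer *buffer,
					u32 dropped)
	{
		int err;

		err = devlink_health_buffer_nest_start(buffer,
					DEVLINK_ATTR_HEALTH_BUFFER_OBJECT);
		if (err)
			return err;

		err = devlink_health_buffer_nest_start(buffer,
					DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR);
		if (err)
			goto cancel_obj;

		err = devlink_health_buffer_put_object_name(buffer, "dropped");
		if (err)
			goto cancel_pair;

		err = devlink_health_buffer_nest_start(buffer,
					DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE);
		if (err)
			goto cancel_pair;

		err = devlink_health_buffer_put_value_u32(buffer, dropped);
		if (err)
			goto cancel_value;

		devlink_health_buffer_nest_end(buffer);	/* value */
		devlink_health_buffer_nest_end(buffer);	/* pair */
		devlink_health_buffer_nest_end(buffer);	/* object */
		return 0;

	cancel_value:
		devlink_health_buffer_nest_cancel(buffer);
	cancel_pair:
		devlink_health_buffer_nest_cancel(buffer);
	cancel_obj:
		devlink_health_buffer_nest_cancel(buffer);
		return err;
	}
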
3916static int
3917devlink_health_buffer_fill_data(struct sk_buff *skb,
3918 struct devlink_health_buffer_desc *desc)
3919{
3920 int err = -EINVAL;
3921
3922 switch (desc->nla_type) {
3923 case NLA_U8:
3924 err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,
3925 *(u8 *)desc->value);
3926 break;
3927 case NLA_U32:
3928 err = nla_put_u32(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,
3929 *(u32 *)desc->value);
3930 break;
3931 case NLA_U64:
3932 err = nla_put_u64_64bit(skb,
3933 DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,
3934 *(u64 *)desc->value, DEVLINK_ATTR_PAD);
3935 break;
3936 case NLA_NUL_STRING:
3937 err = nla_put_string(skb,
3938 DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,
3939 (char *)&desc->value);
3940 break;
3941 case NLA_BINARY:
3942 err = nla_put(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,
3943 desc->len, (void *)&desc->value);
3944 break;
3945 }
3946
3947 return err;
3948}
3949
3950static int
3951devlink_health_buffer_fill_type(struct sk_buff *skb,
3952 struct devlink_health_buffer_desc *desc)
3953{
3954	switch (desc->nla_type) {
3955	case NLA_U8:
3956	case NLA_U32:
3957	case NLA_U64:
3958	case NLA_NUL_STRING:
3959	case NLA_BINARY:
3960		return nla_put_u8(skb,
3961				  DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE,
3962				  desc->nla_type);
3963	default:
3964		return -EINVAL;
3965	}
3980}
3981
3982static inline struct devlink_health_buffer_desc *
3983devlink_health_buffer_get_next_desc(struct devlink_health_buffer_desc *desc)
3984{
3985 return (void *)&desc->value + desc->len;
3986}
3987
3988static int
3989devlink_health_buffer_prepare_skb(struct sk_buff *skb,
3990 struct devlink_health_buffer *buffer)
3991{
3992 struct devlink_health_buffer_desc *last_desc, *desc;
3993 struct nlattr **buffer_nlattr;
3994 int err;
3995 int i = 0;
3996
3997 buffer_nlattr = kcalloc(buffer->max_nested_depth,
3998 sizeof(*buffer_nlattr), GFP_KERNEL);
3999 if (!buffer_nlattr)
4000		return -ENOMEM;
4001
4002 last_desc = devlink_health_buffer_get_desc_from_offset(buffer);
4003 desc = buffer->data;
4004 while (desc != last_desc) {
4005 switch (desc->attrtype) {
4006 case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT:
4007 case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR:
4008 case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE:
4009 case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY:
4010 buffer_nlattr[i] = nla_nest_start(skb, desc->attrtype);
4011			if (!buffer_nlattr[i]) {
4012				err = -EMSGSIZE;
4013				goto nla_put_failure;
4014			}
4013 i++;
4014 break;
4015 case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA:
4016 err = devlink_health_buffer_fill_data(skb, desc);
4017 if (err)
4018 goto nla_put_failure;
4019 err = devlink_health_buffer_fill_type(skb, desc);
4020 if (err)
4021 goto nla_put_failure;
4022 break;
4023 case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME:
4024 err = nla_put_string(skb, desc->attrtype,
4025 (char *)&desc->value);
4026 if (err)
4027 goto nla_put_failure;
4028 break;
4029 default:
4030 WARN_ON(!desc->nest_end);
4031 WARN_ON(i <= 0);
4032 if (desc->nest_end == DEVLINK_HEALTH_BUFFER_NEST_END)
4033 nla_nest_end(skb, buffer_nlattr[--i]);
4034 else
4035 nla_nest_cancel(skb, buffer_nlattr[--i]);
4036 break;
4037 }
4038 desc = devlink_health_buffer_get_next_desc(desc);
4039 }
4040
4041	kfree(buffer_nlattr);
4042	return 0;
4042
4043nla_put_failure:
4044 kfree(buffer_nlattr);
4045 return err;
4046}
4047
4048static int
4049devlink_health_buffer_snd(struct genl_info *info,
4050 enum devlink_command cmd, int flags,
4051 struct devlink_health_buffer **buffers_array,
4052 u64 num_of_buffers)
4053{
4054 struct sk_buff *skb;
4055 struct nlmsghdr *nlh;
4056 void *hdr;
4057 int err;
4058 u64 i;
4059
4060 for (i = 0; i < num_of_buffers; i++) {
4061 /* Skip buffer if driver did not fill it up with any data */
4062 if (!buffers_array[i]->offset)
4063 continue;
4064
4065 skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
4066 if (!skb)
4067 return -ENOMEM;
4068
4069 hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
4070 &devlink_nl_family, NLM_F_MULTI, cmd);
4071		if (!hdr) {
4072			err = -EMSGSIZE;
4073			goto nla_put_failure;
4074		}
4073
4074 err = devlink_health_buffer_prepare_skb(skb, buffers_array[i]);
4075 if (err)
4076 goto nla_put_failure;
4077
4078 genlmsg_end(skb, hdr);
4079 err = genlmsg_reply(skb, info);
4080 if (err)
4081 return err;
4082 }
4083
4084 skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
4085 if (!skb)
4086 return -ENOMEM;
4087	nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
4088			NLMSG_DONE, 0, flags | NLM_F_MULTI);
4089	if (!nlh) {
4090		nlmsg_free(skb);
4091		return -EMSGSIZE;
4092	}
4093	err = genlmsg_reply(skb, info);
4090 if (err)
4091 return err;
4092
4093 return 0;
4094
4095nla_put_failure:
4096	nlmsg_free(skb);
4097	return err;
4099}
4100
4101struct devlink_health_reporter {
4102 struct list_head list;
4103 struct devlink_health_buffer **dump_buffers_array;
4104	struct mutex dump_lock; /* protects concurrent access to dump buffers */
4105	struct devlink_health_buffer **diagnose_buffers_array;
4106	struct mutex diagnose_lock; /* protects concurrent access to diagnose buffers */
4107 void *priv;
4108 const struct devlink_health_reporter_ops *ops;
4109 struct devlink *devlink;
4110 u64 graceful_period;
4111 bool auto_recover;
4112 u8 health_state;
4113 u8 dump_avail;
4114 u64 dump_ts;
4115 u64 error_count;
4116 u64 recovery_count;
4117 u64 last_recovery_ts;
4118};
4119
4120enum devlink_health_reporter_state {
4121 DEVLINK_HEALTH_REPORTER_STATE_HEALTHY,
4122 DEVLINK_HEALTH_REPORTER_STATE_ERROR,
4123};
4124
4125void *
4126devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
4127{
4128 return reporter->priv;
4129}
4130EXPORT_SYMBOL_GPL(devlink_health_reporter_priv);
4131
4132static struct devlink_health_reporter *
4133devlink_health_reporter_find_by_name(struct devlink *devlink,
4134 const char *reporter_name)
4135{
4136 struct devlink_health_reporter *reporter;
4137
4138 list_for_each_entry(reporter, &devlink->reporter_list, list)
4139 if (!strcmp(reporter->ops->name, reporter_name))
4140 return reporter;
4141 return NULL;
4142}
4143
4144/**
4145 * devlink_health_reporter_create - create devlink health reporter
4146 *
4147 * @devlink: devlink instance the reporter is attached to
4148 * @ops: devlink health reporter ops
4149 * @graceful_period: minimum time between automatic recoveries, in msecs,
4150 *                   to avoid recovery loops
4151 * @auto_recover: automatically recover when an error is reported
4152 * @priv: driver private data, retrievable with devlink_health_reporter_priv()
4153 */
4153struct devlink_health_reporter *
4154devlink_health_reporter_create(struct devlink *devlink,
4155 const struct devlink_health_reporter_ops *ops,
4156 u64 graceful_period, bool auto_recover,
4157 void *priv)
4158{
4159 struct devlink_health_reporter *reporter;
4160
4161 mutex_lock(&devlink->lock);
4162 if (devlink_health_reporter_find_by_name(devlink, ops->name)) {
4163 reporter = ERR_PTR(-EEXIST);
4164 goto unlock;
4165 }
4166
4167 if (WARN_ON(ops->dump && !ops->dump_size) ||
4168 WARN_ON(ops->diagnose && !ops->diagnose_size) ||
4169 WARN_ON(auto_recover && !ops->recover) ||
4170 WARN_ON(graceful_period && !ops->recover)) {
4171 reporter = ERR_PTR(-EINVAL);
4172 goto unlock;
4173 }
4174
4175 reporter = kzalloc(sizeof(*reporter), GFP_KERNEL);
4176 if (!reporter) {
4177 reporter = ERR_PTR(-ENOMEM);
4178 goto unlock;
4179 }
4180
4181 if (ops->dump) {
4182 reporter->dump_buffers_array =
4183 devlink_health_buffers_create(ops->dump_size);
4184 if (!reporter->dump_buffers_array) {
4185 kfree(reporter);
4186 reporter = ERR_PTR(-ENOMEM);
4187 goto unlock;
4188 }
4189 }
4190
4191 if (ops->diagnose) {
4192 reporter->diagnose_buffers_array =
4193 devlink_health_buffers_create(ops->diagnose_size);
4194 if (!reporter->diagnose_buffers_array) {
4195			devlink_health_buffers_destroy(reporter->dump_buffers_array,
4196				DEVLINK_HEALTH_SIZE_TO_BUFFERS(ops->dump_size));
4197			kfree(reporter->dump_buffers_array);
4198			kfree(reporter);
4198 reporter = ERR_PTR(-ENOMEM);
4199 goto unlock;
4200 }
4201 }
4202
4203 list_add_tail(&reporter->list, &devlink->reporter_list);
4204 mutex_init(&reporter->dump_lock);
4205 mutex_init(&reporter->diagnose_lock);
4206
4207 reporter->priv = priv;
4208 reporter->ops = ops;
4209 reporter->devlink = devlink;
4210 reporter->graceful_period = graceful_period;
4211 reporter->auto_recover = auto_recover;
4212unlock:
4213 mutex_unlock(&devlink->lock);
4214 return reporter;
4215}
4216EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
4217
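
A sketch of how a driver might register against this API; the ops struct,
sizes, and callback names below are hypothetical, not part of this patch:

	static const struct devlink_health_reporter_ops example_tx_ops = {
		.name		= "tx",
		.recover	= example_tx_recover,
		.diagnose	= example_tx_diagnose,
		.diagnose_size	= SZ_1K,
		.dump		= example_tx_dump,
		.dump_size	= SZ_4K,
	};

	static int example_health_init(struct devlink *devlink,
				       struct example_priv *priv)
	{
		/* 500 msec grace period between automatic recoveries */
		priv->tx_reporter =
			devlink_health_reporter_create(devlink, &example_tx_ops,
						       500, true, priv);
		return PTR_ERR_OR_ZERO(priv->tx_reporter);
	}
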
4218/**
4219 * devlink_health_reporter_destroy - destroy devlink health reporter
4220 *
4221 * @reporter: devlink health reporter to destroy
4222 */
4223void
4224devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
4225{
4226 mutex_lock(&reporter->devlink->lock);
4227 list_del(&reporter->list);
4228	devlink_health_buffers_destroy(reporter->dump_buffers_array,
4229		DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size));
4230	kfree(reporter->dump_buffers_array);
4231	devlink_health_buffers_destroy(reporter->diagnose_buffers_array,
4232		DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->diagnose_size));
4233	kfree(reporter->diagnose_buffers_array);
4234	kfree(reporter);
4233 mutex_unlock(&reporter->devlink->lock);
4234}
4235EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
4236
4237static int
4238devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
4239 void *priv_ctx)
4240{
4241 int err;
4242
4243 if (!reporter->ops->recover)
4244 return -EOPNOTSUPP;
4245
4246 err = reporter->ops->recover(reporter, priv_ctx);
4247 if (err)
4248 return err;
4249
4250 reporter->recovery_count++;
4251 reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
4252 reporter->last_recovery_ts = jiffies;
4253
4254 return 0;
4255}
4256
4257static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
4258 void *priv_ctx)
4259{
4260 int err;
4261
4262 if (!reporter->ops->dump)
4263 return 0;
4264
4265 if (reporter->dump_avail)
4266 return 0;
4267
4268 devlink_health_buffers_reset(reporter->dump_buffers_array,
4269 DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size));
4270 err = reporter->ops->dump(reporter, reporter->dump_buffers_array,
4271 DEVLINK_HEALTH_BUFFER_SIZE,
4272 DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size),
4273 priv_ctx);
4274 if (!err) {
4275 reporter->dump_avail = true;
4276 reporter->dump_ts = jiffies;
4277 }
4278
4279 return err;
4280}
4281
4282int devlink_health_report(struct devlink_health_reporter *reporter,
4283 const char *msg, void *priv_ctx)
4284{
4285 struct devlink *devlink = reporter->devlink;
4286 int err = 0;
4287
4288 /* write a log message of the current error */
4289 WARN_ON(!msg);
4290 trace_devlink_health_report(devlink, reporter->ops->name, msg);
4291 reporter->error_count++;
4292
4293	/* abort if the previous error wasn't recovered yet or we are
4294	 * still within the grace period since the last recovery
4295	 */
4294 if (reporter->auto_recover &&
4295 (reporter->health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY ||
4296 jiffies - reporter->last_recovery_ts <
4297 msecs_to_jiffies(reporter->graceful_period))) {
4298 trace_devlink_health_recover_aborted(devlink,
4299 reporter->ops->name,
4300 reporter->health_state,
4301 jiffies -
4302 reporter->last_recovery_ts);
4303 return -ECANCELED;
4304 }
4305
4306 reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
4307
4308 mutex_lock(&reporter->dump_lock);
4309	/* store a dump of the current error for later analysis */
4310 devlink_health_do_dump(reporter, priv_ctx);
4311 mutex_unlock(&reporter->dump_lock);
4312
4313 if (reporter->auto_recover)
4314 err = devlink_health_reporter_recover(reporter, priv_ctx);
4315
4316 return err;
4317}
4318EXPORT_SYMBOL_GPL(devlink_health_report);
4319
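
On the reporting side, a driver error path hands the reporter a message and
an optional context that the recover/dump callbacks receive as priv_ctx.
A sketch with hypothetical names:

	/* Called from, e.g., a TX timeout handler. With auto_recover set,
	 * this stores a dump and attempts recovery, subject to the grace
	 * period checked above.
	 */
	static void example_report_tx_timeout(struct example_priv *priv,
					      struct example_tx_ctx *ctx)
	{
		devlink_health_report(priv->tx_reporter, "TX timeout", ctx);
	}
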
4320static struct devlink_health_reporter *
4321devlink_health_reporter_get_from_info(struct devlink *devlink,
4322 struct genl_info *info)
4323{
4324 char *reporter_name;
4325
4326 if (!info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME])
4327 return NULL;
4328
4329 reporter_name =
4330 nla_data(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]);
4331 return devlink_health_reporter_find_by_name(devlink, reporter_name);
4332}
4333
4334static int
4335devlink_nl_health_reporter_fill(struct sk_buff *msg,
4336 struct devlink *devlink,
4337 struct devlink_health_reporter *reporter,
4338 enum devlink_command cmd, u32 portid,
4339 u32 seq, int flags)
4340{
4341 struct nlattr *reporter_attr;
4342 void *hdr;
4343
4344 hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
4345 if (!hdr)
4346 return -EMSGSIZE;
4347
4348 if (devlink_nl_put_handle(msg, devlink))
4349 goto genlmsg_cancel;
4350
4351 reporter_attr = nla_nest_start(msg, DEVLINK_ATTR_HEALTH_REPORTER);
4352 if (!reporter_attr)
4353 goto genlmsg_cancel;
4354 if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME,
4355 reporter->ops->name))
4356 goto reporter_nest_cancel;
4357 if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE,
4358 reporter->health_state))
4359 goto reporter_nest_cancel;
4360 if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR,
4361 reporter->error_count, DEVLINK_ATTR_PAD))
4362 goto reporter_nest_cancel;
4363 if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER,
4364 reporter->recovery_count, DEVLINK_ATTR_PAD))
4365 goto reporter_nest_cancel;
4366 if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,
4367 reporter->graceful_period,
4368 DEVLINK_ATTR_PAD))
4369 goto reporter_nest_cancel;
4370 if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,
4371 reporter->auto_recover))
4372 goto reporter_nest_cancel;
4373 if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_AVAIL,
4374 reporter->dump_avail))
4375 goto reporter_nest_cancel;
4376 if (reporter->dump_avail &&
4377 nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,
4378 jiffies_to_msecs(reporter->dump_ts),
4379 DEVLINK_ATTR_PAD))
4380 goto reporter_nest_cancel;
4381
4382 nla_nest_end(msg, reporter_attr);
4383 genlmsg_end(msg, hdr);
4384 return 0;
4385
4386reporter_nest_cancel:
4387	nla_nest_cancel(msg, reporter_attr);
4388genlmsg_cancel:
4389 genlmsg_cancel(msg, hdr);
4390 return -EMSGSIZE;
4391}
4392
4393static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
4394 struct genl_info *info)
4395{
4396 struct devlink *devlink = info->user_ptr[0];
4397 struct devlink_health_reporter *reporter;
4398 struct sk_buff *msg;
4399 int err;
4400
4401 reporter = devlink_health_reporter_get_from_info(devlink, info);
4402 if (!reporter)
4403 return -EINVAL;
4404
4405 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
4406 if (!msg)
4407 return -ENOMEM;
4408
4409 err = devlink_nl_health_reporter_fill(msg, devlink, reporter,
4410 DEVLINK_CMD_HEALTH_REPORTER_GET,
4411 info->snd_portid, info->snd_seq,
4412 0);
4413 if (err) {
4414 nlmsg_free(msg);
4415 return err;
4416 }
4417
4418 return genlmsg_reply(msg, info);
4419}
4420
4421static int
4422devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
4423 struct netlink_callback *cb)
4424{
4425 struct devlink_health_reporter *reporter;
4426 struct devlink *devlink;
4427 int start = cb->args[0];
4428 int idx = 0;
4429 int err;
4430
4431 mutex_lock(&devlink_mutex);
4432 list_for_each_entry(devlink, &devlink_list, list) {
4433 if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
4434 continue;
4435 mutex_lock(&devlink->lock);
4436 list_for_each_entry(reporter, &devlink->reporter_list,
4437 list) {
4438 if (idx < start) {
4439 idx++;
4440 continue;
4441 }
4442 err = devlink_nl_health_reporter_fill(msg, devlink,
4443 reporter,
4444 DEVLINK_CMD_HEALTH_REPORTER_GET,
4445 NETLINK_CB(cb->skb).portid,
4446 cb->nlh->nlmsg_seq,
4447 NLM_F_MULTI);
4448 if (err) {
4449 mutex_unlock(&devlink->lock);
4450 goto out;
4451 }
4452 idx++;
4453 }
4454 mutex_unlock(&devlink->lock);
4455 }
4456out:
4457 mutex_unlock(&devlink_mutex);
4458
4459 cb->args[0] = idx;
4460 return msg->len;
4461}
4462
4463static int
4464devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
4465 struct genl_info *info)
4466{
4467 struct devlink *devlink = info->user_ptr[0];
4468 struct devlink_health_reporter *reporter;
4469
4470 reporter = devlink_health_reporter_get_from_info(devlink, info);
4471 if (!reporter)
4472 return -EINVAL;
4473
4474 if (!reporter->ops->recover &&
4475 (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] ||
4476 info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]))
4477 return -EINVAL;
4478
4479 if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD])
4480 reporter->graceful_period =
4481 nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]);
4482
4483 if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])
4484 reporter->auto_recover =
4485 nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]);
4486
4487 return 0;
4488}
4489
4490static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
4491 struct genl_info *info)
4492{
4493 struct devlink *devlink = info->user_ptr[0];
4494 struct devlink_health_reporter *reporter;
4495
4496 reporter = devlink_health_reporter_get_from_info(devlink, info);
4497 if (!reporter)
4498 return -EINVAL;
4499
4500 return devlink_health_reporter_recover(reporter, NULL);
4501}
4502
4503static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
4504 struct genl_info *info)
4505{
4506 struct devlink *devlink = info->user_ptr[0];
4507 struct devlink_health_reporter *reporter;
4508 u64 num_of_buffers;
4509 int err;
4510
4511 reporter = devlink_health_reporter_get_from_info(devlink, info);
4512 if (!reporter)
4513 return -EINVAL;
4514
4515 if (!reporter->ops->diagnose)
4516 return -EOPNOTSUPP;
4517
4518 num_of_buffers =
4519 DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->diagnose_size);
4520
4521 mutex_lock(&reporter->diagnose_lock);
4522 devlink_health_buffers_reset(reporter->diagnose_buffers_array,
4523 num_of_buffers);
4524
4525 err = reporter->ops->diagnose(reporter,
4526 reporter->diagnose_buffers_array,
4527 DEVLINK_HEALTH_BUFFER_SIZE,
4528 num_of_buffers);
4529 if (err)
4530 goto out;
4531
4532 err = devlink_health_buffer_snd(info,
4533 DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
4534 0, reporter->diagnose_buffers_array,
4535 num_of_buffers);
4536
4537out:
4538	mutex_unlock(&reporter->diagnose_lock);
4539	return err;
4545}
4546
4547static void
4548devlink_health_dump_clear(struct devlink_health_reporter *reporter)
4549{
4550 reporter->dump_avail = false;
4551 reporter->dump_ts = 0;
4552 devlink_health_buffers_reset(reporter->dump_buffers_array,
4553 DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size));
4554}
4555
4556static int devlink_nl_cmd_health_reporter_dump_get_doit(struct sk_buff *skb,
4557 struct genl_info *info)
4558{
4559 struct devlink *devlink = info->user_ptr[0];
4560 struct devlink_health_reporter *reporter;
4561 u64 num_of_buffers;
4562 int err;
4563
4564 reporter = devlink_health_reporter_get_from_info(devlink, info);
4565 if (!reporter)
4566 return -EINVAL;
4567
4568 if (!reporter->ops->dump)
4569 return -EOPNOTSUPP;
4570
4571 num_of_buffers =
4572 DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size);
4573
4574 mutex_lock(&reporter->dump_lock);
4575 err = devlink_health_do_dump(reporter, NULL);
4576 if (err)
4577 goto out;
4578
4579 err = devlink_health_buffer_snd(info,
4580 DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
4581 0, reporter->dump_buffers_array,
4582 num_of_buffers);
4583
4584out:
4585 mutex_unlock(&reporter->dump_lock);
4586 return err;
4587}
4588
4589static int
4590devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
4591 struct genl_info *info)
4592{
4593 struct devlink *devlink = info->user_ptr[0];
4594 struct devlink_health_reporter *reporter;
4595
4596 reporter = devlink_health_reporter_get_from_info(devlink, info);
4597 if (!reporter)
4598 return -EINVAL;
4599
4600 mutex_lock(&reporter->dump_lock);
4601 devlink_health_dump_clear(reporter);
4602 mutex_unlock(&reporter->dump_lock);
4603 return 0;
4604}
4605
3600static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { 4606static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
3601 [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, 4607 [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
3602 [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, 4608 [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -3622,6 +4628,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
3622 [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 }, 4628 [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
3623 [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING }, 4629 [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
3624 [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 }, 4630 [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
4631 [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
4632 [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 },
4633 [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
3625}; 4634};
3626 4635
3627static const struct genl_ops devlink_nl_ops[] = { 4636static const struct genl_ops devlink_nl_ops[] = {
@@ -3842,6 +4851,51 @@ static const struct genl_ops devlink_nl_ops[] = {
3842 .flags = GENL_ADMIN_PERM, 4851 .flags = GENL_ADMIN_PERM,
3843 .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, 4852 .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
3844 }, 4853 },
4854 {
4855 .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET,
4856 .doit = devlink_nl_cmd_health_reporter_get_doit,
4857 .dumpit = devlink_nl_cmd_health_reporter_get_dumpit,
4858 .policy = devlink_nl_policy,
4859 .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
4860 /* can be retrieved by unprivileged users */
4861 },
4862 {
4863 .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET,
4864 .doit = devlink_nl_cmd_health_reporter_set_doit,
4865 .policy = devlink_nl_policy,
4866 .flags = GENL_ADMIN_PERM,
4867 .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
4868 },
4869 {
4870 .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
4871 .doit = devlink_nl_cmd_health_reporter_recover_doit,
4872 .policy = devlink_nl_policy,
4873 .flags = GENL_ADMIN_PERM,
4874 .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
4875 },
4876 {
4877 .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
4878 .doit = devlink_nl_cmd_health_reporter_diagnose_doit,
4879 .policy = devlink_nl_policy,
4880 .flags = GENL_ADMIN_PERM,
4881 .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
4882 },
4883 {
4884 .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
4885 .doit = devlink_nl_cmd_health_reporter_dump_get_doit,
4886 .policy = devlink_nl_policy,
4887 .flags = GENL_ADMIN_PERM,
4888 .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
4889 DEVLINK_NL_FLAG_NO_LOCK,
4890 },
4891 {
4892 .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
4893 .doit = devlink_nl_cmd_health_reporter_dump_clear_doit,
4894 .policy = devlink_nl_policy,
4895 .flags = GENL_ADMIN_PERM,
4896 .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
4897 DEVLINK_NL_FLAG_NO_LOCK,
4898 },
3845}; 4899};
3846 4900
3847static struct genl_family devlink_nl_family __ro_after_init = { 4901static struct genl_family devlink_nl_family __ro_after_init = {
@@ -3882,6 +4936,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
3882 INIT_LIST_HEAD(&devlink->resource_list); 4936 INIT_LIST_HEAD(&devlink->resource_list);
3883 INIT_LIST_HEAD(&devlink->param_list); 4937 INIT_LIST_HEAD(&devlink->param_list);
3884 INIT_LIST_HEAD(&devlink->region_list); 4938 INIT_LIST_HEAD(&devlink->region_list);
4939 INIT_LIST_HEAD(&devlink->reporter_list);
3885 mutex_init(&devlink->lock); 4940 mutex_init(&devlink->lock);
3886 return devlink; 4941 return devlink;
3887} 4942}
diff --git a/net/core/dst.c b/net/core/dst.c
index 81ccf20e2826..a263309df115 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -98,8 +98,12 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
98 struct dst_entry *dst; 98 struct dst_entry *dst;
99 99
100 if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) { 100 if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
101 if (ops->gc(ops)) 101 if (ops->gc(ops)) {
102 printk_ratelimited(KERN_NOTICE "Route cache is full: "
103 "consider increasing sysctl "
104 "net.ipv[4|6].route.max_size.\n");
102 return NULL; 105 return NULL;
106 }
103 } 107 }
104 108
105 dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC); 109 dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b02fb19df2cc..17f36317363d 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -778,6 +778,41 @@ nla_put_failure:
778 return -EMSGSIZE; 778 return -EMSGSIZE;
779} 779}
780 780
781static int rtnl_net_valid_getid_req(struct sk_buff *skb,
782 const struct nlmsghdr *nlh,
783 struct nlattr **tb,
784 struct netlink_ext_ack *extack)
785{
786 int i, err;
787
788 if (!netlink_strict_get_check(skb))
789 return nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
790 rtnl_net_policy, extack);
791
792 err = nlmsg_parse_strict(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
793 rtnl_net_policy, extack);
794 if (err)
795 return err;
796
797 for (i = 0; i <= NETNSA_MAX; i++) {
798 if (!tb[i])
799 continue;
800
801 switch (i) {
802 case NETNSA_PID:
803 case NETNSA_FD:
804 case NETNSA_NSID:
805 case NETNSA_TARGET_NSID:
806 break;
807 default:
808 NL_SET_ERR_MSG(extack, "Unsupported attribute in peer netns getid request");
809 return -EINVAL;
810 }
811 }
812
813 return 0;
814}
815
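
The same validation shape repeats in the rtnetlink, devinet, ipmr and route
hunks below: legacy callers keep the permissive parse, while callers that
opted in to strict checking get header-field validation plus an attribute
allow-list. Schematically, with hypothetical EXAMPLE_* names:

	static int example_valid_get_req(struct sk_buff *skb,
					 const struct nlmsghdr *nlh,
					 struct nlattr **tb,
					 struct netlink_ext_ack *extack)
	{
		int i, err;

		/* Old userspace: keep accepting what we always accepted. */
		if (!netlink_strict_get_check(skb))
			return nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb,
					   EXAMPLE_MAX, example_policy, extack);

		err = nlmsg_parse_strict(nlh, sizeof(struct rtgenmsg), tb,
					 EXAMPLE_MAX, example_policy, extack);
		if (err)
			return err;

		/* Reject attributes this GET request cannot honour. */
		for (i = 0; i <= EXAMPLE_MAX; i++) {
			if (tb[i] && i != EXAMPLE_SUPPORTED_ATTR) {
				NL_SET_ERR_MSG(extack, "Unsupported attribute in get request");
				return -EINVAL;
			}
		}

		return 0;
	}
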
781static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh, 816static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
782 struct netlink_ext_ack *extack) 817 struct netlink_ext_ack *extack)
783{ 818{
@@ -793,8 +828,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
793 struct sk_buff *msg; 828 struct sk_buff *msg;
794 int err; 829 int err;
795 830
796 err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, 831 err = rtnl_net_valid_getid_req(skb, nlh, tb, extack);
797 rtnl_net_policy, extack);
798 if (err < 0) 832 if (err < 0)
799 return err; 833 return err;
800 if (tb[NETNSA_PID]) { 834 if (tb[NETNSA_PID]) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5ea1bed08ede..f5a98082ac7a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3242,6 +3242,53 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
3242 return ret; 3242 return ret;
3243} 3243}
3244 3244
3245static int rtnl_valid_getlink_req(struct sk_buff *skb,
3246 const struct nlmsghdr *nlh,
3247 struct nlattr **tb,
3248 struct netlink_ext_ack *extack)
3249{
3250 struct ifinfomsg *ifm;
3251 int i, err;
3252
3253 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
3254 NL_SET_ERR_MSG(extack, "Invalid header for get link");
3255 return -EINVAL;
3256 }
3257
3258 if (!netlink_strict_get_check(skb))
3259 return nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy,
3260 extack);
3261
3262 ifm = nlmsg_data(nlh);
3263 if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
3264 ifm->ifi_change) {
3265 NL_SET_ERR_MSG(extack, "Invalid values in header for get link request");
3266 return -EINVAL;
3267 }
3268
3269 err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy,
3270 extack);
3271 if (err)
3272 return err;
3273
3274 for (i = 0; i <= IFLA_MAX; i++) {
3275 if (!tb[i])
3276 continue;
3277
3278 switch (i) {
3279 case IFLA_IFNAME:
3280 case IFLA_EXT_MASK:
3281 case IFLA_TARGET_NETNSID:
3282 break;
3283 default:
3284 NL_SET_ERR_MSG(extack, "Unsupported attribute in get link request");
3285 return -EINVAL;
3286 }
3287 }
3288
3289 return 0;
3290}
3291
3245static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, 3292static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
3246 struct netlink_ext_ack *extack) 3293 struct netlink_ext_ack *extack)
3247{ 3294{
@@ -3256,7 +3303,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
3256 int err; 3303 int err;
3257 u32 ext_filter_mask = 0; 3304 u32 ext_filter_mask = 0;
3258 3305
3259 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); 3306 err = rtnl_valid_getlink_req(skb, nlh, tb, extack);
3260 if (err < 0) 3307 if (err < 0)
3261 return err; 3308 return err;
3262 3309
@@ -3639,7 +3686,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
3639 const struct net_device_ops *ops = br_dev->netdev_ops; 3686 const struct net_device_ops *ops = br_dev->netdev_ops;
3640 3687
3641 err = ops->ndo_fdb_add(ndm, tb, dev, addr, vid, 3688 err = ops->ndo_fdb_add(ndm, tb, dev, addr, vid,
3642 nlh->nlmsg_flags); 3689 nlh->nlmsg_flags, extack);
3643 if (err) 3690 if (err)
3644 goto out; 3691 goto out;
3645 else 3692 else
@@ -3651,7 +3698,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
3651 if (dev->netdev_ops->ndo_fdb_add) 3698 if (dev->netdev_ops->ndo_fdb_add)
3652 err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr, 3699 err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr,
3653 vid, 3700 vid,
3654 nlh->nlmsg_flags); 3701 nlh->nlmsg_flags,
3702 extack);
3655 else 3703 else
3656 err = ndo_dflt_fdb_add(ndm, tb, dev, addr, vid, 3704 err = ndo_dflt_fdb_add(ndm, tb, dev, addr, vid,
3657 nlh->nlmsg_flags); 3705 nlh->nlmsg_flags);
@@ -4901,6 +4949,40 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
4901 return size; 4949 return size;
4902} 4950}
4903 4951
4952static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check,
4953 bool is_dump, struct netlink_ext_ack *extack)
4954{
4955 struct if_stats_msg *ifsm;
4956
4957 if (nlh->nlmsg_len < sizeof(*ifsm)) {
4958 NL_SET_ERR_MSG(extack, "Invalid header for stats dump");
4959 return -EINVAL;
4960 }
4961
4962 if (!strict_check)
4963 return 0;
4964
4965 ifsm = nlmsg_data(nlh);
4966
4967 /* only requests using strict checks can pass data to influence
4968 * the dump. The legacy exception is filter_mask.
4969 */
4970 if (ifsm->pad1 || ifsm->pad2 || (is_dump && ifsm->ifindex)) {
4971 NL_SET_ERR_MSG(extack, "Invalid values in header for stats dump request");
4972 return -EINVAL;
4973 }
4974 if (nlmsg_attrlen(nlh, sizeof(*ifsm))) {
4975 NL_SET_ERR_MSG(extack, "Invalid attributes after stats header");
4976 return -EINVAL;
4977 }
4978 if (ifsm->filter_mask >= IFLA_STATS_FILTER_BIT(IFLA_STATS_MAX + 1)) {
4979 NL_SET_ERR_MSG(extack, "Invalid stats requested through filter mask");
4980 return -EINVAL;
4981 }
4982
4983 return 0;
4984}
4985
4904static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh, 4986static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh,
4905 struct netlink_ext_ack *extack) 4987 struct netlink_ext_ack *extack)
4906{ 4988{
@@ -4912,8 +4994,10 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh,
4912 u32 filter_mask; 4994 u32 filter_mask;
4913 int err; 4995 int err;
4914 4996
4915 if (nlmsg_len(nlh) < sizeof(*ifsm)) 4997 err = rtnl_valid_stats_req(nlh, netlink_strict_get_check(skb),
4916 return -EINVAL; 4998 false, extack);
4999 if (err)
5000 return err;
4917 5001
4918 ifsm = nlmsg_data(nlh); 5002 ifsm = nlmsg_data(nlh);
4919 if (ifsm->ifindex > 0) 5003 if (ifsm->ifindex > 0)
@@ -4965,27 +5049,11 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
4965 5049
4966 cb->seq = net->dev_base_seq; 5050 cb->seq = net->dev_base_seq;
4967 5051
4968 if (nlmsg_len(cb->nlh) < sizeof(*ifsm)) { 5052 err = rtnl_valid_stats_req(cb->nlh, cb->strict_check, true, extack);
4969 NL_SET_ERR_MSG(extack, "Invalid header for stats dump"); 5053 if (err)
4970 return -EINVAL; 5054 return err;
4971 }
4972 5055
4973 ifsm = nlmsg_data(cb->nlh); 5056 ifsm = nlmsg_data(cb->nlh);
4974
4975 /* only requests using strict checks can pass data to influence
4976 * the dump. The legacy exception is filter_mask.
4977 */
4978 if (cb->strict_check) {
4979 if (ifsm->pad1 || ifsm->pad2 || ifsm->ifindex) {
4980 NL_SET_ERR_MSG(extack, "Invalid values in header for stats dump request");
4981 return -EINVAL;
4982 }
4983 if (nlmsg_attrlen(cb->nlh, sizeof(*ifsm))) {
4984 NL_SET_ERR_MSG(extack, "Invalid attributes after stats header");
4985 return -EINVAL;
4986 }
4987 }
4988
4989 filter_mask = ifsm->filter_mask; 5057 filter_mask = ifsm->filter_mask;
4990 if (!filter_mask) { 5058 if (!filter_mask) {
4991 NL_SET_ERR_MSG(extack, "Filter mask must be set for stats dump"); 5059 NL_SET_ERR_MSG(extack, "Filter mask must be set for stats dump");
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index d6d5c20d7044..e76ed8df9f13 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -78,11 +78,9 @@ int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src,
78{ 78{
79 int i = src->sg.start; 79 int i = src->sg.start;
80 struct scatterlist *sge = sk_msg_elem(src, i); 80 struct scatterlist *sge = sk_msg_elem(src, i);
81 struct scatterlist *sgd = NULL;
81 u32 sge_len, sge_off; 82 u32 sge_len, sge_off;
82 83
83 if (sk_msg_full(dst))
84 return -ENOSPC;
85
86 while (off) { 84 while (off) {
87 if (sge->length > off) 85 if (sge->length > off)
88 break; 86 break;
@@ -94,16 +92,27 @@ int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src,
94 } 92 }
95 93
96 while (len) { 94 while (len) {
97 if (sk_msg_full(dst))
98 return -ENOSPC;
99
100 sge_len = sge->length - off; 95 sge_len = sge->length - off;
101 sge_off = sge->offset + off;
102 if (sge_len > len) 96 if (sge_len > len)
103 sge_len = len; 97 sge_len = len;
98
99 if (dst->sg.end)
100 sgd = sk_msg_elem(dst, dst->sg.end - 1);
101
102 if (sgd &&
103 (sg_page(sge) == sg_page(sgd)) &&
104 (sg_virt(sge) + off == sg_virt(sgd) + sgd->length)) {
105 sgd->length += sge_len;
106 dst->sg.size += sge_len;
107 } else if (!sk_msg_full(dst)) {
108 sge_off = sge->offset + off;
109 sk_msg_page_add(dst, sg_page(sge), sge_len, sge_off);
110 } else {
111 return -ENOSPC;
112 }
113
104 off = 0; 114 off = 0;
105 len -= sge_len; 115 len -= sge_len;
106 sk_msg_page_add(dst, sg_page(sge), sge_len, sge_off);
107 sk_mem_charge(sk, sge_len); 116 sk_mem_charge(sk, sge_len);
108 sk_msg_iter_var_next(i); 117 sk_msg_iter_var_next(i);
109 if (i == src->sg.end && len) 118 if (i == src->sg.end && len)
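
The rework above lets sk_msg_clone() merge a source chunk into the tail of
the destination when the two are contiguous within the same page, so cloning
no longer consumes one destination slot per source chunk; -ENOSPC now fires
only when a genuinely new slot is needed and none is free. The merge
condition, restated as a standalone predicate (a sketch, not part of the
patch):

	/* True when the source chunk (at offset off) starts exactly where
	 * the last destination entry ends, within the same page.
	 */
	static bool example_can_coalesce(struct scatterlist *sgd,
					 struct scatterlist *sge, u32 off)
	{
		return sgd && sg_page(sge) == sg_page(sgd) &&
		       sg_virt(sge) + off == sg_virt(sgd) + sgd->length;
	}
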
diff --git a/net/core/sock.c b/net/core/sock.c
index 6aa2e7e0b4fb..900e8a9435f5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -520,14 +520,11 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
520} 520}
521EXPORT_SYMBOL(sk_dst_check); 521EXPORT_SYMBOL(sk_dst_check);
522 522
523static int sock_setbindtodevice(struct sock *sk, char __user *optval, 523static int sock_setbindtodevice_locked(struct sock *sk, int ifindex)
524 int optlen)
525{ 524{
526 int ret = -ENOPROTOOPT; 525 int ret = -ENOPROTOOPT;
527#ifdef CONFIG_NETDEVICES 526#ifdef CONFIG_NETDEVICES
528 struct net *net = sock_net(sk); 527 struct net *net = sock_net(sk);
529 char devname[IFNAMSIZ];
530 int index;
531 528
532 /* Sorry... */ 529 /* Sorry... */
533 ret = -EPERM; 530 ret = -EPERM;
@@ -535,6 +532,32 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval,
535 goto out; 532 goto out;
536 533
537 ret = -EINVAL; 534 ret = -EINVAL;
535 if (ifindex < 0)
536 goto out;
537
538 sk->sk_bound_dev_if = ifindex;
539 if (sk->sk_prot->rehash)
540 sk->sk_prot->rehash(sk);
541 sk_dst_reset(sk);
542
543 ret = 0;
544
545out:
546#endif
547
548 return ret;
549}
550
551static int sock_setbindtodevice(struct sock *sk, char __user *optval,
552 int optlen)
553{
554 int ret = -ENOPROTOOPT;
555#ifdef CONFIG_NETDEVICES
556 struct net *net = sock_net(sk);
557 char devname[IFNAMSIZ];
558 int index;
559
560 ret = -EINVAL;
538 if (optlen < 0) 561 if (optlen < 0)
539 goto out; 562 goto out;
540 563
@@ -566,14 +589,9 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval,
566 } 589 }
567 590
568 lock_sock(sk); 591 lock_sock(sk);
569 sk->sk_bound_dev_if = index; 592 ret = sock_setbindtodevice_locked(sk, index);
570 if (sk->sk_prot->rehash)
571 sk->sk_prot->rehash(sk);
572 sk_dst_reset(sk);
573 release_sock(sk); 593 release_sock(sk);
574 594
575 ret = 0;
576
577out: 595out:
578#endif 596#endif
579 597
@@ -1055,6 +1073,10 @@ set_rcvbuf:
1055 } 1073 }
1056 break; 1074 break;
1057 1075
1076 case SO_BINDTOIFINDEX:
1077 ret = sock_setbindtodevice_locked(sk, val);
1078 break;
1079
1058 default: 1080 default:
1059 ret = -ENOPROTOOPT; 1081 ret = -ENOPROTOOPT;
1060 break; 1082 break;
@@ -1399,6 +1421,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
1399 SOF_TXTIME_REPORT_ERRORS : 0; 1421 SOF_TXTIME_REPORT_ERRORS : 0;
1400 break; 1422 break;
1401 1423
1424 case SO_BINDTOIFINDEX:
1425 v.val = sk->sk_bound_dev_if;
1426 break;
1427
1402 default: 1428 default:
1403 /* We implement the SO_SNDLOWAT etc to not be settable 1429 /* We implement the SO_SNDLOWAT etc to not be settable
1404 * (1003.1g 7). 1430 * (1003.1g 7).
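
Userspace view of the new option (the uapi socket.h changes in this series
define SO_BINDTOIFINDEX); a minimal sketch that assumes headers new enough
to carry the macro:

	#include <net/if.h>
	#include <sys/socket.h>

	/* Bind by ifindex rather than by name (SO_BINDTODEVICE),
	 * skipping the IFNAMSIZ string lookup on every call.
	 */
	static int bind_to_ifindex(int fd, const char *ifname)
	{
		int ifindex = if_nametoindex(ifname);

		if (!ifindex)
			return -1;
		return setsockopt(fd, SOL_SOCKET, SO_BINDTOIFINDEX,
				  &ifindex, sizeof(ifindex));
	}
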
@@ -1726,7 +1752,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1726 newsk->sk_err_soft = 0; 1752 newsk->sk_err_soft = 0;
1727 newsk->sk_priority = 0; 1753 newsk->sk_priority = 0;
1728 newsk->sk_incoming_cpu = raw_smp_processor_id(); 1754 newsk->sk_incoming_cpu = raw_smp_processor_id();
1729 atomic64_set(&newsk->sk_cookie, 0);
1730 if (likely(newsk->sk_net_refcnt)) 1755 if (likely(newsk->sk_net_refcnt))
1731 sock_inuse_add(sock_net(newsk), 1); 1756 sock_inuse_add(sock_net(newsk), 1);
1732 1757
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index f78fe58eafc8..6cd3737593a6 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -282,7 +282,7 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *att
282 (nhs = dn_fib_count_nhs(attrs[RTA_MULTIPATH])) == 0) 282 (nhs = dn_fib_count_nhs(attrs[RTA_MULTIPATH])) == 0)
283 goto err_inval; 283 goto err_inval;
284 284
285 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL); 285 fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL);
286 err = -ENOBUFS; 286 err = -ENOBUFS;
287 if (fi == NULL) 287 if (fi == NULL)
288 goto failure; 288 goto failure;
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 026a05774bf7..1f4972dab9f2 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -103,7 +103,8 @@ static inline void dsa_legacy_unregister(void) { }
103int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], 103int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
104 struct net_device *dev, 104 struct net_device *dev,
105 const unsigned char *addr, u16 vid, 105 const unsigned char *addr, u16 vid,
106 u16 flags); 106 u16 flags,
107 struct netlink_ext_ack *extack);
107int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], 108int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
108 struct net_device *dev, 109 struct net_device *dev,
109 const unsigned char *addr, u16 vid); 110 const unsigned char *addr, u16 vid);
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 71bb15f491c8..79e97d2f2d9b 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -126,6 +126,17 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
126 } 126 }
127} 127}
128 128
129static int dsa_master_get_phys_port_name(struct net_device *dev,
130 char *name, size_t len)
131{
132 struct dsa_port *cpu_dp = dev->dsa_ptr;
133
134 if (snprintf(name, len, "p%d", cpu_dp->index) >= len)
135 return -EINVAL;
136
137 return 0;
138}
139
129static int dsa_master_ethtool_setup(struct net_device *dev) 140static int dsa_master_ethtool_setup(struct net_device *dev)
130{ 141{
131 struct dsa_port *cpu_dp = dev->dsa_ptr; 142 struct dsa_port *cpu_dp = dev->dsa_ptr;
@@ -158,6 +169,38 @@ static void dsa_master_ethtool_teardown(struct net_device *dev)
158 cpu_dp->orig_ethtool_ops = NULL; 169 cpu_dp->orig_ethtool_ops = NULL;
159} 170}
160 171
172static int dsa_master_ndo_setup(struct net_device *dev)
173{
174 struct dsa_port *cpu_dp = dev->dsa_ptr;
175 struct dsa_switch *ds = cpu_dp->ds;
176 struct net_device_ops *ops;
177
178 if (dev->netdev_ops->ndo_get_phys_port_name)
179 return 0;
180
181 ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL);
182 if (!ops)
183 return -ENOMEM;
184
185 cpu_dp->orig_ndo_ops = dev->netdev_ops;
186 if (cpu_dp->orig_ndo_ops)
187 memcpy(ops, cpu_dp->orig_ndo_ops, sizeof(*ops));
188
189 ops->ndo_get_phys_port_name = dsa_master_get_phys_port_name;
190
191 dev->netdev_ops = ops;
192
193 return 0;
194}
195
196static void dsa_master_ndo_teardown(struct net_device *dev)
197{
198 struct dsa_port *cpu_dp = dev->dsa_ptr;
199
200 dev->netdev_ops = cpu_dp->orig_ndo_ops;
201 cpu_dp->orig_ndo_ops = NULL;
202}
203
161static ssize_t tagging_show(struct device *d, struct device_attribute *attr, 204static ssize_t tagging_show(struct device *d, struct device_attribute *attr,
162 char *buf) 205 char *buf)
163{ 206{
@@ -223,16 +266,27 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
223 if (ret) 266 if (ret)
224 return ret; 267 return ret;
225 268
269 ret = dsa_master_ndo_setup(dev);
270 if (ret)
271 goto out_err_ethtool_teardown;
272
226 ret = sysfs_create_group(&dev->dev.kobj, &dsa_group); 273 ret = sysfs_create_group(&dev->dev.kobj, &dsa_group);
227 if (ret) 274 if (ret)
228 dsa_master_ethtool_teardown(dev); 275 goto out_err_ndo_teardown;
276
277 return ret;
229 278
279out_err_ndo_teardown:
280 dsa_master_ndo_teardown(dev);
281out_err_ethtool_teardown:
282 dsa_master_ethtool_teardown(dev);
230 return ret; 283 return ret;
231} 284}
232 285
233void dsa_master_teardown(struct net_device *dev) 286void dsa_master_teardown(struct net_device *dev)
234{ 287{
235 sysfs_remove_group(&dev->dev.kobj, &dsa_group); 288 sysfs_remove_group(&dev->dev.kobj, &dsa_group);
289 dsa_master_ndo_teardown(dev);
236 dsa_master_ethtool_teardown(dev); 290 dsa_master_ethtool_teardown(dev);
237 dsa_master_reset_mtu(dev); 291 dsa_master_reset_mtu(dev);
238 292
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index a3fcc1d01615..91de3a663226 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1009,7 +1009,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
1009int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], 1009int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
1010 struct net_device *dev, 1010 struct net_device *dev,
1011 const unsigned char *addr, u16 vid, 1011 const unsigned char *addr, u16 vid,
1012 u16 flags) 1012 u16 flags,
1013 struct netlink_ext_ack *extack)
1013{ 1014{
1014 struct dsa_port *dp = dsa_slave_to_port(dev); 1015 struct dsa_port *dp = dsa_slave_to_port(dev);
1015 1016
@@ -1450,7 +1451,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
1450 } 1451 }
1451 fdb_info->offloaded = true; 1452 fdb_info->offloaded = true;
1452 call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev, 1453 call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev,
1453 &fdb_info->info); 1454 &fdb_info->info, NULL);
1454 break; 1455 break;
1455 1456
1456 case SWITCHDEV_FDB_DEL_TO_DEVICE: 1457 case SWITCHDEV_FDB_DEL_TO_DEVICE:
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e258a00b4a3d..cd027639df2f 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2063,13 +2063,49 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2063 [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, 2063 [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) },
2064}; 2064};
2065 2065
2066static int inet_netconf_valid_get_req(struct sk_buff *skb,
2067 const struct nlmsghdr *nlh,
2068 struct nlattr **tb,
2069 struct netlink_ext_ack *extack)
2070{
2071 int i, err;
2072
2073 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2074 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2075 return -EINVAL;
2076 }
2077
2078 if (!netlink_strict_get_check(skb))
2079 return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb,
2080 NETCONFA_MAX, devconf_ipv4_policy, extack);
2081
2082 err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb,
2083 NETCONFA_MAX, devconf_ipv4_policy, extack);
2084 if (err)
2085 return err;
2086
2087 for (i = 0; i <= NETCONFA_MAX; i++) {
2088 if (!tb[i])
2089 continue;
2090
2091 switch (i) {
2092 case NETCONFA_IFINDEX:
2093 break;
2094 default:
2095 NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2096 return -EINVAL;
2097 }
2098 }
2099
2100 return 0;
2101}
2102
2066static int inet_netconf_get_devconf(struct sk_buff *in_skb, 2103static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2067 struct nlmsghdr *nlh, 2104 struct nlmsghdr *nlh,
2068 struct netlink_ext_ack *extack) 2105 struct netlink_ext_ack *extack)
2069{ 2106{
2070 struct net *net = sock_net(in_skb->sk); 2107 struct net *net = sock_net(in_skb->sk);
2071 struct nlattr *tb[NETCONFA_MAX+1]; 2108 struct nlattr *tb[NETCONFA_MAX+1];
2072 struct netconfmsg *ncm;
2073 struct sk_buff *skb; 2109 struct sk_buff *skb;
2074 struct ipv4_devconf *devconf; 2110 struct ipv4_devconf *devconf;
2075 struct in_device *in_dev; 2111 struct in_device *in_dev;
@@ -2077,9 +2113,8 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2077 int ifindex; 2113 int ifindex;
2078 int err; 2114 int err;
2079 2115
2080 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, 2116 err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2081 devconf_ipv4_policy, extack); 2117 if (err)
2082 if (err < 0)
2083 goto errout; 2118 goto errout;
2084 2119
2085 err = -EINVAL; 2120 err = -EINVAL;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index ddbf8c9a1abb..fb99002c3d4e 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2467,6 +2467,61 @@ errout:
2467 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS); 2467 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS);
2468} 2468}
2469 2469
2470static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb,
2471 const struct nlmsghdr *nlh,
2472 struct nlattr **tb,
2473 struct netlink_ext_ack *extack)
2474{
2475 struct rtmsg *rtm;
2476 int i, err;
2477
2478 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
2479 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for multicast route get request");
2480 return -EINVAL;
2481 }
2482
2483 if (!netlink_strict_get_check(skb))
2484 return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
2485 rtm_ipv4_policy, extack);
2486
2487 rtm = nlmsg_data(nlh);
2488 if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
2489 (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
2490 rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
2491 rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
2492 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for multicast route get request");
2493 return -EINVAL;
2494 }
2495
2496 err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
2497 rtm_ipv4_policy, extack);
2498 if (err)
2499 return err;
2500
2501 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
2502 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
2503 NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4");
2504 return -EINVAL;
2505 }
2506
2507 for (i = 0; i <= RTA_MAX; i++) {
2508 if (!tb[i])
2509 continue;
2510
2511 switch (i) {
2512 case RTA_SRC:
2513 case RTA_DST:
2514 case RTA_TABLE:
2515 break;
2516 default:
2517 NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in multicast route get request");
2518 return -EINVAL;
2519 }
2520 }
2521
2522 return 0;
2523}
2524
2470static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 2525static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2471 struct netlink_ext_ack *extack) 2526 struct netlink_ext_ack *extack)
2472{ 2527{
@@ -2475,18 +2530,14 @@ static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2475 struct sk_buff *skb = NULL; 2530 struct sk_buff *skb = NULL;
2476 struct mfc_cache *cache; 2531 struct mfc_cache *cache;
2477 struct mr_table *mrt; 2532 struct mr_table *mrt;
2478 struct rtmsg *rtm;
2479 __be32 src, grp; 2533 __be32 src, grp;
2480 u32 tableid; 2534 u32 tableid;
2481 int err; 2535 int err;
2482 2536
2483 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, 2537 err = ipmr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
2484 rtm_ipv4_policy, extack);
2485 if (err < 0) 2538 if (err < 0)
2486 goto errout; 2539 goto errout;
2487 2540
2488 rtm = nlmsg_data(nlh);
2489
2490 src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; 2541 src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
2491 grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; 2542 grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
2492 tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0; 2543 tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ce92f73cf104..99be68b15da0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2763,6 +2763,75 @@ static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
2763 return skb; 2763 return skb;
2764} 2764}
2765 2765
2766static int inet_rtm_valid_getroute_req(struct sk_buff *skb,
2767 const struct nlmsghdr *nlh,
2768 struct nlattr **tb,
2769 struct netlink_ext_ack *extack)
2770{
2771 struct rtmsg *rtm;
2772 int i, err;
2773
2774 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
2775 NL_SET_ERR_MSG(extack,
2776 "ipv4: Invalid header for route get request");
2777 return -EINVAL;
2778 }
2779
2780 if (!netlink_strict_get_check(skb))
2781 return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
2782 rtm_ipv4_policy, extack);
2783
2784 rtm = nlmsg_data(nlh);
2785 if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
2786 (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
2787 rtm->rtm_table || rtm->rtm_protocol ||
2788 rtm->rtm_scope || rtm->rtm_type) {
2789 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for route get request");
2790 return -EINVAL;
2791 }
2792
2793 if (rtm->rtm_flags & ~(RTM_F_NOTIFY |
2794 RTM_F_LOOKUP_TABLE |
2795 RTM_F_FIB_MATCH)) {
2796 NL_SET_ERR_MSG(extack, "ipv4: Unsupported rtm_flags for route get request");
2797 return -EINVAL;
2798 }
2799
2800 err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
2801 rtm_ipv4_policy, extack);
2802 if (err)
2803 return err;
2804
2805 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
2806 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
2807 NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4");
2808 return -EINVAL;
2809 }
2810
2811 for (i = 0; i <= RTA_MAX; i++) {
2812 if (!tb[i])
2813 continue;
2814
2815 switch (i) {
2816 case RTA_IIF:
2817 case RTA_OIF:
2818 case RTA_SRC:
2819 case RTA_DST:
2820 case RTA_IP_PROTO:
2821 case RTA_SPORT:
2822 case RTA_DPORT:
2823 case RTA_MARK:
2824 case RTA_UID:
2825 break;
2826 default:
2827 NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in route get request");
2828 return -EINVAL;
2829 }
2830 }
2831
2832 return 0;
2833}
2834
2766static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 2835static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2767 struct netlink_ext_ack *extack) 2836 struct netlink_ext_ack *extack)
2768{ 2837{
@@ -2783,8 +2852,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2783 int err; 2852 int err;
2784 int mark; 2853 int mark;
2785 2854
2786 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, 2855 err = inet_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
2787 extack);
2788 if (err < 0) 2856 if (err < 0)
2789 return err; 2857 return err;
2790 2858
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2079145a3b7c..5f099c9d04e5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2572,14 +2572,16 @@ int tcp_disconnect(struct sock *sk, int flags)
2572 sk->sk_shutdown = 0; 2572 sk->sk_shutdown = 0;
2573 sock_reset_flag(sk, SOCK_DONE); 2573 sock_reset_flag(sk, SOCK_DONE);
2574 tp->srtt_us = 0; 2574 tp->srtt_us = 0;
2575 tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
2575 tp->rcv_rtt_last_tsecr = 0; 2576 tp->rcv_rtt_last_tsecr = 0;
2576 tp->write_seq += tp->max_window + 2; 2577 tp->write_seq += tp->max_window + 2;
2577 if (tp->write_seq == 0) 2578 if (tp->write_seq == 0)
2578 tp->write_seq = 1; 2579 tp->write_seq = 1;
2579 icsk->icsk_backoff = 0; 2580 icsk->icsk_backoff = 0;
2580 tp->snd_cwnd = 2;
2581 icsk->icsk_probes_out = 0; 2581 icsk->icsk_probes_out = 0;
2582 icsk->icsk_rto = TCP_TIMEOUT_INIT;
2582 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; 2583 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
2584 tp->snd_cwnd = TCP_INIT_CWND;
2583 tp->snd_cwnd_cnt = 0; 2585 tp->snd_cwnd_cnt = 0;
2584 tp->window_clamp = 0; 2586 tp->window_clamp = 0;
2585 tp->delivered_ce = 0; 2587 tp->delivered_ce = 0;
@@ -2603,6 +2605,23 @@ int tcp_disconnect(struct sock *sk, int flags)
2603 tp->duplicate_sack[0].end_seq = 0; 2605 tp->duplicate_sack[0].end_seq = 0;
2604 tp->dsack_dups = 0; 2606 tp->dsack_dups = 0;
2605 tp->reord_seen = 0; 2607 tp->reord_seen = 0;
2608 tp->retrans_out = 0;
2609 tp->sacked_out = 0;
2610 tp->tlp_high_seq = 0;
2611 tp->last_oow_ack_time = 0;
2612 /* There's a bubble in the pipe until at least the first ACK. */
2613 tp->app_limited = ~0U;
2614 tp->rack.mstamp = 0;
2615 tp->rack.advanced = 0;
2616 tp->rack.reo_wnd_steps = 1;
2617 tp->rack.last_delivered = 0;
2618 tp->rack.reo_wnd_persist = 0;
2619 tp->rack.dsack_seen = 0;
2620 tp->syn_data_acked = 0;
2621 tp->rx_opt.saw_tstamp = 0;
2622 tp->rx_opt.dsack = 0;
2623 tp->rx_opt.num_sacks = 0;
2624
2606 2625
2607 /* Clean up fastopen related fields */ 2626 /* Clean up fastopen related fields */
2608 tcp_free_fastopen_req(tp); 2627 tcp_free_fastopen_req(tp);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 12affb7864d9..182595e2d40f 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -479,43 +479,16 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
479 479
480 tcp_init_wl(newtp, treq->rcv_isn); 480 tcp_init_wl(newtp, treq->rcv_isn);
481 481
482 newtp->srtt_us = 0;
483 newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
484 minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U); 482 minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U);
485 newicsk->icsk_rto = TCP_TIMEOUT_INIT;
486 newicsk->icsk_ack.lrcvtime = tcp_jiffies32; 483 newicsk->icsk_ack.lrcvtime = tcp_jiffies32;
487 484
488 newtp->packets_out = 0;
489 newtp->retrans_out = 0;
490 newtp->sacked_out = 0;
491 newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
492 newtp->tlp_high_seq = 0;
493 newtp->lsndtime = tcp_jiffies32; 485 newtp->lsndtime = tcp_jiffies32;
494 newsk->sk_txhash = treq->txhash; 486 newsk->sk_txhash = treq->txhash;
495 newtp->last_oow_ack_time = 0;
496 newtp->total_retrans = req->num_retrans; 487 newtp->total_retrans = req->num_retrans;
497 488
498 /* So many TCP implementations out there (incorrectly) count the
499 * initial SYN frame in their delayed-ACK and congestion control
500 * algorithms that we must have the following bandaid to talk
501 * efficiently to them. -DaveM
502 */
503 newtp->snd_cwnd = TCP_INIT_CWND;
504 newtp->snd_cwnd_cnt = 0;
505
506 /* There's a bubble in the pipe until at least the first ACK. */
507 newtp->app_limited = ~0U;
508
509 tcp_init_xmit_timers(newsk); 489 tcp_init_xmit_timers(newsk);
510 newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; 490 newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;
511 491
512 newtp->rx_opt.saw_tstamp = 0;
513
514 newtp->rx_opt.dsack = 0;
515 newtp->rx_opt.num_sacks = 0;
516
517 newtp->urg_data = 0;
518
519 if (sock_flag(newsk, SOCK_KEEPOPEN)) 492 if (sock_flag(newsk, SOCK_KEEPOPEN))
520 inet_csk_reset_keepalive_timer(newsk, 493 inet_csk_reset_keepalive_timer(newsk,
521 keepalive_time_when(newtp)); 494 keepalive_time_when(newtp));
@@ -556,13 +529,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
556 tcp_ecn_openreq_child(newtp, req); 529 tcp_ecn_openreq_child(newtp, req);
557 newtp->fastopen_req = NULL; 530 newtp->fastopen_req = NULL;
558 newtp->fastopen_rsk = NULL; 531 newtp->fastopen_rsk = NULL;
559 newtp->syn_data_acked = 0;
560 newtp->rack.mstamp = 0;
561 newtp->rack.advanced = 0;
562 newtp->rack.reo_wnd_steps = 1;
563 newtp->rack.last_delivered = 0;
564 newtp->rack.reo_wnd_persist = 0;
565 newtp->rack.dsack_seen = 0;
566 532
567 __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); 533 __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
568 534
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 730bc44dbad9..6527f61f59ff 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -980,7 +980,6 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
980{ 980{
981 struct tcp_sock *tp = tcp_sk(sk); 981 struct tcp_sock *tp = tcp_sk(sk);
982 982
983 skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
984 if (sk->sk_pacing_status != SK_PACING_NONE) { 983 if (sk->sk_pacing_status != SK_PACING_NONE) {
985 unsigned long rate = sk->sk_pacing_rate; 984 unsigned long rate = sk->sk_pacing_rate;
986 985
@@ -1028,7 +1027,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
1028 1027
1029 BUG_ON(!skb || !tcp_skb_pcount(skb)); 1028 BUG_ON(!skb || !tcp_skb_pcount(skb));
1030 tp = tcp_sk(sk); 1029 tp = tcp_sk(sk);
1031 1030 prior_wstamp = tp->tcp_wstamp_ns;
1031 tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
1032 skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
1032 if (clone_it) { 1033 if (clone_it) {
1033 TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq 1034 TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
1034 - tp->snd_una; 1035 - tp->snd_una;
@@ -1045,11 +1046,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
1045 return -ENOBUFS; 1046 return -ENOBUFS;
1046 } 1047 }
1047 1048
1048 prior_wstamp = tp->tcp_wstamp_ns;
1049 tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
1050
1051 skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
1052
1053 inet = inet_sk(sk); 1049 inet = inet_sk(sk);
1054 tcb = TCP_SKB_CB(skb); 1050 tcb = TCP_SKB_CB(skb);
1055 memset(&opts, 0, sizeof(opts)); 1051 memset(&opts, 0, sizeof(opts));
@@ -2937,12 +2933,16 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2937 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2933 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2938 } 2934 }
2939 2935
2936 /* To avoid taking spuriously low RTT samples based on a timestamp
2937 * for a transmit that never happened, always mark EVER_RETRANS
2938 */
2939 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
2940
2940 if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG)) 2941 if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG))
2941 tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB, 2942 tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB,
2942 TCP_SKB_CB(skb)->seq, segs, err); 2943 TCP_SKB_CB(skb)->seq, segs, err);
2943 2944
2944 if (likely(!err)) { 2945 if (likely(!err)) {
2945 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
2946 trace_tcp_retransmit_skb(sk, skb); 2946 trace_tcp_retransmit_skb(sk, skb);
2947 } else if (err != -EBUSY) { 2947 } else if (err != -EBUSY) {
2948 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs); 2948 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs);
@@ -2963,13 +2963,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2963#endif 2963#endif
2964 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS; 2964 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
2965 tp->retrans_out += tcp_skb_pcount(skb); 2965 tp->retrans_out += tcp_skb_pcount(skb);
2966
2967 /* Save stamp of the first retransmit. */
2968 if (!tp->retrans_stamp)
2969 tp->retrans_stamp = tcp_skb_timestamp(skb);
2970
2971 } 2966 }
2972 2967
2968 /* Save stamp of the first (attempted) retransmit. */
2969 if (!tp->retrans_stamp)
2970 tp->retrans_stamp = tcp_skb_timestamp(skb);
2971
2973 if (tp->undo_retrans < 0) 2972 if (tp->undo_retrans < 0)
2974 tp->undo_retrans = 0; 2973 tp->undo_retrans = 0;
2975 tp->undo_retrans += tcp_skb_pcount(skb); 2974 tp->undo_retrans += tcp_skb_pcount(skb);
@@ -3750,7 +3749,7 @@ void tcp_send_probe0(struct sock *sk)
3750 struct inet_connection_sock *icsk = inet_csk(sk); 3749 struct inet_connection_sock *icsk = inet_csk(sk);
3751 struct tcp_sock *tp = tcp_sk(sk); 3750 struct tcp_sock *tp = tcp_sk(sk);
3752 struct net *net = sock_net(sk); 3751 struct net *net = sock_net(sk);
3753 unsigned long probe_max; 3752 unsigned long timeout;
3754 int err; 3753 int err;
3755 3754
3756 err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE); 3755 err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
@@ -3762,26 +3761,18 @@ void tcp_send_probe0(struct sock *sk)
3762 return; 3761 return;
3763 } 3762 }
3764 3763
3764 icsk->icsk_probes_out++;
3765 if (err <= 0) { 3765 if (err <= 0) {
3766 if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2) 3766 if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
3767 icsk->icsk_backoff++; 3767 icsk->icsk_backoff++;
3768 icsk->icsk_probes_out++; 3768 timeout = tcp_probe0_when(sk, TCP_RTO_MAX);
3769 probe_max = TCP_RTO_MAX;
3770 } else { 3769 } else {
3771 /* If packet was not sent due to local congestion, 3770 /* If packet was not sent due to local congestion,
3772 * do not backoff and do not remember icsk_probes_out. 3771 * Let senders fight for local resources conservatively.
3773 * Let local senders to fight for local resources.
3774 *
3775 * Use accumulated backoff yet.
3776 */ 3772 */
3777 if (!icsk->icsk_probes_out) 3773 timeout = TCP_RESOURCE_PROBE_INTERVAL;
3778 icsk->icsk_probes_out = 1; 3774 }
3779 probe_max = TCP_RESOURCE_PROBE_INTERVAL; 3775 tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX, NULL);
3780 }
3781 tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
3782 tcp_probe0_when(sk, probe_max),
3783 TCP_RTO_MAX,
3784 NULL);
3785} 3776}
3786 3777
3787int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) 3778int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
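
The rewritten tcp_send_probe0() folds the old probe_max logic into tcp_probe0_when(), which applies the accumulated icsk_backoff as an exponential backoff clamped to TCP_RTO_MAX. A userspace transcription under assumed defaults (200 ms base, 120 s cap; the kernel computes in jiffies):

#include <stdio.h>

#define TCP_RTO_MAX_MS 120000ULL	/* 120 s */

static unsigned long probe0_when_ms(unsigned long base_ms, unsigned int backoff)
{
	unsigned long long when = (unsigned long long)base_ms << backoff;

	return when > TCP_RTO_MAX_MS ? TCP_RTO_MAX_MS : (unsigned long)when;
}

int main(void)
{
	unsigned int backoff;

	/* 200, 400, 800, ... capped at 120000 ms from backoff 10 on */
	for (backoff = 0; backoff <= 12; backoff++)
		printf("backoff %2u -> next probe in %lu ms\n",
		       backoff, probe0_when_ms(200, backoff));
	return 0;
}
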
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 71a29e9c0620..d7399a89469d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -22,28 +22,14 @@
22#include <linux/gfp.h> 22#include <linux/gfp.h>
23#include <net/tcp.h> 23#include <net/tcp.h>
24 24
25static u32 tcp_retransmit_stamp(const struct sock *sk)
26{
27 u32 start_ts = tcp_sk(sk)->retrans_stamp;
28
29 if (unlikely(!start_ts)) {
30 struct sk_buff *head = tcp_rtx_queue_head(sk);
31
32 if (!head)
33 return 0;
34 start_ts = tcp_skb_timestamp(head);
35 }
36 return start_ts;
37}
38
39static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) 25static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
40{ 26{
41 struct inet_connection_sock *icsk = inet_csk(sk); 27 struct inet_connection_sock *icsk = inet_csk(sk);
42 u32 elapsed, start_ts; 28 u32 elapsed, start_ts;
43 s32 remaining; 29 s32 remaining;
44 30
45 start_ts = tcp_retransmit_stamp(sk); 31 start_ts = tcp_sk(sk)->retrans_stamp;
46 if (!icsk->icsk_user_timeout || !start_ts) 32 if (!icsk->icsk_user_timeout)
47 return icsk->icsk_rto; 33 return icsk->icsk_rto;
48 elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts; 34 elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
49 remaining = icsk->icsk_user_timeout - elapsed; 35 remaining = icsk->icsk_user_timeout - elapsed;
@@ -173,7 +159,20 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
173 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); 159 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
174} 160}
175 161
176 162static unsigned int tcp_model_timeout(struct sock *sk,
163 unsigned int boundary,
164 unsigned int rto_base)
165{
166 unsigned int linear_backoff_thresh, timeout;
167
168 linear_backoff_thresh = ilog2(TCP_RTO_MAX / rto_base);
169 if (boundary <= linear_backoff_thresh)
170 timeout = ((2 << boundary) - 1) * rto_base;
171 else
172 timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
173 (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
174 return jiffies_to_msecs(timeout);
175}
177/** 176/**
178 * retransmits_timed_out() - returns true if this connection has timed out 177 * retransmits_timed_out() - returns true if this connection has timed out
179 * @sk: The current socket 178 * @sk: The current socket
@@ -191,26 +190,15 @@ static bool retransmits_timed_out(struct sock *sk,
191 unsigned int boundary, 190 unsigned int boundary,
192 unsigned int timeout) 191 unsigned int timeout)
193{ 192{
194 const unsigned int rto_base = TCP_RTO_MIN; 193 unsigned int start_ts;
195 unsigned int linear_backoff_thresh, start_ts;
196 194
197 if (!inet_csk(sk)->icsk_retransmits) 195 if (!inet_csk(sk)->icsk_retransmits)
198 return false; 196 return false;
199 197
200 start_ts = tcp_retransmit_stamp(sk); 198 start_ts = tcp_sk(sk)->retrans_stamp;
201 if (!start_ts) 199 if (likely(timeout == 0))
202 return false; 200 timeout = tcp_model_timeout(sk, boundary, TCP_RTO_MIN);
203
204 if (likely(timeout == 0)) {
205 linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
206 201
207 if (boundary <= linear_backoff_thresh)
208 timeout = ((2 << boundary) - 1) * rto_base;
209 else
210 timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
211 (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
212 timeout = jiffies_to_msecs(timeout);
213 }
214 return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0; 202 return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
215} 203}
216 204
@@ -345,7 +333,6 @@ static void tcp_probe_timer(struct sock *sk)
345 struct sk_buff *skb = tcp_send_head(sk); 333 struct sk_buff *skb = tcp_send_head(sk);
346 struct tcp_sock *tp = tcp_sk(sk); 334 struct tcp_sock *tp = tcp_sk(sk);
347 int max_probes; 335 int max_probes;
348 u32 start_ts;
349 336
350 if (tp->packets_out || !skb) { 337 if (tp->packets_out || !skb) {
351 icsk->icsk_probes_out = 0; 338 icsk->icsk_probes_out = 0;
@@ -360,12 +347,13 @@ static void tcp_probe_timer(struct sock *sk)
360 * corresponding system limit. We also implement similar policy when 347 * corresponding system limit. We also implement similar policy when
361 * we use RTO to probe window in tcp_retransmit_timer(). 348 * we use RTO to probe window in tcp_retransmit_timer().
362 */ 349 */
363 start_ts = tcp_skb_timestamp(skb); 350 if (icsk->icsk_user_timeout) {
364 if (!start_ts) 351 u32 elapsed = tcp_model_timeout(sk, icsk->icsk_probes_out,
365 skb->skb_mstamp_ns = tp->tcp_clock_cache; 352 tcp_probe0_base(sk));
366 else if (icsk->icsk_user_timeout && 353
367 (s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout) 354 if (elapsed >= icsk->icsk_user_timeout)
368 goto abort; 355 goto abort;
356 }
369 357
370 max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2; 358 max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
371 if (sock_flag(sk, SOCK_DEAD)) { 359 if (sock_flag(sk, SOCK_DEAD)) {
@@ -395,6 +383,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
395 struct inet_connection_sock *icsk = inet_csk(sk); 383 struct inet_connection_sock *icsk = inet_csk(sk);
396 int max_retries = icsk->icsk_syn_retries ? : 384 int max_retries = icsk->icsk_syn_retries ? :
397 sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ 385 sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
386 struct tcp_sock *tp = tcp_sk(sk);
398 struct request_sock *req; 387 struct request_sock *req;
399 388
400 req = tcp_sk(sk)->fastopen_rsk; 389 req = tcp_sk(sk)->fastopen_rsk;
@@ -412,6 +401,8 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
412 inet_rtx_syn_ack(sk, req); 401 inet_rtx_syn_ack(sk, req);
413 req->num_timeout++; 402 req->num_timeout++;
414 icsk->icsk_retransmits++; 403 icsk->icsk_retransmits++;
404 if (!tp->retrans_stamp)
405 tp->retrans_stamp = tcp_time_stamp(tp);
415 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 406 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
416 TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX); 407 TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
417} 408}
@@ -443,10 +434,8 @@ void tcp_retransmit_timer(struct sock *sk)
443 */ 434 */
444 return; 435 return;
445 } 436 }
446 if (!tp->packets_out) 437 if (!tp->packets_out || WARN_ON_ONCE(tcp_rtx_queue_empty(sk)))
447 goto out; 438 return;
448
449 WARN_ON(tcp_rtx_queue_empty(sk));
450 439
451 tp->tlp_high_seq = 0; 440 tp->tlp_high_seq = 0;
452 441
@@ -511,14 +500,13 @@ void tcp_retransmit_timer(struct sock *sk)
511 500
512 tcp_enter_loss(sk); 501 tcp_enter_loss(sk);
513 502
503 icsk->icsk_retransmits++;
514 if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) { 504 if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) {
515 /* Retransmission failed because of local congestion, 505 /* Retransmission failed because of local congestion,
516 * do not backoff. 506 * Let senders fight for local resources conservatively.
517 */ 507 */
518 if (!icsk->icsk_retransmits)
519 icsk->icsk_retransmits = 1;
520 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 508 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
521 min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL), 509 TCP_RESOURCE_PROBE_INTERVAL,
522 TCP_RTO_MAX); 510 TCP_RTO_MAX);
523 goto out; 511 goto out;
524 } 512 }
@@ -539,7 +527,6 @@ void tcp_retransmit_timer(struct sock *sk)
539 * the 120 second clamps though! 527 * the 120 second clamps though!
540 */ 528 */
541 icsk->icsk_backoff++; 529 icsk->icsk_backoff++;
542 icsk->icsk_retransmits++;
543 530
544out_reset_timer: 531out_reset_timer:
545 /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is 532 /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
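
tcp_model_timeout() above is the old inline computation factored out: 'boundary' doublings of rto_base until a doubled RTO would exceed TCP_RTO_MAX, then linear steps of TCP_RTO_MAX. A userspace transcription to show the headline number; the constants assume TCP_RTO_MIN = 200 ms, TCP_RTO_MAX = 120 s and boundary = tcp_retries2 = 15 (the kernel works in jiffies and converts with jiffies_to_msecs):

#include <stdio.h>

#define TCP_RTO_MIN_MS 200u
#define TCP_RTO_MAX_MS 120000u

static unsigned int ilog2_u32(unsigned int v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

static unsigned int model_timeout_ms(unsigned int boundary,
				     unsigned int rto_base)
{
	unsigned int thresh = ilog2_u32(TCP_RTO_MAX_MS / rto_base);

	if (boundary <= thresh)
		return ((2u << boundary) - 1) * rto_base;
	return ((2u << thresh) - 1) * rto_base +
	       (boundary - thresh) * TCP_RTO_MAX_MS;
}

int main(void)
{
	/* thresh = ilog2(600) = 9, so:
	 * ((2 << 9) - 1) * 200 ms + (15 - 9) * 120 s = 924600 ms,
	 * the familiar ~15.4 min total retransmission timeout.
	 */
	printf("timeout = %u ms\n", model_timeout_ms(15, TCP_RTO_MIN_MS));
	return 0;
}
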
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index be8b5b2157d8..e93cc0379201 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -21,18 +21,9 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
21 goto error; 21 goto error;
22 22
23 if (cfg->bind_ifindex) { 23 if (cfg->bind_ifindex) {
24 struct net_device *dev; 24 err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX,
25 25 (void *)&cfg->bind_ifindex,
26 dev = dev_get_by_index(net, cfg->bind_ifindex); 26 sizeof(cfg->bind_ifindex));
27 if (!dev) {
28 err = -ENODEV;
29 goto error;
30 }
31
32 err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE,
33 dev->name, strlen(dev->name) + 1);
34 dev_put(dev);
35
36 if (err < 0) 27 if (err < 0)
37 goto error; 28 goto error;
38 } 29 }
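
The kernel_setsockopt() switch above relies on the new SO_BINDTOIFINDEX socket option added earlier in this series; userspace can use it the same way to bind by ifindex without the SO_BINDTODEVICE name lookup. A minimal sketch; the interface name is an assumption:

#include <net/if.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef SO_BINDTOIFINDEX
#define SO_BINDTOIFINDEX 62	/* uapi value added by this series */
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int ifindex = if_nametoindex("eth0");	/* assumed device */

	if (ifindex &&
	    setsockopt(fd, SOL_SOCKET, SO_BINDTOIFINDEX,
		       &ifindex, sizeof(ifindex)) < 0)
		perror("SO_BINDTOIFINDEX");

	close(fd);
	return 0;
}
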
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 93d5ad2b1a69..57198b3c86da 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -597,6 +597,43 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = {
597 [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, 597 [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) },
598}; 598};
599 599
600static int inet6_netconf_valid_get_req(struct sk_buff *skb,
601 const struct nlmsghdr *nlh,
602 struct nlattr **tb,
603 struct netlink_ext_ack *extack)
604{
605 int i, err;
606
607 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
608 NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf get request");
609 return -EINVAL;
610 }
611
612 if (!netlink_strict_get_check(skb))
613 return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb,
614 NETCONFA_MAX, devconf_ipv6_policy, extack);
615
616 err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb,
617 NETCONFA_MAX, devconf_ipv6_policy, extack);
618 if (err)
619 return err;
620
621 for (i = 0; i <= NETCONFA_MAX; i++) {
622 if (!tb[i])
623 continue;
624
625 switch (i) {
626 case NETCONFA_IFINDEX:
627 break;
628 default:
629 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in netconf get request");
630 return -EINVAL;
631 }
632 }
633
634 return 0;
635}
636
600static int inet6_netconf_get_devconf(struct sk_buff *in_skb, 637static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
601 struct nlmsghdr *nlh, 638 struct nlmsghdr *nlh,
602 struct netlink_ext_ack *extack) 639 struct netlink_ext_ack *extack)
@@ -605,14 +642,12 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
605 struct nlattr *tb[NETCONFA_MAX+1]; 642 struct nlattr *tb[NETCONFA_MAX+1];
606 struct inet6_dev *in6_dev = NULL; 643 struct inet6_dev *in6_dev = NULL;
607 struct net_device *dev = NULL; 644 struct net_device *dev = NULL;
608 struct netconfmsg *ncm;
609 struct sk_buff *skb; 645 struct sk_buff *skb;
610 struct ipv6_devconf *devconf; 646 struct ipv6_devconf *devconf;
611 int ifindex; 647 int ifindex;
612 int err; 648 int err;
613 649
614 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, 650 err = inet6_netconf_valid_get_req(in_skb, nlh, tb, extack);
615 devconf_ipv6_policy, extack);
616 if (err < 0) 651 if (err < 0)
617 return err; 652 return err;
618 653
@@ -5179,6 +5214,52 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
5179 return inet6_dump_addr(skb, cb, type); 5214 return inet6_dump_addr(skb, cb, type);
5180} 5215}
5181 5216
5217static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb,
5218 const struct nlmsghdr *nlh,
5219 struct nlattr **tb,
5220 struct netlink_ext_ack *extack)
5221{
5222 struct ifaddrmsg *ifm;
5223 int i, err;
5224
5225 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
5226 NL_SET_ERR_MSG_MOD(extack, "Invalid header for get address request");
5227 return -EINVAL;
5228 }
5229
5230 ifm = nlmsg_data(nlh);
5231 if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
5232 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get address request");
5233 return -EINVAL;
5234 }
5235
5236 if (!netlink_strict_get_check(skb))
5237 return nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX,
5238 ifa_ipv6_policy, extack);
5239
5240 err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
5241 ifa_ipv6_policy, extack);
5242 if (err)
5243 return err;
5244
5245 for (i = 0; i <= IFA_MAX; i++) {
5246 if (!tb[i])
5247 continue;
5248
5249 switch (i) {
5250 case IFA_TARGET_NETNSID:
5251 case IFA_ADDRESS:
5252 case IFA_LOCAL:
5253 break;
5254 default:
5255 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get address request");
5256 return -EINVAL;
5257 }
5258 }
5259
5260 return 0;
5261}
5262
5182static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, 5263static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
5183 struct netlink_ext_ack *extack) 5264 struct netlink_ext_ack *extack)
5184{ 5265{
@@ -5199,8 +5280,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
5199 struct sk_buff *skb; 5280 struct sk_buff *skb;
5200 int err; 5281 int err;
5201 5282
5202 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, 5283 err = inet6_rtm_valid_getaddr_req(in_skb, nlh, tb, extack);
5203 extack);
5204 if (err < 0) 5284 if (err < 0)
5205 return err; 5285 return err;
5206 5286
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 0d1ee82ee55b..d43d076c98f5 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -523,6 +523,50 @@ static inline int ip6addrlbl_msgsize(void)
523 + nla_total_size(4); /* IFAL_LABEL */ 523 + nla_total_size(4); /* IFAL_LABEL */
524} 524}
525 525
526static int ip6addrlbl_valid_get_req(struct sk_buff *skb,
527 const struct nlmsghdr *nlh,
528 struct nlattr **tb,
529 struct netlink_ext_ack *extack)
530{
531 struct ifaddrlblmsg *ifal;
532 int i, err;
533
534 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifal))) {
535 NL_SET_ERR_MSG_MOD(extack, "Invalid header for addrlabel get request");
536 return -EINVAL;
537 }
538
539 if (!netlink_strict_get_check(skb))
540 return nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX,
541 ifal_policy, extack);
542
543 ifal = nlmsg_data(nlh);
544 if (ifal->__ifal_reserved || ifal->ifal_flags || ifal->ifal_seq) {
545 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for addrlabel get request");
546 return -EINVAL;
547 }
548
549 err = nlmsg_parse_strict(nlh, sizeof(*ifal), tb, IFAL_MAX,
550 ifal_policy, extack);
551 if (err)
552 return err;
553
554 for (i = 0; i <= IFAL_MAX; i++) {
555 if (!tb[i])
556 continue;
557
558 switch (i) {
559 case IFAL_ADDRESS:
560 break;
561 default:
562 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in addrlabel get request");
563 return -EINVAL;
564 }
565 }
566
567 return 0;
568}
569
526static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, 570static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
527 struct netlink_ext_ack *extack) 571 struct netlink_ext_ack *extack)
528{ 572{
@@ -535,8 +579,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
535 struct ip6addrlbl_entry *p; 579 struct ip6addrlbl_entry *p;
536 struct sk_buff *skb; 580 struct sk_buff *skb;
537 581
538 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, 582 err = ip6addrlbl_valid_get_req(in_skb, nlh, tb, extack);
539 extack);
540 if (err < 0) 583 if (err < 0)
541 return err; 584 return err;
542 585
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index b1be67ca6768..c465d8a102f2 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -524,7 +524,7 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
524 return PACKET_REJECT; 524 return PACKET_REJECT;
525} 525}
526 526
527static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, 527static int ip6erspan_rcv(struct sk_buff *skb,
528 struct tnl_ptk_info *tpi) 528 struct tnl_ptk_info *tpi)
529{ 529{
530 struct erspan_base_hdr *ershdr; 530 struct erspan_base_hdr *ershdr;
@@ -611,7 +611,7 @@ static int gre_rcv(struct sk_buff *skb)
611 611
612 if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) || 612 if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
613 tpi.proto == htons(ETH_P_ERSPAN2))) { 613 tpi.proto == htons(ETH_P_ERSPAN2))) {
614 if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD) 614 if (ip6erspan_rcv(skb, &tpi) == PACKET_RCVD)
615 return 0; 615 return 0;
616 goto out; 616 goto out;
617 } 617 }
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index ad1a9ccd4b44..25430c991cea 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -32,18 +32,9 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
32 goto error; 32 goto error;
33 } 33 }
34 if (cfg->bind_ifindex) { 34 if (cfg->bind_ifindex) {
35 struct net_device *dev; 35 err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX,
36 36 (void *)&cfg->bind_ifindex,
37 dev = dev_get_by_index(net, cfg->bind_ifindex); 37 sizeof(cfg->bind_ifindex));
38 if (!dev) {
39 err = -ENODEV;
40 goto error;
41 }
42
43 err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE,
44 dev->name, strlen(dev->name) + 1);
45 dev_put(dev);
46
47 if (err < 0) 38 if (err < 0)
48 goto error; 39 goto error;
49 } 40 }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 964491cf3672..dc066fdf7e46 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -4812,6 +4812,73 @@ int rt6_dump_route(struct fib6_info *rt, void *p_arg)
4812 arg->cb->nlh->nlmsg_seq, flags); 4812 arg->cb->nlh->nlmsg_seq, flags);
4813} 4813}
4814 4814
4815static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
4816 const struct nlmsghdr *nlh,
4817 struct nlattr **tb,
4818 struct netlink_ext_ack *extack)
4819{
4820 struct rtmsg *rtm;
4821 int i, err;
4822
4823 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
4824 NL_SET_ERR_MSG_MOD(extack,
4825 "Invalid header for get route request");
4826 return -EINVAL;
4827 }
4828
4829 if (!netlink_strict_get_check(skb))
4830 return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
4831 rtm_ipv6_policy, extack);
4832
4833 rtm = nlmsg_data(nlh);
4834 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
4835 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
4836 rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
4837 rtm->rtm_type) {
4838 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
4839 return -EINVAL;
4840 }
4841 if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
4842 NL_SET_ERR_MSG_MOD(extack,
4843 "Invalid flags for get route request");
4844 return -EINVAL;
4845 }
4846
4847 err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
4848 rtm_ipv6_policy, extack);
4849 if (err)
4850 return err;
4851
4852 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
4853 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
4854 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
4855 return -EINVAL;
4856 }
4857
4858 for (i = 0; i <= RTA_MAX; i++) {
4859 if (!tb[i])
4860 continue;
4861
4862 switch (i) {
4863 case RTA_SRC:
4864 case RTA_DST:
4865 case RTA_IIF:
4866 case RTA_OIF:
4867 case RTA_MARK:
4868 case RTA_UID:
4869 case RTA_SPORT:
4870 case RTA_DPORT:
4871 case RTA_IP_PROTO:
4872 break;
4873 default:
4874 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
4875 return -EINVAL;
4876 }
4877 }
4878
4879 return 0;
4880}
4881
4815static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 4882static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4816 struct netlink_ext_ack *extack) 4883 struct netlink_ext_ack *extack)
4817{ 4884{
@@ -4826,8 +4893,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4826 struct flowi6 fl6 = {}; 4893 struct flowi6 fl6 = {};
4827 bool fibmatch; 4894 bool fibmatch;
4828 4895
4829 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, 4896 err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
4830 extack);
4831 if (err < 0) 4897 if (err < 0)
4832 goto errout; 4898 goto errout;
4833 4899
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 7d55d4c04088..2662a23c658e 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1209,21 +1209,57 @@ static const struct nla_policy devconf_mpls_policy[NETCONFA_MAX + 1] = {
1209 [NETCONFA_IFINDEX] = { .len = sizeof(int) }, 1209 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
1210}; 1210};
1211 1211
1212static int mpls_netconf_valid_get_req(struct sk_buff *skb,
1213 const struct nlmsghdr *nlh,
1214 struct nlattr **tb,
1215 struct netlink_ext_ack *extack)
1216{
1217 int i, err;
1218
1219 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
1220 NL_SET_ERR_MSG_MOD(extack,
1221 "Invalid header for netconf get request");
1222 return -EINVAL;
1223 }
1224
1225 if (!netlink_strict_get_check(skb))
1226 return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb,
1227 NETCONFA_MAX, devconf_mpls_policy, extack);
1228
1229 err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb,
1230 NETCONFA_MAX, devconf_mpls_policy, extack);
1231 if (err)
1232 return err;
1233
1234 for (i = 0; i <= NETCONFA_MAX; i++) {
1235 if (!tb[i])
1236 continue;
1237
1238 switch (i) {
1239 case NETCONFA_IFINDEX:
1240 break;
1241 default:
1242 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in netconf get request");
1243 return -EINVAL;
1244 }
1245 }
1246
1247 return 0;
1248}
1249
1212static int mpls_netconf_get_devconf(struct sk_buff *in_skb, 1250static int mpls_netconf_get_devconf(struct sk_buff *in_skb,
1213 struct nlmsghdr *nlh, 1251 struct nlmsghdr *nlh,
1214 struct netlink_ext_ack *extack) 1252 struct netlink_ext_ack *extack)
1215{ 1253{
1216 struct net *net = sock_net(in_skb->sk); 1254 struct net *net = sock_net(in_skb->sk);
1217 struct nlattr *tb[NETCONFA_MAX + 1]; 1255 struct nlattr *tb[NETCONFA_MAX + 1];
1218 struct netconfmsg *ncm;
1219 struct net_device *dev; 1256 struct net_device *dev;
1220 struct mpls_dev *mdev; 1257 struct mpls_dev *mdev;
1221 struct sk_buff *skb; 1258 struct sk_buff *skb;
1222 int ifindex; 1259 int ifindex;
1223 int err; 1260 int err;
1224 1261
1225 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, 1262 err = mpls_netconf_valid_get_req(in_skb, nlh, tb, extack);
1226 devconf_mpls_policy, extack);
1227 if (err < 0) 1263 if (err < 0)
1228 goto errout; 1264 goto errout;
1229 1265
@@ -2236,6 +2272,64 @@ errout:
2236 rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err); 2272 rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err);
2237} 2273}
2238 2274
2275static int mpls_valid_getroute_req(struct sk_buff *skb,
2276 const struct nlmsghdr *nlh,
2277 struct nlattr **tb,
2278 struct netlink_ext_ack *extack)
2279{
2280 struct rtmsg *rtm;
2281 int i, err;
2282
2283 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
2284 NL_SET_ERR_MSG_MOD(extack,
2285 "Invalid header for get route request");
2286 return -EINVAL;
2287 }
2288
2289 if (!netlink_strict_get_check(skb))
2290 return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
2291 rtm_mpls_policy, extack);
2292
2293 rtm = nlmsg_data(nlh);
2294 if ((rtm->rtm_dst_len && rtm->rtm_dst_len != 20) ||
2295 rtm->rtm_src_len || rtm->rtm_tos || rtm->rtm_table ||
2296 rtm->rtm_protocol || rtm->rtm_scope || rtm->rtm_type) {
2297 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
2298 return -EINVAL;
2299 }
2300 if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
2301 NL_SET_ERR_MSG_MOD(extack,
2302 "Invalid flags for get route request");
2303 return -EINVAL;
2304 }
2305
2306 err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
2307 rtm_mpls_policy, extack);
2308 if (err)
2309 return err;
2310
2311 if ((tb[RTA_DST] || tb[RTA_NEWDST]) && !rtm->rtm_dst_len) {
2312 NL_SET_ERR_MSG_MOD(extack, "rtm_dst_len must be 20 for MPLS");
2313 return -EINVAL;
2314 }
2315
2316 for (i = 0; i <= RTA_MAX; i++) {
2317 if (!tb[i])
2318 continue;
2319
2320 switch (i) {
2321 case RTA_DST:
2322 case RTA_NEWDST:
2323 break;
2324 default:
2325 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
2326 return -EINVAL;
2327 }
2328 }
2329
2330 return 0;
2331}
2332
2239static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, 2333static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
2240 struct netlink_ext_ack *extack) 2334 struct netlink_ext_ack *extack)
2241{ 2335{
@@ -2255,8 +2349,7 @@ static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
2255 u8 n_labels; 2349 u8 n_labels;
2256 int err; 2350 int err;
2257 2351
2258 err = nlmsg_parse(in_nlh, sizeof(*rtm), tb, RTA_MAX, 2352 err = mpls_valid_getroute_req(in_skb, in_nlh, tb, extack);
2259 rtm_mpls_policy, extack);
2260 if (err < 0) 2353 if (err < 0)
2261 goto errout; 2354 goto errout;
2262 2355
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 3c023d6120f6..8fa35df94c07 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1371,6 +1371,14 @@ int netlink_has_listeners(struct sock *sk, unsigned int group)
1371} 1371}
1372EXPORT_SYMBOL_GPL(netlink_has_listeners); 1372EXPORT_SYMBOL_GPL(netlink_has_listeners);
1373 1373
1374bool netlink_strict_get_check(struct sk_buff *skb)
1375{
1376 const struct netlink_sock *nlk = nlk_sk(NETLINK_CB(skb).sk);
1377
1378 return nlk->flags & NETLINK_F_STRICT_CHK;
1379}
1380EXPORT_SYMBOL_GPL(netlink_strict_get_check);
1381
1374static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) 1382static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
1375{ 1383{
1376 struct netlink_sock *nlk = nlk_sk(sk); 1384 struct netlink_sock *nlk = nlk_sk(sk);
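
netlink_strict_get_check() reads the flag a requester sets with the NETLINK_GET_STRICT_CHK socket option, so strict GET validation is opt-in and legacy binaries keep the permissive behaviour. The userspace side, sketched (fallback defines for older headers):

#include <linux/netlink.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif
#ifndef NETLINK_GET_STRICT_CHK
#define NETLINK_GET_STRICT_CHK 12
#endif

int main(void)
{
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	int one = 1;

	if (setsockopt(fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK,
		       &one, sizeof(one)) < 0)
		perror("NETLINK_GET_STRICT_CHK");

	/* From here on, malformed GET requests (stray header fields,
	 * unknown attributes) fail with -EINVAL plus an extack string
	 * instead of being silently accepted. */
	close(fd);
	return 0;
}
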
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index c038e021a591..43849d752a1e 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -206,8 +206,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
206 return ERR_PTR(-EINVAL); 206 return ERR_PTR(-EINVAL);
207 207
208 /* Allocate and set up the meter before locking anything. */ 208 /* Allocate and set up the meter before locking anything. */
209 meter = kzalloc(n_bands * sizeof(struct dp_meter_band) + 209 meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL);
210 sizeof(*meter), GFP_KERNEL);
211 if (!meter) 210 if (!meter)
212 return ERR_PTR(-ENOMEM); 211 return ERR_PTR(-ENOMEM);
213 212
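
struct_size() packages the sizeof(*meter) + n * sizeof(elem) arithmetic and saturates to SIZE_MAX on overflow, so an absurd n_bands makes kzalloc() fail instead of under-allocating. A hedged kernel-style sketch with toy types (kernel-tree only):

#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/types.h>

struct demo_band {
	u32 rate;
	u32 burst;
};

struct demo_meter {
	u16 n_bands;
	struct demo_band bands[];	/* flexible array member */
};

static struct demo_meter *demo_meter_create(u16 n_bands)
{
	/* struct_size(m, bands, n_bands) ==
	 * sizeof(*m) + n_bands * sizeof(m->bands[0]), overflow-checked */
	struct demo_meter *m = kzalloc(struct_size(m, bands, n_bands),
				       GFP_KERNEL);

	if (m)
		m->n_bands = n_bands;
	return m;
}
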
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 6a5dce8baf19..4a57fec6f306 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -18,6 +18,7 @@
18#include <linux/rtnetlink.h> 18#include <linux/rtnetlink.h>
19#include <linux/skbuff.h> 19#include <linux/skbuff.h>
20#include <linux/idr.h> 20#include <linux/idr.h>
21#include <linux/percpu.h>
21#include <net/netlink.h> 22#include <net/netlink.h>
22#include <net/act_api.h> 23#include <net/act_api.h>
23#include <net/pkt_cls.h> 24#include <net/pkt_cls.h>
@@ -35,6 +36,7 @@ struct basic_filter {
35 struct tcf_result res; 36 struct tcf_result res;
36 struct tcf_proto *tp; 37 struct tcf_proto *tp;
37 struct list_head link; 38 struct list_head link;
39 struct tc_basic_pcnt __percpu *pf;
38 struct rcu_work rwork; 40 struct rcu_work rwork;
39}; 41};
40 42
@@ -46,8 +48,10 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
46 struct basic_filter *f; 48 struct basic_filter *f;
47 49
48 list_for_each_entry_rcu(f, &head->flist, link) { 50 list_for_each_entry_rcu(f, &head->flist, link) {
51 __this_cpu_inc(f->pf->rcnt);
49 if (!tcf_em_tree_match(skb, &f->ematches, NULL)) 52 if (!tcf_em_tree_match(skb, &f->ematches, NULL))
50 continue; 53 continue;
54 __this_cpu_inc(f->pf->rhit);
51 *res = f->res; 55 *res = f->res;
52 r = tcf_exts_exec(skb, &f->exts, res); 56 r = tcf_exts_exec(skb, &f->exts, res);
53 if (r < 0) 57 if (r < 0)
@@ -89,6 +93,7 @@ static void __basic_delete_filter(struct basic_filter *f)
89 tcf_exts_destroy(&f->exts); 93 tcf_exts_destroy(&f->exts);
90 tcf_em_tree_destroy(&f->ematches); 94 tcf_em_tree_destroy(&f->ematches);
91 tcf_exts_put_net(&f->exts); 95 tcf_exts_put_net(&f->exts);
96 free_percpu(f->pf);
92 kfree(f); 97 kfree(f);
93} 98}
94 99
@@ -208,6 +213,11 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
208 if (err) 213 if (err)
209 goto errout; 214 goto errout;
210 fnew->handle = handle; 215 fnew->handle = handle;
216 fnew->pf = alloc_percpu(struct tc_basic_pcnt);
217 if (!fnew->pf) {
218 err = -ENOMEM;
219 goto errout;
220 }
211 221
212 err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr, 222 err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr,
213 extack); 223 extack);
@@ -231,6 +241,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
231 241
232 return 0; 242 return 0;
233errout: 243errout:
244 free_percpu(fnew->pf);
234 tcf_exts_destroy(&fnew->exts); 245 tcf_exts_destroy(&fnew->exts);
235 kfree(fnew); 246 kfree(fnew);
236 return err; 247 return err;
@@ -265,8 +276,10 @@ static void basic_bind_class(void *fh, u32 classid, unsigned long cl)
265static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, 276static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
266 struct sk_buff *skb, struct tcmsg *t) 277 struct sk_buff *skb, struct tcmsg *t)
267{ 278{
279 struct tc_basic_pcnt gpf = {};
268 struct basic_filter *f = fh; 280 struct basic_filter *f = fh;
269 struct nlattr *nest; 281 struct nlattr *nest;
282 int cpu;
270 283
271 if (f == NULL) 284 if (f == NULL)
272 return skb->len; 285 return skb->len;
@@ -281,6 +294,18 @@ static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
281 nla_put_u32(skb, TCA_BASIC_CLASSID, f->res.classid)) 294 nla_put_u32(skb, TCA_BASIC_CLASSID, f->res.classid))
282 goto nla_put_failure; 295 goto nla_put_failure;
283 296
297 for_each_possible_cpu(cpu) {
298 struct tc_basic_pcnt *pf = per_cpu_ptr(f->pf, cpu);
299
300 gpf.rcnt += pf->rcnt;
301 gpf.rhit += pf->rhit;
302 }
303
304 if (nla_put_64bit(skb, TCA_BASIC_PCNT,
305 sizeof(struct tc_basic_pcnt),
306 &gpf, TCA_BASIC_PAD))
307 goto nla_put_failure;
308
284 if (tcf_exts_dump(skb, &f->exts) < 0 || 309 if (tcf_exts_dump(skb, &f->exts) < 0 ||
285 tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0) 310 tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
286 goto nla_put_failure; 311 goto nla_put_failure;
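
The counters added to cls_basic (and to cls_matchall below) follow the standard per-CPU pattern: lock-free __this_cpu_inc() on the classify fast path, and a fold across for_each_possible_cpu() only at dump time. A hedged kernel-style sketch with toy names (kernel-tree only):

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/string.h>
#include <linux/types.h>

struct demo_pcnt {
	u64 rcnt;	/* packets the filter looked at */
	u64 rhit;	/* packets that matched */
};

static struct demo_pcnt __percpu *demo_pf;

static int demo_init(void)
{
	demo_pf = alloc_percpu(struct demo_pcnt);
	return demo_pf ? 0 : -ENOMEM;
}

static void demo_classify(bool matched)
{
	__this_cpu_inc(demo_pf->rcnt);	/* no lock, no cacheline bouncing */
	if (matched)
		__this_cpu_inc(demo_pf->rhit);
}

static void demo_fold(struct demo_pcnt *sum)
{
	int cpu;

	memset(sum, 0, sizeof(*sum));
	for_each_possible_cpu(cpu) {
		struct demo_pcnt *pf = per_cpu_ptr(demo_pf, cpu);

		sum->rcnt += pf->rcnt;
		sum->rhit += pf->rhit;
	}
}
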
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 0e408ee9dcec..a1b803fd372e 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -12,6 +12,7 @@
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/percpu.h>
15 16
16#include <net/sch_generic.h> 17#include <net/sch_generic.h>
17#include <net/pkt_cls.h> 18#include <net/pkt_cls.h>
@@ -22,6 +23,7 @@ struct cls_mall_head {
22 u32 handle; 23 u32 handle;
23 u32 flags; 24 u32 flags;
24 unsigned int in_hw_count; 25 unsigned int in_hw_count;
26 struct tc_matchall_pcnt __percpu *pf;
25 struct rcu_work rwork; 27 struct rcu_work rwork;
26}; 28};
27 29
@@ -34,6 +36,7 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
34 return -1; 36 return -1;
35 37
36 *res = head->res; 38 *res = head->res;
39 __this_cpu_inc(head->pf->rhit);
37 return tcf_exts_exec(skb, &head->exts, res); 40 return tcf_exts_exec(skb, &head->exts, res);
38} 41}
39 42
@@ -46,6 +49,7 @@ static void __mall_destroy(struct cls_mall_head *head)
46{ 49{
47 tcf_exts_destroy(&head->exts); 50 tcf_exts_destroy(&head->exts);
48 tcf_exts_put_net(&head->exts); 51 tcf_exts_put_net(&head->exts);
52 free_percpu(head->pf);
49 kfree(head); 53 kfree(head);
50} 54}
51 55
@@ -192,6 +196,11 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
192 handle = 1; 196 handle = 1;
193 new->handle = handle; 197 new->handle = handle;
194 new->flags = flags; 198 new->flags = flags;
199 new->pf = alloc_percpu(struct tc_matchall_pcnt);
200 if (!new->pf) {
201 err = -ENOMEM;
202 goto err_alloc_percpu;
203 }
195 204
196 err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr, 205 err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr,
197 extack); 206 extack);
@@ -214,6 +223,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
214 223
215err_replace_hw_filter: 224err_replace_hw_filter:
216err_set_parms: 225err_set_parms:
226 free_percpu(new->pf);
227err_alloc_percpu:
217 tcf_exts_destroy(&new->exts); 228 tcf_exts_destroy(&new->exts);
218err_exts_init: 229err_exts_init:
219 kfree(new); 230 kfree(new);
@@ -270,8 +281,10 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
270static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, 281static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
271 struct sk_buff *skb, struct tcmsg *t) 282 struct sk_buff *skb, struct tcmsg *t)
272{ 283{
284 struct tc_matchall_pcnt gpf = {};
273 struct cls_mall_head *head = fh; 285 struct cls_mall_head *head = fh;
274 struct nlattr *nest; 286 struct nlattr *nest;
287 int cpu;
275 288
276 if (!head) 289 if (!head)
277 return skb->len; 290 return skb->len;
@@ -289,6 +302,17 @@ static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
289 if (head->flags && nla_put_u32(skb, TCA_MATCHALL_FLAGS, head->flags)) 302 if (head->flags && nla_put_u32(skb, TCA_MATCHALL_FLAGS, head->flags))
290 goto nla_put_failure; 303 goto nla_put_failure;
291 304
305 for_each_possible_cpu(cpu) {
306 struct tc_matchall_pcnt *pf = per_cpu_ptr(head->pf, cpu);
307
308 gpf.rhit += pf->rhit;
309 }
310
311 if (nla_put_64bit(skb, TCA_MATCHALL_PCNT,
312 sizeof(struct tc_matchall_pcnt),
313 &gpf, TCA_MATCHALL_PAD))
314 goto nla_put_failure;
315
292 if (tcf_exts_dump(skb, &head->exts)) 316 if (tcf_exts_dump(skb, &head->exts))
293 goto nla_put_failure; 317 goto nla_put_failure;
294 318
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 7e4d1ccf4c87..03e26e8d0ec9 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -758,8 +758,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
758 return 0; 758 return 0;
759} 759}
760 760
761void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, 761void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
762 unsigned int len)
763{ 762{
764 bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; 763 bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
765 const struct Qdisc_class_ops *cops; 764 const struct Qdisc_class_ops *cops;
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 5df9d1138ac9..cd78253de31d 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -556,10 +556,11 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
556 * Call all network notifier blocks. 556 * Call all network notifier blocks.
557 */ 557 */
558int call_switchdev_notifiers(unsigned long val, struct net_device *dev, 558int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
559 struct switchdev_notifier_info *info) 559 struct switchdev_notifier_info *info,
560 struct netlink_ext_ack *extack)
560{ 561{
561 info->dev = dev; 562 info->dev = dev;
562 info->extack = NULL; 563 info->extack = extack;
563 return atomic_notifier_call_chain(&switchdev_notif_chain, val, info); 564 return atomic_notifier_call_chain(&switchdev_notif_chain, val, info);
564} 565}
565EXPORT_SYMBOL_GPL(call_switchdev_notifiers); 566EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
diff --git a/net/tipc/trace.c b/net/tipc/trace.c
index 964823841efe..265f6a26aa3d 100644
--- a/net/tipc/trace.c
+++ b/net/tipc/trace.c
@@ -111,7 +111,7 @@ int tipc_skb_dump(struct sk_buff *skb, bool more, char *buf)
111 break; 111 break;
112 default: 112 default:
113 break; 113 break;
114 }; 114 }
115 i += scnprintf(buf + i, sz - i, " | %u", 115 i += scnprintf(buf + i, sz - i, " | %u",
116 msg_src_droppable(hdr)); 116 msg_src_droppable(hdr));
117 i += scnprintf(buf + i, sz - i, " %u", 117 i += scnprintf(buf + i, sz - i, " %u",
@@ -122,7 +122,7 @@ int tipc_skb_dump(struct sk_buff *skb, bool more, char *buf)
122 default: 122 default:
123 /* need more? */ 123 /* need more? */
124 break; 124 break;
125 }; 125 }
126 126
127 i += scnprintf(buf + i, sz - i, "\n"); 127 i += scnprintf(buf + i, sz - i, "\n");
128 if (!more) 128 if (!more)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 11cdc8f7db63..86b9527c4826 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -124,6 +124,7 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
124{ 124{
125 struct aead_request *aead_req = (struct aead_request *)req; 125 struct aead_request *aead_req = (struct aead_request *)req;
126 struct scatterlist *sgout = aead_req->dst; 126 struct scatterlist *sgout = aead_req->dst;
127 struct scatterlist *sgin = aead_req->src;
127 struct tls_sw_context_rx *ctx; 128 struct tls_sw_context_rx *ctx;
128 struct tls_context *tls_ctx; 129 struct tls_context *tls_ctx;
129 struct scatterlist *sg; 130 struct scatterlist *sg;
@@ -134,12 +135,16 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
134 skb = (struct sk_buff *)req->data; 135 skb = (struct sk_buff *)req->data;
135 tls_ctx = tls_get_ctx(skb->sk); 136 tls_ctx = tls_get_ctx(skb->sk);
136 ctx = tls_sw_ctx_rx(tls_ctx); 137 ctx = tls_sw_ctx_rx(tls_ctx);
137 pending = atomic_dec_return(&ctx->decrypt_pending);
138 138
139 /* Propagate if there was an err */ 139 /* Propagate if there was an err */
140 if (err) { 140 if (err) {
141 ctx->async_wait.err = err; 141 ctx->async_wait.err = err;
142 tls_err_abort(skb->sk, err); 142 tls_err_abort(skb->sk, err);
143 } else {
144 struct strp_msg *rxm = strp_msg(skb);
145
146 rxm->offset += tls_ctx->rx.prepend_size;
147 rxm->full_len -= tls_ctx->rx.overhead_size;
143 } 148 }
144 149
145 /* After using skb->sk to propagate sk through crypto async callback 150 /* After using skb->sk to propagate sk through crypto async callback
@@ -147,18 +152,21 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
147 */ 152 */
148 skb->sk = NULL; 153 skb->sk = NULL;
149 154
150 /* Release the skb, pages and memory allocated for crypto req */
151 kfree_skb(skb);
152 155
153 /* Skip the first S/G entry as it points to AAD */ 156 /* Free the destination pages if skb was not decrypted in place */
154 for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) { 157 if (sgout != sgin) {
155 if (!sg) 158 /* Skip the first S/G entry as it points to AAD */
156 break; 159 for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) {
157 put_page(sg_page(sg)); 160 if (!sg)
161 break;
162 put_page(sg_page(sg));
163 }
158 } 164 }
159 165
160 kfree(aead_req); 166 kfree(aead_req);
161 167
168 pending = atomic_dec_return(&ctx->decrypt_pending);
169
162 if (!pending && READ_ONCE(ctx->async_notify)) 170 if (!pending && READ_ONCE(ctx->async_notify))
163 complete(&ctx->async_wait.completion); 171 complete(&ctx->async_wait.completion);
164} 172}
@@ -1020,8 +1028,8 @@ send_end:
1020 return copied ? copied : ret; 1028 return copied ? copied : ret;
1021} 1029}
1022 1030
1023int tls_sw_do_sendpage(struct sock *sk, struct page *page, 1031static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
1024 int offset, size_t size, int flags) 1032 int offset, size_t size, int flags)
1025{ 1033{
1026 long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); 1034 long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
1027 struct tls_context *tls_ctx = tls_get_ctx(sk); 1035 struct tls_context *tls_ctx = tls_get_ctx(sk);
@@ -1143,16 +1151,6 @@ sendpage_end:
1143 return copied ? copied : ret; 1151 return copied ? copied : ret;
1144} 1152}
1145 1153
1146int tls_sw_sendpage_locked(struct sock *sk, struct page *page,
1147 int offset, size_t size, int flags)
1148{
1149 if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
1150 MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
1151 return -ENOTSUPP;
1152
1153 return tls_sw_do_sendpage(sk, page, offset, size, flags);
1154}
1155
1156int tls_sw_sendpage(struct sock *sk, struct page *page, 1154int tls_sw_sendpage(struct sock *sk, struct page *page,
1157 int offset, size_t size, int flags) 1155 int offset, size_t size, int flags)
1158{ 1156{
@@ -1281,7 +1279,7 @@ out:
1281static int decrypt_internal(struct sock *sk, struct sk_buff *skb, 1279static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
1282 struct iov_iter *out_iov, 1280 struct iov_iter *out_iov,
1283 struct scatterlist *out_sg, 1281 struct scatterlist *out_sg,
1284 int *chunk, bool *zc) 1282 int *chunk, bool *zc, bool async)
1285{ 1283{
1286 struct tls_context *tls_ctx = tls_get_ctx(sk); 1284 struct tls_context *tls_ctx = tls_get_ctx(sk);
1287 struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); 1285 struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
@@ -1381,13 +1379,13 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
1381fallback_to_reg_recv: 1379fallback_to_reg_recv:
1382 sgout = sgin; 1380 sgout = sgin;
1383 pages = 0; 1381 pages = 0;
1384 *chunk = 0; 1382 *chunk = data_len;
1385 *zc = false; 1383 *zc = false;
1386 } 1384 }
1387 1385
1388 /* Prepare and submit AEAD request */ 1386 /* Prepare and submit AEAD request */
1389 err = tls_do_decryption(sk, skb, sgin, sgout, iv, 1387 err = tls_do_decryption(sk, skb, sgin, sgout, iv,
1390 data_len, aead_req, *zc); 1388 data_len, aead_req, async);
1391 if (err == -EINPROGRESS) 1389 if (err == -EINPROGRESS)
1392 return err; 1390 return err;
1393 1391
@@ -1400,7 +1398,8 @@ fallback_to_reg_recv:
1400} 1398}
1401 1399
1402static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, 1400static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
1403 struct iov_iter *dest, int *chunk, bool *zc) 1401 struct iov_iter *dest, int *chunk, bool *zc,
1402 bool async)
1404{ 1403{
1405 struct tls_context *tls_ctx = tls_get_ctx(sk); 1404 struct tls_context *tls_ctx = tls_get_ctx(sk);
1406 struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); 1405 struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
@@ -1413,7 +1412,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
1413 return err; 1412 return err;
1414#endif 1413#endif
1415 if (!ctx->decrypted) { 1414 if (!ctx->decrypted) {
1416 err = decrypt_internal(sk, skb, dest, NULL, chunk, zc); 1415 err = decrypt_internal(sk, skb, dest, NULL, chunk, zc, async);
1417 if (err < 0) { 1416 if (err < 0) {
1418 if (err == -EINPROGRESS) 1417 if (err == -EINPROGRESS)
1419 tls_advance_record_sn(sk, &tls_ctx->rx); 1418 tls_advance_record_sn(sk, &tls_ctx->rx);
@@ -1439,7 +1438,7 @@ int decrypt_skb(struct sock *sk, struct sk_buff *skb,
1439 bool zc = true; 1438 bool zc = true;
1440 int chunk; 1439 int chunk;
1441 1440
1442 return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc); 1441 return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc, false);
1443} 1442}
1444 1443
1445static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, 1444static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
@@ -1466,6 +1465,77 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
1466 return true; 1465 return true;
1467} 1466}
1468 1467
1468/* This function traverses the rx_list in the tls receive context to copy
1469 * the decrypted data records into the buffer provided by the caller if zero
1470 * copy is not true. Further, the records are removed from the rx_list if it
1471 * is not a peek case and the record has been consumed completely.
1472 */
1473static int process_rx_list(struct tls_sw_context_rx *ctx,
1474 struct msghdr *msg,
1475 size_t skip,
1476 size_t len,
1477 bool zc,
1478 bool is_peek)
1479{
1480 struct sk_buff *skb = skb_peek(&ctx->rx_list);
1481 ssize_t copied = 0;
1482
1483 while (skip && skb) {
1484 struct strp_msg *rxm = strp_msg(skb);
1485
1486 if (skip < rxm->full_len)
1487 break;
1488
1489 skip = skip - rxm->full_len;
1490 skb = skb_peek_next(skb, &ctx->rx_list);
1491 }
1492
1493 while (len && skb) {
1494 struct sk_buff *next_skb;
1495 struct strp_msg *rxm = strp_msg(skb);
1496 int chunk = min_t(unsigned int, rxm->full_len - skip, len);
1497
1498 if (!zc || (rxm->full_len - skip) > len) {
1499 int err = skb_copy_datagram_msg(skb, rxm->offset + skip,
1500 msg, chunk);
1501 if (err < 0)
1502 return err;
1503 }
1504
1505 len = len - chunk;
1506 copied = copied + chunk;
1507
1508 /* Consume the data from record if it is a non-peek case */
1509 if (!is_peek) {
1510 rxm->offset = rxm->offset + chunk;
1511 rxm->full_len = rxm->full_len - chunk;
1512
1513 /* Return if there is unconsumed data in the record */
1514 if (rxm->full_len - skip)
1515 break;
1516 }
1517
1518 /* The remaining skip-bytes must lie in 1st record in rx_list.
1519 * So from the 2nd record, 'skip' should be 0.
1520 */
1521 skip = 0;
1522
1523 if (msg)
1524 msg->msg_flags |= MSG_EOR;
1525
1526 next_skb = skb_peek_next(skb, &ctx->rx_list);
1527
1528 if (!is_peek) {
1529 skb_unlink(skb, &ctx->rx_list);
1530 kfree_skb(skb);
1531 }
1532
1533 skb = next_skb;
1534 }
1535
1536 return copied;
1537}
1538
1469int tls_sw_recvmsg(struct sock *sk, 1539int tls_sw_recvmsg(struct sock *sk,
1470 struct msghdr *msg, 1540 struct msghdr *msg,
1471 size_t len, 1541 size_t len,
@@ -1476,7 +1546,8 @@ int tls_sw_recvmsg(struct sock *sk,
1476 struct tls_context *tls_ctx = tls_get_ctx(sk); 1546 struct tls_context *tls_ctx = tls_get_ctx(sk);
1477 struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); 1547 struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
1478 struct sk_psock *psock; 1548 struct sk_psock *psock;
1479 unsigned char control; 1549 unsigned char control = 0;
1550 ssize_t decrypted = 0;
1480 struct strp_msg *rxm; 1551 struct strp_msg *rxm;
1481 struct sk_buff *skb; 1552 struct sk_buff *skb;
1482 ssize_t copied = 0; 1553 ssize_t copied = 0;
@@ -1484,6 +1555,7 @@ int tls_sw_recvmsg(struct sock *sk,
1484 int target, err = 0; 1555 int target, err = 0;
1485 long timeo; 1556 long timeo;
1486 bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); 1557 bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
1558 bool is_peek = flags & MSG_PEEK;
1487 int num_async = 0; 1559 int num_async = 0;
1488 1560
1489 flags |= nonblock; 1561 flags |= nonblock;
@@ -1494,11 +1566,28 @@ int tls_sw_recvmsg(struct sock *sk,
1494 psock = sk_psock_get(sk); 1566 psock = sk_psock_get(sk);
1495 lock_sock(sk); 1567 lock_sock(sk);
1496 1568
1497 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); 1569 /* Process pending decrypted records. It must be non-zero-copy */
1498 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 1570 err = process_rx_list(ctx, msg, 0, len, false, is_peek);
1571 if (err < 0) {
1572 tls_err_abort(sk, err);
1573 goto end;
1574 } else {
1575 copied = err;
1576 }
1577
1578 len = len - copied;
1579 if (len) {
1580 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
1581 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1582 } else {
1583 goto recv_end;
1584 }
1585
1499 do { 1586 do {
1500 bool zc = false; 1587 bool retain_skb = false;
1501 bool async = false; 1588 bool async = false;
1589 bool zc = false;
1590 int to_decrypt;
1502 int chunk = 0; 1591 int chunk = 0;
1503 1592
1504 skb = tls_wait_data(sk, psock, flags, timeo, &err); 1593 skb = tls_wait_data(sk, psock, flags, timeo, &err);
@@ -1508,7 +1597,7 @@ int tls_sw_recvmsg(struct sock *sk,
1508 msg, len, flags); 1597 msg, len, flags);
1509 1598
1510 if (ret > 0) { 1599 if (ret > 0) {
1511 copied += ret; 1600 decrypted += ret;
1512 len -= ret; 1601 len -= ret;
1513 continue; 1602 continue;
1514 } 1603 }
@@ -1535,70 +1624,70 @@ int tls_sw_recvmsg(struct sock *sk,
1535 goto recv_end; 1624 goto recv_end;
1536 } 1625 }
1537 1626
1538 if (!ctx->decrypted) { 1627 to_decrypt = rxm->full_len - tls_ctx->rx.overhead_size;
1539 int to_copy = rxm->full_len - tls_ctx->rx.overhead_size;
1540 1628
1541 if (!is_kvec && to_copy <= len && 1629 if (to_decrypt <= len && !is_kvec && !is_peek)
1542 likely(!(flags & MSG_PEEK))) 1630 zc = true;
1543 zc = true;
1544 1631
1545 err = decrypt_skb_update(sk, skb, &msg->msg_iter, 1632 err = decrypt_skb_update(sk, skb, &msg->msg_iter,
1546 &chunk, &zc); 1633 &chunk, &zc, ctx->async_capable);
1547 if (err < 0 && err != -EINPROGRESS) { 1634 if (err < 0 && err != -EINPROGRESS) {
1548 tls_err_abort(sk, EBADMSG); 1635 tls_err_abort(sk, EBADMSG);
1549 goto recv_end; 1636 goto recv_end;
1550 }
1551
1552 if (err == -EINPROGRESS) {
1553 async = true;
1554 num_async++;
1555 goto pick_next_record;
1556 }
1557
1558 ctx->decrypted = true;
1559 } 1637 }
1560 1638
1561 if (!zc) { 1639 if (err == -EINPROGRESS) {
1562 chunk = min_t(unsigned int, rxm->full_len, len); 1640 async = true;
1641 num_async++;
1642 goto pick_next_record;
1643 } else {
1644 if (!zc) {
1645 if (rxm->full_len > len) {
1646 retain_skb = true;
1647 chunk = len;
1648 } else {
1649 chunk = rxm->full_len;
1650 }
1563 1651
1564 err = skb_copy_datagram_msg(skb, rxm->offset, msg, 1652 err = skb_copy_datagram_msg(skb, rxm->offset,
1565 chunk); 1653 msg, chunk);
1566 if (err < 0) 1654 if (err < 0)
1567 goto recv_end; 1655 goto recv_end;
1656
1657 if (!is_peek) {
1658 rxm->offset = rxm->offset + chunk;
1659 rxm->full_len = rxm->full_len - chunk;
1660 }
1661 }
1568 } 1662 }
1569 1663
1570pick_next_record: 1664pick_next_record:
1571 copied += chunk; 1665 if (chunk > len)
1666 chunk = len;
1667
1668 decrypted += chunk;
1572 len -= chunk; 1669 len -= chunk;
1573 if (likely(!(flags & MSG_PEEK))) { 1670
1574 u8 control = ctx->control; 1671 /* For async or peek case, queue the current skb */
1575 1672 if (async || is_peek || retain_skb) {
1576 /* For async, drop current skb reference */ 1673 skb_queue_tail(&ctx->rx_list, skb);
1577 if (async) 1674 skb = NULL;
1578 skb = NULL; 1675 }
1579 1676
1580 if (tls_sw_advance_skb(sk, skb, chunk)) { 1677 if (tls_sw_advance_skb(sk, skb, chunk)) {
1581 /* Return full control message to 1678 /* Return full control message to
1582 * userspace before trying to parse 1679 * userspace before trying to parse
1583 * another message type 1680 * another message type
1584 */
1585 msg->msg_flags |= MSG_EOR;
1586 if (control != TLS_RECORD_TYPE_DATA)
1587 goto recv_end;
1588 } else {
1589 break;
1590 }
1591 } else {
1592 /* MSG_PEEK right now cannot look beyond current skb
1593 * from strparser, meaning we cannot advance skb here
1594 * and thus unpause strparser since we'd loose original
1595 * one.
1596 */ 1681 */
1682 msg->msg_flags |= MSG_EOR;
1683 if (ctx->control != TLS_RECORD_TYPE_DATA)
1684 goto recv_end;
1685 } else {
1597 break; 1686 break;
1598 } 1687 }
1599 1688
1600 /* If we have a new message from strparser, continue now. */ 1689 /* If we have a new message from strparser, continue now. */
1601 if (copied >= target && !ctx->recv_pkt) 1690 if (decrypted >= target && !ctx->recv_pkt)
1602 break; 1691 break;
1603 } while (len); 1692 } while (len);
1604 1693
@@ -1612,13 +1701,33 @@ recv_end:
1612 /* one of async decrypt failed */ 1701 /* one of async decrypt failed */
1613 tls_err_abort(sk, err); 1702 tls_err_abort(sk, err);
1614 copied = 0; 1703 copied = 0;
1704 decrypted = 0;
1705 goto end;
1615 } 1706 }
1616 } else { 1707 } else {
1617 reinit_completion(&ctx->async_wait.completion); 1708 reinit_completion(&ctx->async_wait.completion);
1618 } 1709 }
1619 WRITE_ONCE(ctx->async_notify, false); 1710 WRITE_ONCE(ctx->async_notify, false);
1711
1712 /* Drain records from the rx_list & copy if required */
1713 if (is_peek || is_kvec)
1714 err = process_rx_list(ctx, msg, copied,
1715 decrypted, false, is_peek);
1716 else
1717 err = process_rx_list(ctx, msg, 0,
1718 decrypted, true, is_peek);
1719 if (err < 0) {
1720 tls_err_abort(sk, err);
1721 copied = 0;
1722 goto end;
1723 }
1724
1725 WARN_ON(decrypted != err);
1620 } 1726 }
1621 1727
1728 copied += decrypted;
1729
1730end:
1622 release_sock(sk); 1731 release_sock(sk);
1623 if (psock) 1732 if (psock)
1624 sk_psock_put(sk, psock); 1733 sk_psock_put(sk, psock);
@@ -1655,7 +1764,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
1655 } 1764 }
1656 1765
1657 if (!ctx->decrypted) { 1766 if (!ctx->decrypted) {
1658 err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc); 1767 err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false);
1659 1768
1660 if (err < 0) { 1769 if (err < 0) {
1661 tls_err_abort(sk, EBADMSG); 1770 tls_err_abort(sk, EBADMSG);
@@ -1842,6 +1951,7 @@ void tls_sw_release_resources_rx(struct sock *sk)
1842 if (ctx->aead_recv) { 1951 if (ctx->aead_recv) {
1843 kfree_skb(ctx->recv_pkt); 1952 kfree_skb(ctx->recv_pkt);
1844 ctx->recv_pkt = NULL; 1953 ctx->recv_pkt = NULL;
1954 skb_queue_purge(&ctx->rx_list);
1845 crypto_free_aead(ctx->aead_recv); 1955 crypto_free_aead(ctx->aead_recv);
1846 strp_stop(&ctx->strp); 1956 strp_stop(&ctx->strp);
1847 write_lock_bh(&sk->sk_callback_lock); 1957 write_lock_bh(&sk->sk_callback_lock);
@@ -1891,6 +2001,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
1891 struct crypto_aead **aead; 2001 struct crypto_aead **aead;
1892 struct strp_callbacks cb; 2002 struct strp_callbacks cb;
1893 u16 nonce_size, tag_size, iv_size, rec_seq_size; 2003 u16 nonce_size, tag_size, iv_size, rec_seq_size;
2004 struct crypto_tfm *tfm;
1894 char *iv, *rec_seq; 2005 char *iv, *rec_seq;
1895 int rc = 0; 2006 int rc = 0;
1896 2007
@@ -1937,6 +2048,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
1937 crypto_init_wait(&sw_ctx_rx->async_wait); 2048 crypto_init_wait(&sw_ctx_rx->async_wait);
1938 crypto_info = &ctx->crypto_recv.info; 2049 crypto_info = &ctx->crypto_recv.info;
1939 cctx = &ctx->rx; 2050 cctx = &ctx->rx;
2051 skb_queue_head_init(&sw_ctx_rx->rx_list);
1940 aead = &sw_ctx_rx->aead_recv; 2052 aead = &sw_ctx_rx->aead_recv;
1941 } 2053 }
1942 2054
@@ -2004,6 +2116,10 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
2004 goto free_aead; 2116 goto free_aead;
2005 2117
2006 if (sw_ctx_rx) { 2118 if (sw_ctx_rx) {
2119 tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv);
2120 sw_ctx_rx->async_capable =
2121 tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
2122
2007 /* Set up strparser */ 2123 /* Set up strparser */
2008 memset(&cb, 0, sizeof(cb)); 2124 memset(&cb, 0, sizeof(cb));
2009 cb.rcv_msg = tls_queue; 2125 cb.rcv_msg = tls_queue;
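The tls_sw.c rework above is what makes MSG_PEEK, partial reads, and async decryption coexist on a kTLS socket: records that were peeked, partially consumed, or are still decrypting are parked on ctx->rx_list and drained at the top of the next recvmsg() call instead of being dropped. For readers who want to exercise the path from userspace, the sketch below attaches rx-side kTLS to an already-connected TCP socket. The helper name and the premise that key/iv/salt/rec_seq were extracted from a completed TLS 1.2 handshake are our assumptions; the setsockopt() calls and struct layout follow the kernel uapi in linux/tls.h.

    #include <string.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <linux/tls.h>

    #ifndef TCP_ULP
    #define TCP_ULP 31
    #endif
    #ifndef SOL_TLS
    #define SOL_TLS 282
    #endif

    /* Hypothetical helper: install AES-GCM-128 receive state on fd.
     * The key material must match the peer's live TLS 1.2 session. */
    static int ktls_rx_attach(int fd, const unsigned char key[16],
                              const unsigned char iv[8],
                              const unsigned char salt[4],
                              const unsigned char rec_seq[8])
    {
            struct tls12_crypto_info_aes_gcm_128 ci;

            memset(&ci, 0, sizeof(ci));
            ci.info.version = TLS_1_2_VERSION;
            ci.info.cipher_type = TLS_CIPHER_AES_GCM_128;
            memcpy(ci.key, key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
            memcpy(ci.iv, iv, TLS_CIPHER_AES_GCM_128_IV_SIZE);
            memcpy(ci.salt, salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
            memcpy(ci.rec_seq, rec_seq, TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);

            /* Attach the TLS ULP, then hand it the receive crypto state. */
            if (setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")))
                    return -1;
            return setsockopt(fd, SOL_TLS, TLS_RX, &ci, sizeof(ci));
    }

With rx state installed, recv(fd, buf, n, MSG_PEEK) followed by a plain recv() of the same bytes behaves the way the updated selftests below expect: the peeked record survives on rx_list rather than stalling the strparser.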
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 43a1dec08825..a60df252d3cc 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -505,7 +505,7 @@ out:
505static int __vsock_bind_stream(struct vsock_sock *vsk, 505static int __vsock_bind_stream(struct vsock_sock *vsk,
506 struct sockaddr_vm *addr) 506 struct sockaddr_vm *addr)
507{ 507{
508 static u32 port = 0; 508 static u32 port;
509 struct sockaddr_vm new_addr; 509 struct sockaddr_vm new_addr;
510 510
511 if (!port) 511 if (!port)
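The af_vsock hunk is a style fix rather than a behavior change: C guarantees that objects with static storage duration start out zero, so the explicit "= 0" was redundant (scripts/checkpatch.pl warns about initializing statics to 0). A trivial standalone illustration:

    #include <stdio.h>

    static unsigned int port;       /* static storage: implicitly zero */

    int main(void)
    {
            printf("%u\n", port);   /* prints 0, no initializer needed */
            return 0;
    }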
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh
new file mode 100755
index 000000000000..749ba3cfda1d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh
@@ -0,0 +1,126 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3#
4# Test vetoing of FDB entries that mlxsw cannot offload. This exercises several
5# different veto vectors to test various rollback scenarios in the vxlan driver.
6
7lib_dir=$(dirname $0)/../../../net/forwarding
8
9ALL_TESTS="
10 fdb_create_veto_test
11 fdb_replace_veto_test
12 fdb_append_veto_test
13 fdb_changelink_veto_test
14"
15NUM_NETIFS=2
16source $lib_dir/lib.sh
17
18setup_prepare()
19{
20 swp1=${NETIFS[p1]}
21 swp2=${NETIFS[p2]}
22
23 ip link add dev br0 type bridge mcast_snooping 0
24
25 ip link set dev $swp1 up
26 ip link set dev $swp1 master br0
27 ip link set dev $swp2 up
28
29 ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
30 ttl 20 tos inherit local 198.51.100.1 dstport 4789
31 ip link set dev vxlan0 master br0
32}
33
34cleanup()
35{
36 pre_cleanup
37
38 ip link set dev vxlan0 nomaster
39 ip link del dev vxlan0
40
41 ip link set dev $swp2 down
42 ip link set dev $swp1 nomaster
43 ip link set dev $swp1 down
44
45 ip link del dev br0
46}
47
48fdb_create_veto_test()
49{
50 RET=0
51
52 bridge fdb add 01:02:03:04:05:06 dev vxlan0 self static \
53 dst 198.51.100.2 2>/dev/null
54 check_fail $? "multicast MAC not rejected"
55
56 bridge fdb add 01:02:03:04:05:06 dev vxlan0 self static \
57 dst 198.51.100.2 2>&1 >/dev/null | grep -q mlxsw_spectrum
58 check_err $? "multicast MAC rejected without extack"
59
60 log_test "vxlan FDB veto - create"
61}
62
63fdb_replace_veto_test()
64{
65 RET=0
66
67 bridge fdb add 00:01:02:03:04:05 dev vxlan0 self static \
68 dst 198.51.100.2
69 check_err $? "valid FDB rejected"
70
71 bridge fdb replace 00:01:02:03:04:05 dev vxlan0 self static \
72 dst 198.51.100.2 port 1234 2>/dev/null
73 check_fail $? "FDB with an explicit port not rejected"
74
75 bridge fdb replace 00:01:02:03:04:05 dev vxlan0 self static \
76 dst 198.51.100.2 port 1234 2>&1 >/dev/null \
77 | grep -q mlxsw_spectrum
78 check_err $? "FDB with an explicit port rejected without extack"
79
80 log_test "vxlan FDB veto - replace"
81}
82
83fdb_append_veto_test()
84{
85 RET=0
86
87 bridge fdb add 00:00:00:00:00:00 dev vxlan0 self static \
88 dst 198.51.100.2
89 check_err $? "valid FDB rejected"
90
91 bridge fdb append 00:00:00:00:00:00 dev vxlan0 self static \
92 dst 198.51.100.3 port 1234 2>/dev/null
93 check_fail $? "FDB with an explicit port not rejected"
94
95 bridge fdb append 00:00:00:00:00:00 dev vxlan0 self static \
96 dst 198.51.100.3 port 1234 2>&1 >/dev/null \
97 | grep -q mlxsw_spectrum
98 check_err $? "FDB with an explicit port rejected without extack"
99
100 log_test "vxlan FDB veto - append"
101}
102
103fdb_changelink_veto_test()
104{
105 RET=0
106
107 ip link set dev vxlan0 type vxlan \
108 group 224.0.0.1 dev lo 2>/dev/null
109 check_fail $? "FDB with a multicast IP not rejected"
110
111 ip link set dev vxlan0 type vxlan \
112 group 224.0.0.1 dev lo 2>&1 >/dev/null \
113 | grep -q mlxsw_spectrum
114 check_err $? "FDB with a multicast IP rejected without extack"
115
116 log_test "vxlan FDB veto - changelink"
117}
118
119trap cleanup EXIT
120
121setup_prepare
122setup_wait
123
124tests_run
125
126exit $EXIT_STATUS
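Note the redirection order in the extack checks above: in "2>&1 >/dev/null | grep -q mlxsw_spectrum", stderr is first pointed at the pipe (where stdout currently goes), and only then is stdout discarded, so grep sees nothing but the extack text. The equivalent plumbing in C, reusing the same bridge invocation as the test (the surrounding program is only a sketch):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/wait.h>

    int main(void)
    {
            int pfd[2];
            char line[256];
            FILE *f;

            if (pipe(pfd))
                    return 1;
            if (fork() == 0) {
                    int devnull = open("/dev/null", O_WRONLY);

                    dup2(pfd[1], 2);   /* stderr -> pipe  (the "2>&1")      */
                    dup2(devnull, 1);  /* stdout -> /dev/null (">/dev/null") */
                    close(pfd[0]);
                    execlp("bridge", "bridge", "fdb", "add",
                           "01:02:03:04:05:06", "dev", "vxlan0", "self",
                           "static", "dst", "198.51.100.2", (char *)NULL);
                    _exit(127);
            }
            close(pfd[1]);
            f = fdopen(pfd[0], "r");
            while (f && fgets(line, sizeof(line), f))
                    fputs(line, stdout);   /* only the extack text lands here */
            wait(NULL);
            return 0;
    }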
diff --git a/tools/testing/selftests/net/forwarding/ipip_flat_gre.sh b/tools/testing/selftests/net/forwarding/ipip_flat_gre.sh
new file mode 100755
index 000000000000..abb694397b86
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ipip_flat_gre.sh
@@ -0,0 +1,63 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# Test IP-in-IP GRE tunnel without key.
5# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more
6# details.
7
8ALL_TESTS="gre_flat4 gre_mtu_change"
9
10NUM_NETIFS=6
11source lib.sh
12source ipip_lib.sh
13
14setup_prepare()
15{
16 h1=${NETIFS[p1]}
17 ol1=${NETIFS[p2]}
18
19 ul1=${NETIFS[p3]}
20 ul2=${NETIFS[p4]}
21
22 ol2=${NETIFS[p5]}
23 h2=${NETIFS[p6]}
24
25 forwarding_enable
26 vrf_prepare
27 h1_create
28 h2_create
29 sw1_flat_create gre $ol1 $ul1
30 sw2_flat_create gre $ol2 $ul2
31}
32
33gre_flat4()
34{
35 RET=0
36
37 ping_test $h1 192.0.2.18 " gre flat"
38}
39
40gre_mtu_change()
41{
42 test_mtu_change gre
43}
44
45cleanup()
46{
47 pre_cleanup
48
49 sw2_flat_destroy $ol2 $ul2
50 sw1_flat_destroy $ol1 $ul1
51 h2_destroy
52 h1_destroy
53 vrf_cleanup
54 forwarding_restore
55}
56
57trap cleanup EXIT
58
59setup_prepare
60setup_wait
61tests_run
62
63exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ipip_flat_gre_key.sh b/tools/testing/selftests/net/forwarding/ipip_flat_gre_key.sh
new file mode 100755
index 000000000000..c4f373337e48
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ipip_flat_gre_key.sh
@@ -0,0 +1,63 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# Test IP-in-IP GRE tunnel with key.
5# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more
6# details.
7
8ALL_TESTS="gre_flat4 gre_mtu_change"
9
10NUM_NETIFS=6
11source lib.sh
12source ipip_lib.sh
13
14setup_prepare()
15{
16 h1=${NETIFS[p1]}
17 ol1=${NETIFS[p2]}
18
19 ul1=${NETIFS[p3]}
20 ul2=${NETIFS[p4]}
21
22 ol2=${NETIFS[p5]}
23 h2=${NETIFS[p6]}
24
25 forwarding_enable
26 vrf_prepare
27 h1_create
28 h2_create
29 sw1_flat_create gre $ol1 $ul1 key 233
30 sw2_flat_create gre $ol2 $ul2 key 233
31}
32
33gre_flat4()
34{
35 RET=0
36
37 ping_test $h1 192.0.2.18 " gre flat with key"
38}
39
40gre_mtu_change()
41{
42 test_mtu_change gre
43}
44
45cleanup()
46{
47 pre_cleanup
48
49 sw2_flat_destroy $ol2 $ul2
50 sw1_flat_destroy $ol1 $ul1
51 h2_destroy
52 h1_destroy
53 vrf_cleanup
54 forwarding_restore
55}
56
57trap cleanup EXIT
58
59setup_prepare
60setup_wait
61tests_run
62
63exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ipip_flat_gre_keys.sh b/tools/testing/selftests/net/forwarding/ipip_flat_gre_keys.sh
new file mode 100755
index 000000000000..a811130c0627
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ipip_flat_gre_keys.sh
@@ -0,0 +1,63 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# Test IP-in-IP GRE tunnel with ikey and okey.
5# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more
6# details.
7
8ALL_TESTS="gre_flat4 gre_mtu_change"
9
10NUM_NETIFS=6
11source lib.sh
12source ipip_lib.sh
13
14setup_prepare()
15{
16 h1=${NETIFS[p1]}
17 ol1=${NETIFS[p2]}
18
19 ul1=${NETIFS[p3]}
20 ul2=${NETIFS[p4]}
21
22 ol2=${NETIFS[p5]}
23 h2=${NETIFS[p6]}
24
25 forwarding_enable
26 vrf_prepare
27 h1_create
28 h2_create
29 sw1_flat_create gre $ol1 $ul1 ikey 111 okey 222
30 sw2_flat_create gre $ol2 $ul2 ikey 222 okey 111
31}
32
33gre_flat4()
34{
35 RET=0
36
37 ping_test $h1 192.0.2.18 " gre flat with ikey/okey"
38}
39
40gre_mtu_change()
41{
42 test_mtu_change gre
43}
44
45cleanup()
46{
47 pre_cleanup
48
49 sw2_flat_destroy $ol2 $ul2
50 sw1_flat_destroy $ol1 $ul1
51 h2_destroy
52 h1_destroy
53 vrf_cleanup
54 forwarding_restore
55}
56
57trap cleanup EXIT
58
59setup_prepare
60setup_wait
61tests_run
62
63exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ipip_hier_gre.sh b/tools/testing/selftests/net/forwarding/ipip_hier_gre.sh
new file mode 100755
index 000000000000..05c5b3cf2f78
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ipip_hier_gre.sh
@@ -0,0 +1,63 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# Test IP-in-IP GRE tunnels without key.
5# This test uses hierarchical topology for IP tunneling tests. See
6# ipip_lib.sh for more details.
7
8ALL_TESTS="gre_hier4 gre_mtu_change"
9
10NUM_NETIFS=6
11source lib.sh
12source ipip_lib.sh
13
14setup_prepare()
15{
16 h1=${NETIFS[p1]}
17 ol1=${NETIFS[p2]}
18
19 ul1=${NETIFS[p3]}
20 ul2=${NETIFS[p4]}
21
22 ol2=${NETIFS[p5]}
23 h2=${NETIFS[p6]}
24
25 forwarding_enable
26 vrf_prepare
27 h1_create
28 h2_create
29 sw1_hierarchical_create gre $ol1 $ul1
30 sw2_hierarchical_create gre $ol2 $ul2
31}
32
33gre_hier4()
34{
35 RET=0
36
37 ping_test $h1 192.0.2.18 " gre hierarchical"
38}
39
40gre_mtu_change()
41{
42 test_mtu_change gre
43}
44
45cleanup()
46{
47 pre_cleanup
48
49 sw2_hierarchical_destroy $ol2 $ul2
50 sw1_hierarchical_destroy $ol1 $ul1
51 h2_destroy
52 h1_destroy
53 vrf_cleanup
54 forwarding_restore
55}
56
57trap cleanup EXIT
58
59setup_prepare
60setup_wait
61tests_run
62
63exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ipip_hier_gre_key.sh b/tools/testing/selftests/net/forwarding/ipip_hier_gre_key.sh
new file mode 100755
index 000000000000..9b105dbca32a
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ipip_hier_gre_key.sh
@@ -0,0 +1,63 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# Test IP-in-IP GRE tunnel with key.
5# This test uses hierarchical topology for IP tunneling tests. See
6# ipip_lib.sh for more details.
7
8ALL_TESTS="gre_hier4 gre_mtu_change"
9
10NUM_NETIFS=6
11source lib.sh
12source ipip_lib.sh
13
14setup_prepare()
15{
16 h1=${NETIFS[p1]}
17 ol1=${NETIFS[p2]}
18
19 ul1=${NETIFS[p3]}
20 ul2=${NETIFS[p4]}
21
22 ol2=${NETIFS[p5]}
23 h2=${NETIFS[p6]}
24
25 forwarding_enable
26 vrf_prepare
27 h1_create
28 h2_create
29 sw1_hierarchical_create gre $ol1 $ul1 key 22
30 sw2_hierarchical_create gre $ol2 $ul2 key 22
31}
32
33gre_hier4()
34{
35 RET=0
36
37 ping_test $h1 192.0.2.18 " gre hierarchical with key"
38}
39
40gre_mtu_change()
41{
42 test_mtu_change gre
43}
44
45cleanup()
46{
47 pre_cleanup
48
49 sw2_hierarchical_destroy $ol2 $ul2
50 sw1_hierarchical_destroy $ol1 $ul1
51 h2_destroy
52 h1_destroy
53 vrf_cleanup
54 forwarding_restore
55}
56
57trap cleanup EXIT
58
59setup_prepare
60setup_wait
61tests_run
62
63exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ipip_hier_gre_keys.sh b/tools/testing/selftests/net/forwarding/ipip_hier_gre_keys.sh
new file mode 100755
index 000000000000..e275d25bd83a
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ipip_hier_gre_keys.sh
@@ -0,0 +1,63 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# Test IP-in-IP GRE tunnel with ikey and okey.
5# This test uses hierarchical topology for IP tunneling tests. See
6# ipip_lib.sh for more details.
7
8ALL_TESTS="gre_hier4 gre_mtu_change"
9
10NUM_NETIFS=6
11source lib.sh
12source ipip_lib.sh
13
14setup_prepare()
15{
16 h1=${NETIFS[p1]}
17 ol1=${NETIFS[p2]}
18
19 ul1=${NETIFS[p3]}
20 ul2=${NETIFS[p4]}
21
22 ol2=${NETIFS[p5]}
23 h2=${NETIFS[p6]}
24
25 forwarding_enable
26 vrf_prepare
27 h1_create
28 h2_create
29 sw1_hierarchical_create gre $ol1 $ul1 ikey 111 okey 222
30 sw2_hierarchical_create gre $ol2 $ul2 ikey 222 okey 111
31}
32
33gre_hier4()
34{
35 RET=0
36
37 ping_test $h1 192.0.2.18 " gre hierarchical with ikey/okey"
38}
39
40gre_mtu_change()
41{
42 test_mtu_change gre
43}
44
45cleanup()
46{
47 pre_cleanup
48
49 sw2_hierarchical_destroy $ol2 $ul2
50 sw1_hierarchical_destroy $ol1 $ul1
51 h2_destroy
52 h1_destroy
53 vrf_cleanup
54 forwarding_restore
55}
56
57trap cleanup EXIT
58
59setup_prepare
60setup_wait
61tests_run
62
63exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ipip_lib.sh b/tools/testing/selftests/net/forwarding/ipip_lib.sh
new file mode 100644
index 000000000000..30f36a57bae6
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ipip_lib.sh
@@ -0,0 +1,349 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# Handles creation and destruction of IP-in-IP or GRE tunnels over the given
5# topology. Supports both flat and hierarchical models.
6#
7# Flat Model:
8# Overlay and underlay share the same VRF.
 9# SW1 uses the default VRF, so its tunnel has no bound dev.
10# SW2 uses a non-default VRF, so its tunnel has a bound dev.
11# +-------------------------+
12# | H1 |
13# | $h1 + |
14# | 192.0.2.1/28 | |
15# +-------------------|-----+
16# |
17# +-------------------|-----+
18# | SW1 | |
19# | $ol1 + |
20# | 192.0.2.2/28 |
21# | |
22# | + g1a (gre) |
23# | loc=192.0.2.65 |
24# | rem=192.0.2.66 --. |
25# | tos=inherit | |
26# | .------------------' |
27# | | |
28# | v |
29# | + $ul1.111 (vlan) |
30# | | 192.0.2.129/28 |
31# | \ |
32# | \_______ |
33# | | |
34# |VRF default + $ul1 |
35# +------------|------------+
36# |
37# +------------|------------+
38# | SW2 + $ul2 |
39# | _______| |
40# | / |
41# | / |
42# | + $ul2.111 (vlan) |
43# | ^ 192.0.2.130/28 |
44# | | |
45# | | |
46# | '------------------. |
47# | + g2a (gre) | |
48# | loc=192.0.2.66 | |
49# | rem=192.0.2.65 --' |
50# | tos=inherit |
51# | |
52# | $ol2 + |
53# | 192.0.2.17/28 | |
54# | VRF v$ol2 | |
55# +-------------------|-----+
56# |
57# +-------------------|-----+
58# | H2 | |
59# | $h2 + |
60# | 192.0.2.18/28 |
61# +-------------------------+
62#
63# Hierarchical model:
64# The tunnel is bound to a device in a different VRF
65#
66# +---------------------------+
67# | H1 |
68# | $h1 + |
69# | 192.0.2.1/28 | |
70# +-------------------|-------+
71# |
72# +-------------------|-------+
73# | SW1 | |
74# | +-----------------|-----+ |
75# | | $ol1 + | |
76# | | 192.0.2.2/28 | |
77# | | | |
78# | | + g1a (gre) | |
79# | | rem=192.0.2.66 | |
80# | | tos=inherit | |
81# | | loc=192.0.2.65 | |
82# | | ^ | |
83# | | VRF v$ol1 | | |
84# | +-----------|-----------+ |
85# | | |
86# | +-----------|-----------+ |
87# | | VRF v$ul1 | | |
88# | | | | |
89# | | | | |
90# | | v | |
91# | | dummy1 + | |
92# | | 192.0.2.65 | |
93# | | .-------' | |
94# | | | | |
95# | | v | |
96# | | + $ul1.111 (vlan) | |
97# | | | 192.0.2.129/28 | |
98# | | \ | |
99# | | \_____ | |
100# | | | | |
101# | | + $ul1 | |
102# | +----------|------------+ |
103# +------------|--------------+
104# |
105# +------------|--------------+
106# | SW2 | |
107# | +----------|------------+ |
108# | | + $ul2 | |
109# | | _____| | |
110# | | / | |
111# | | / | |
112# | | | $ul2.111 (vlan) | |
113# | | + 192.0.2.130/28 | |
114# | | ^ | |
115# | | | | |
116# | | '-------. | |
117# | | dummy2 + | |
118# | | 192.0.2.66 | |
119# | | ^ | |
120# | | | | |
121# | | | | |
122# | | VRF v$ul2 | | |
123# | +-----------|-----------+ |
124# | | |
125# | +-----------|-----------+ |
126# | | VRF v$ol2 | | |
127# | | | | |
128# | | v | |
129# | | g2a (gre)+ | |
130# | | loc=192.0.2.66 | |
131# | | rem=192.0.2.65 | |
132# | | tos=inherit | |
133# | | | |
134# | | $ol2 + | |
135# | | 192.0.2.17/28 | | |
136# | +-----------------|-----+ |
137# +-------------------|-------+
138# |
139# +-------------------|-------+
140# | H2 | |
141# | $h2 + |
142# | 192.0.2.18/28 |
143# +---------------------------+
144source lib.sh
145
146h1_create()
147{
148 simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
149 ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
150}
151
152h1_destroy()
153{
154 ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
155 simple_if_fini $h1 192.0.2.1/28
156}
157
158h2_create()
159{
160 simple_if_init $h2 192.0.2.18/28
161 ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
162}
163
164h2_destroy()
165{
166 ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
167 simple_if_fini $h2 192.0.2.18/28
168}
169
170sw1_flat_create()
171{
172 local type=$1; shift
173 local ol1=$1; shift
174 local ul1=$1; shift
175
176 ip link set dev $ol1 up
177 __addr_add_del $ol1 add "192.0.2.2/28"
178
179 ip link set dev $ul1 up
180 vlan_create $ul1 111 "" 192.0.2.129/28
181
182 tunnel_create g1a $type 192.0.2.65 192.0.2.66 tos inherit "$@"
183 ip link set dev g1a up
184 __addr_add_del g1a add "192.0.2.65/32"
185
186 ip route add 192.0.2.66/32 via 192.0.2.130
187
188 ip route add 192.0.2.16/28 nexthop dev g1a
189}
190
191sw1_flat_destroy()
192{
193 local ol1=$1; shift
194 local ul1=$1; shift
195
196 ip route del 192.0.2.16/28
197
198 ip route del 192.0.2.66/32 via 192.0.2.130
199 __simple_if_fini g1a 192.0.2.65/32
200 tunnel_destroy g1a
201
202 vlan_destroy $ul1 111
203 __simple_if_fini $ul1
204 __simple_if_fini $ol1 192.0.2.2/28
205}
206
207sw2_flat_create()
208{
209 local type=$1; shift
210 local ol2=$1; shift
211 local ul2=$1; shift
212
213 simple_if_init $ol2 192.0.2.17/28
214 __simple_if_init $ul2 v$ol2
215 vlan_create $ul2 111 v$ol2 192.0.2.130/28
216
217 tunnel_create g2a $type 192.0.2.66 192.0.2.65 tos inherit dev v$ol2 \
218 "$@"
219 __simple_if_init g2a v$ol2 192.0.2.66/32
220
221 ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129
222 ip route add vrf v$ol2 192.0.2.0/28 nexthop dev g2a
223}
224
225sw2_flat_destroy()
226{
227 local ol2=$1; shift
228 local ul2=$1; shift
229
230 ip route del vrf v$ol2 192.0.2.0/28
231
232 ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129
233 __simple_if_fini g2a 192.0.2.66/32
234 tunnel_destroy g2a
235
236 vlan_destroy $ul2 111
237 __simple_if_fini $ul2
238 simple_if_fini $ol2 192.0.2.17/28
239}
240
241sw1_hierarchical_create()
242{
243 local type=$1; shift
244 local ol1=$1; shift
245 local ul1=$1; shift
246
247 simple_if_init $ol1 192.0.2.2/28
248 simple_if_init $ul1
249 ip link add name dummy1 type dummy
250 __simple_if_init dummy1 v$ul1 192.0.2.65/32
251
252 vlan_create $ul1 111 v$ul1 192.0.2.129/28
253 tunnel_create g1a $type 192.0.2.65 192.0.2.66 tos inherit dev dummy1 \
254 "$@"
255 ip link set dev g1a master v$ol1
256
257 ip route add vrf v$ul1 192.0.2.66/32 via 192.0.2.130
258 ip route add vrf v$ol1 192.0.2.16/28 nexthop dev g1a
259}
260
261sw1_hierarchical_destroy()
262{
263 local ol1=$1; shift
264 local ul1=$1; shift
265
266 ip route del vrf v$ol1 192.0.2.16/28
267 ip route del vrf v$ul1 192.0.2.66/32
268
269 tunnel_destroy g1a
270 vlan_destroy $ul1 111
271
272 __simple_if_fini dummy1 192.0.2.65/32
273 ip link del dev dummy1
274
275 simple_if_fini $ul1
276 simple_if_fini $ol1 192.0.2.2/28
277}
278
279sw2_hierarchical_create()
280{
281 local type=$1; shift
282 local ol2=$1; shift
283 local ul2=$1; shift
284
285 simple_if_init $ol2 192.0.2.17/28
286 simple_if_init $ul2
287
288 ip link add name dummy2 type dummy
289 __simple_if_init dummy2 v$ul2 192.0.2.66/32
290
291 vlan_create $ul2 111 v$ul2 192.0.2.130/28
292 tunnel_create g2a $type 192.0.2.66 192.0.2.65 tos inherit dev dummy2 \
293 "$@"
294 ip link set dev g2a master v$ol2
295
296 ip route add vrf v$ul2 192.0.2.65/32 via 192.0.2.129
297 ip route add vrf v$ol2 192.0.2.0/28 nexthop dev g2a
298}
299
300sw2_hierarchical_destroy()
301{
302 local ol2=$1; shift
303 local ul2=$1; shift
304
305 ip route del vrf v$ol2 192.0.2.0/28
306 ip route del vrf v$ul2 192.0.2.65/32
307
308 tunnel_destroy g2a
309 vlan_destroy $ul2 111
310
311 __simple_if_fini dummy2 192.0.2.66/32
312 ip link del dev dummy2
313
314 simple_if_fini $ul2
315 simple_if_fini $ol2 192.0.2.17/28
316}
317
318topo_mtu_change()
319{
320 local mtu=$1
321
322 ip link set mtu $mtu dev $h1
323 ip link set mtu $mtu dev $ol1
324 ip link set mtu $mtu dev g1a
325 ip link set mtu $mtu dev $ul1
326 ip link set mtu $mtu dev $ul1.111
327 ip link set mtu $mtu dev $h2
328 ip link set mtu $mtu dev $ol2
329 ip link set mtu $mtu dev g2a
330 ip link set mtu $mtu dev $ul2
331 ip link set mtu $mtu dev $ul2.111
332}
333
334test_mtu_change()
335{
336 local encap=$1; shift
337
338 RET=0
339
340 ping_do $h1 192.0.2.18 "-s 1800 -w 3"
341 check_fail $? "ping $encap should not pass with size 1800"
342
343 RET=0
344
345 topo_mtu_change 2000
346 ping_do $h1 192.0.2.18 "-s 1800 -w 3"
347 check_err $?
348 log_test "ping $encap packet size 1800 after MTU change"
349}
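test_mtu_change() relies on simple size arithmetic: a ping with -s 1800 produces an inner packet that, once GRE-encapsulated, cannot fit the default 1500-byte links, but passes easily after topo_mtu_change 2000. The bookkeeping below uses the header sizes fixed by IPv4, ICMP, and GRE (add 4 more bytes per GRE key option):

    #include <stdio.h>

    int main(void)
    {
            int payload  = 1800;  /* ping -s 1800                   */
            int icmp     = 8;     /* ICMP echo header               */
            int inner_ip = 20;    /* inner IPv4 header              */
            int gre      = 4;     /* base GRE header; +4 with a key */
            int outer_ip = 20;    /* outer IPv4 header              */
            int wire     = payload + icmp + inner_ip + gre + outer_ip;

            printf("on-wire size: %d bytes\n", wire);               /* 1852 */
            printf("MTU 1500: %s\n", wire <= 1500 ? "ok" : "drop"); /* drop */
            printf("MTU 2000: %s\n", wire <= 2000 ? "ok" : "drop"); /* ok   */
            return 0;
    }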
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index fac68d710f35..ff68ed19c0ef 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -452,10 +452,12 @@ TEST_F(tls, recv_partial)
452 452
453 memset(recv_mem, 0, sizeof(recv_mem)); 453 memset(recv_mem, 0, sizeof(recv_mem));
454 EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); 454 EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
455 EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_first), 0), -1); 455 EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_first),
456 MSG_WAITALL), -1);
456 EXPECT_EQ(memcmp(test_str_first, recv_mem, strlen(test_str_first)), 0); 457 EXPECT_EQ(memcmp(test_str_first, recv_mem, strlen(test_str_first)), 0);
457 memset(recv_mem, 0, sizeof(recv_mem)); 458 memset(recv_mem, 0, sizeof(recv_mem));
458 EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_second), 0), -1); 459 EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_second),
460 MSG_WAITALL), -1);
459 EXPECT_EQ(memcmp(test_str_second, recv_mem, strlen(test_str_second)), 461 EXPECT_EQ(memcmp(test_str_second, recv_mem, strlen(test_str_second)),
460 0); 462 0);
461} 463}
@@ -565,10 +567,10 @@ TEST_F(tls, recv_peek_large_buf_mult_recs)
565 len = strlen(test_str_second) + 1; 567 len = strlen(test_str_second) + 1;
566 EXPECT_EQ(send(self->fd, test_str_second, len, 0), len); 568 EXPECT_EQ(send(self->fd, test_str_second, len, 0), len);
567 569
568 len = sizeof(buf); 570 len = strlen(test_str) + 1;
569 memset(buf, 0, len); 571 memset(buf, 0, len);
570 EXPECT_NE(recv(self->cfd, buf, len, MSG_PEEK), -1); 572 EXPECT_NE((len = recv(self->cfd, buf, len,
571 573 MSG_PEEK | MSG_WAITALL)), -1);
572 len = strlen(test_str) + 1; 574 len = strlen(test_str) + 1;
573 EXPECT_EQ(memcmp(test_str, buf, len), 0); 575 EXPECT_EQ(memcmp(test_str, buf, len), 0);
574} 576}