aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-05-28 14:04:16 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-05-28 14:04:16 -0400
commit1cbe06c3cf542d48eb22180163e00f91760ef8cd (patch)
treeba093bf9e32790950b99bfec838a0354df5bf0dc
parented2608faa0f701b1dbc65277a9e5c7ff7118bfd4 (diff)
parent7a226f9c32b0481b0744e2726cd7f8349b866af5 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull more rdma updates from Doug Ledford: "This is the second group of code for the 4.7 merge window. It looks large, but only in one sense. I'll get to that in a minute. The list of changes here breaks down as follows: - Dynamic counter infrastructure in the IB drivers This is a sysfs based code to allow free form access to the hardware counters RDMA devices might support so drivers don't need to code this up repeatedly themselves - SendOnlyFullMember multicast support - IB router support - A couple misc fixes - The big item on the list: hfi1 driver updates, plus moving the hfi1 driver out of staging There was a group of 15 patches in the hfi1 list that I thought I had in the first pull request but they weren't. So that added to the length of the hfi1 section here. As far as these go, everything but the hfi1 is pretty straight forward. The hfi1 is, if you recall, the driver that Al had complaints about how it used the write/writev interfaces in an overloaded fashion. The write portion of their interface behaved like the write handler in the IB stack proper and did bi-directional communications. The writev interface, on the other hand, only accepts SDMA request structures. The completions for those structures are sent back via an entirely different event mechanism. With the security patch, we put security checks on the write interface, however, we also knew they would be going away soon. Now, we've converted the write handler in the hfi1 driver to use ioctls from the IB reserved magic area for its bidirectional communications. With that change, Intel has addressed all of the items originally on their TODO when they went into staging (as well as many items added to the list later). As such, I moved them out, and since they were the last item in the staging/rdma directory, and I don't have immediate plans to use the staging area again, I removed the staging/rdma area. 
Because of the move out of staging, as well as a series of 5 patches in the hfi1 driver that removed code people thought should be done in a different way and was optional to begin with (a snoop debug interface, an eeprom driver for an eeprom connected directly to their hfi1 chip and not via an i2c bus, and a few other things like that), the line count, especially the removal count, is high" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (56 commits) staging/rdma: Remove the entire rdma subdirectory of staging IB/core: Make device counter infrastructure dynamic IB/hfi1: Fix pio map initialization IB/hfi1: Correct 8051 link parameter settings IB/hfi1: Update pkey table properly after link down or FM start IB/rdamvt: Fix rdmavt s_ack_queue sizing IB/rdmavt: Max atomic value should be a u8 IB/hfi1: Fix hard lockup due to not using save/restore spin lock IB/hfi1: Add tracing support for send with invalidate opcode IB/hfi1, qib: Add ieth to the packet header definitions IB/hfi1: Move driver out of staging IB/hfi1: Do not free hfi1 cdev parent structure early IB/hfi1: Add trace message in user IOCTL handling IB/hfi1: Remove write(), use ioctl() for user cmds IB/hfi1: Add ioctl() interface for user commands IB/hfi1: Remove unused user command IB/hfi1: Remove snoop/diag interface IB/hfi1: Remove EPROM functionality from data device IB/hfi1: Remove UI char device IB/hfi1: Remove multiple device cdev ...
-rw-r--r--Documentation/infiniband/sysfs.txt12
-rw-r--r--MAINTAINERS14
-rw-r--r--drivers/infiniband/Kconfig2
-rw-r--r--drivers/infiniband/core/Makefile12
-rw-r--r--drivers/infiniband/core/addr.c226
-rw-r--r--drivers/infiniband/core/core_priv.h16
-rw-r--r--drivers/infiniband/core/device.c58
-rw-r--r--drivers/infiniband/core/mad.c13
-rw-r--r--drivers/infiniband/core/multicast.c23
-rw-r--r--drivers/infiniband/core/sa_query.c211
-rw-r--r--drivers/infiniband/core/sysfs.c366
-rw-r--r--drivers/infiniband/hw/Makefile1
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c2
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c147
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c58
-rw-r--r--drivers/infiniband/hw/hfi1/Kconfig (renamed from drivers/staging/rdma/hfi1/Kconfig)0
-rw-r--r--drivers/infiniband/hw/hfi1/Makefile (renamed from drivers/staging/rdma/hfi1/Makefile)2
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c (renamed from drivers/staging/rdma/hfi1/affinity.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.h (renamed from drivers/staging/rdma/hfi1/affinity.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/aspm.h (renamed from drivers/staging/rdma/hfi1/aspm.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c (renamed from drivers/staging/rdma/hfi1/chip.c)41
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h (renamed from drivers/staging/rdma/hfi1/chip.h)6
-rw-r--r--drivers/infiniband/hw/hfi1/chip_registers.h (renamed from drivers/staging/rdma/hfi1/chip_registers.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/common.h (renamed from drivers/staging/rdma/hfi1/common.h)5
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c (renamed from drivers/staging/rdma/hfi1/debugfs.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.h (renamed from drivers/staging/rdma/hfi1/debugfs.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/device.c (renamed from drivers/staging/rdma/hfi1/device.c)18
-rw-r--r--drivers/infiniband/hw/hfi1/device.h (renamed from drivers/staging/rdma/hfi1/device.h)3
-rw-r--r--drivers/infiniband/hw/hfi1/dma.c (renamed from drivers/staging/rdma/hfi1/dma.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c (renamed from drivers/staging/rdma/hfi1/driver.c)2
-rw-r--r--drivers/infiniband/hw/hfi1/efivar.c (renamed from drivers/staging/rdma/hfi1/efivar.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/efivar.h (renamed from drivers/staging/rdma/hfi1/efivar.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/eprom.c102
-rw-r--r--drivers/infiniband/hw/hfi1/eprom.h (renamed from drivers/staging/rdma/hfi1/eprom.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c (renamed from drivers/staging/rdma/hfi1/file_ops.c)549
-rw-r--r--drivers/infiniband/hw/hfi1/firmware.c (renamed from drivers/staging/rdma/hfi1/firmware.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h (renamed from drivers/staging/rdma/hfi1/hfi.h)7
-rw-r--r--drivers/infiniband/hw/hfi1/init.c (renamed from drivers/staging/rdma/hfi1/init.c)22
-rw-r--r--drivers/infiniband/hw/hfi1/intr.c (renamed from drivers/staging/rdma/hfi1/intr.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/iowait.h (renamed from drivers/staging/rdma/hfi1/iowait.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c (renamed from drivers/staging/rdma/hfi1/mad.c)99
-rw-r--r--drivers/infiniband/hw/hfi1/mad.h (renamed from drivers/staging/rdma/hfi1/mad.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/mmu_rb.c (renamed from drivers/staging/rdma/hfi1/mmu_rb.c)22
-rw-r--r--drivers/infiniband/hw/hfi1/mmu_rb.h (renamed from drivers/staging/rdma/hfi1/mmu_rb.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/opa_compat.h (renamed from drivers/staging/rdma/hfi1/opa_compat.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c (renamed from drivers/staging/rdma/hfi1/pcie.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c (renamed from drivers/staging/rdma/hfi1/pio.c)3
-rw-r--r--drivers/infiniband/hw/hfi1/pio.h (renamed from drivers/staging/rdma/hfi1/pio.h)8
-rw-r--r--drivers/infiniband/hw/hfi1/pio_copy.c (renamed from drivers/staging/rdma/hfi1/pio_copy.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/platform.c (renamed from drivers/staging/rdma/hfi1/platform.c)27
-rw-r--r--drivers/infiniband/hw/hfi1/platform.h (renamed from drivers/staging/rdma/hfi1/platform.h)1
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c (renamed from drivers/staging/rdma/hfi1/qp.c)9
-rw-r--r--drivers/infiniband/hw/hfi1/qp.h (renamed from drivers/staging/rdma/hfi1/qp.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.c (renamed from drivers/staging/rdma/hfi1/qsfp.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.h (renamed from drivers/staging/rdma/hfi1/qsfp.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c (renamed from drivers/staging/rdma/hfi1/rc.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c (renamed from drivers/staging/rdma/hfi1/ruc.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c (renamed from drivers/staging/rdma/hfi1/sdma.c)4
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.h (renamed from drivers/staging/rdma/hfi1/sdma.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/sdma_txreq.h (renamed from drivers/staging/rdma/hfi1/sdma_txreq.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/sysfs.c (renamed from drivers/staging/rdma/hfi1/sysfs.c)4
-rw-r--r--drivers/infiniband/hw/hfi1/trace.c (renamed from drivers/staging/rdma/hfi1/trace.c)8
-rw-r--r--drivers/infiniband/hw/hfi1/trace.h (renamed from drivers/staging/rdma/hfi1/trace.h)5
-rw-r--r--drivers/infiniband/hw/hfi1/twsi.c (renamed from drivers/staging/rdma/hfi1/twsi.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/twsi.h (renamed from drivers/staging/rdma/hfi1/twsi.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c (renamed from drivers/staging/rdma/hfi1/uc.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c (renamed from drivers/staging/rdma/hfi1/ud.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.c (renamed from drivers/staging/rdma/hfi1/user_exp_rcv.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.h (renamed from drivers/staging/rdma/hfi1/user_exp_rcv.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/user_pages.c (renamed from drivers/staging/rdma/hfi1/user_pages.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c (renamed from drivers/staging/rdma/hfi1/user_sdma.c)18
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.h (renamed from drivers/staging/rdma/hfi1/user_sdma.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c (renamed from drivers/staging/rdma/hfi1/verbs.c)4
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h (renamed from drivers/staging/rdma/hfi1/verbs.h)1
-rw-r--r--drivers/infiniband/hw/hfi1/verbs_txreq.c (renamed from drivers/staging/rdma/hfi1/verbs_txreq.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/verbs_txreq.h (renamed from drivers/staging/rdma/hfi1/verbs_txreq.h)0
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c145
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c15
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h1
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.c1
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.c4
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c30
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c109
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c140
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c48
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c3
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c2
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c2
-rw-r--r--drivers/staging/Kconfig2
-rw-r--r--drivers/staging/Makefile1
-rw-r--r--drivers/staging/rdma/Kconfig27
-rw-r--r--drivers/staging/rdma/Makefile2
-rw-r--r--drivers/staging/rdma/hfi1/TODO6
-rw-r--r--drivers/staging/rdma/hfi1/diag.c1925
-rw-r--r--drivers/staging/rdma/hfi1/eprom.c471
-rw-r--r--include/rdma/ib_mad.h60
-rw-r--r--include/rdma/ib_pack.h5
-rw-r--r--include/rdma/ib_sa.h12
-rw-r--r--include/rdma/ib_verbs.h126
-rw-r--r--include/rdma/rdma_vt.h13
-rw-r--r--include/rdma/rdmavt_qp.h5
-rw-r--r--include/uapi/rdma/hfi/hfi1_user.h80
-rw-r--r--include/uapi/rdma/rdma_netlink.h10
105 files changed, 1986 insertions, 3400 deletions
diff --git a/Documentation/infiniband/sysfs.txt b/Documentation/infiniband/sysfs.txt
index 3ecf0c3a133f..45bcafe6ff8a 100644
--- a/Documentation/infiniband/sysfs.txt
+++ b/Documentation/infiniband/sysfs.txt
@@ -56,6 +56,18 @@ SYSFS FILES
56 ports/1/pkeys/10 contains the value at index 10 in port 1's P_Key 56 ports/1/pkeys/10 contains the value at index 10 in port 1's P_Key
57 table. 57 table.
58 58
59 There is an optional "hw_counters" subdirectory that may be under either
60 the parent device or the port subdirectories or both. If present,
61 there are a list of counters provided by the hardware. They may match
62 some of the counters in the counters directory, but they often include
63 many other counters. In addition to the various counters, there will
64 be a file named "lifespan" that configures how frequently the core
65 should update the counters when they are being accessed (counters are
66 not updated if they are not being accessed). The lifespan is in milli-
67 seconds and defaults to 10 unless set to something else by the driver.
68 Users may echo a value between 0 - 10000 to the lifespan file to set
69 the length of time between updates in milliseconds.
70
59MTHCA 71MTHCA
60 72
61 The Mellanox HCA driver also creates the files: 73 The Mellanox HCA driver also creates the files:
diff --git a/MAINTAINERS b/MAINTAINERS
index f466673f86ff..216165a1384d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5308,6 +5308,13 @@ F: drivers/block/cciss*
5308F: include/linux/cciss_ioctl.h 5308F: include/linux/cciss_ioctl.h
5309F: include/uapi/linux/cciss_ioctl.h 5309F: include/uapi/linux/cciss_ioctl.h
5310 5310
5311HFI1 DRIVER
5312M: Mike Marciniszyn <mike.marciniszyn@intel.com>
5313M: Dennis Dalessandro <dennis.dalessandro@intel.com>
5314L: linux-rdma@vger.kernel.org
5315S: Supported
5316F: drivers/infiniband/hw/hfi1
5317
5311HFS FILESYSTEM 5318HFS FILESYSTEM
5312L: linux-fsdevel@vger.kernel.org 5319L: linux-fsdevel@vger.kernel.org
5313S: Orphan 5320S: Orphan
@@ -5837,7 +5844,6 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma.git
5837S: Supported 5844S: Supported
5838F: Documentation/infiniband/ 5845F: Documentation/infiniband/
5839F: drivers/infiniband/ 5846F: drivers/infiniband/
5840F: drivers/staging/rdma/
5841F: include/uapi/linux/if_infiniband.h 5847F: include/uapi/linux/if_infiniband.h
5842F: include/uapi/rdma/ 5848F: include/uapi/rdma/
5843F: include/rdma/ 5849F: include/rdma/
@@ -10920,12 +10926,6 @@ M: Arnaud Patard <arnaud.patard@rtp-net.org>
10920S: Odd Fixes 10926S: Odd Fixes
10921F: drivers/staging/xgifb/ 10927F: drivers/staging/xgifb/
10922 10928
10923HFI1 DRIVER
10924M: Mike Marciniszyn <infinipath@intel.com>
10925L: linux-rdma@vger.kernel.org
10926S: Supported
10927F: drivers/staging/rdma/hfi1
10928
10929STARFIRE/DURALAN NETWORK DRIVER 10929STARFIRE/DURALAN NETWORK DRIVER
10930M: Ion Badulescu <ionut@badula.org> 10930M: Ion Badulescu <ionut@badula.org>
10931S: Odd Fixes 10931S: Odd Fixes
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 6425c0e5d18a..2137adfbd8c3 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -85,4 +85,6 @@ source "drivers/infiniband/ulp/isert/Kconfig"
85 85
86source "drivers/infiniband/sw/rdmavt/Kconfig" 86source "drivers/infiniband/sw/rdmavt/Kconfig"
87 87
88source "drivers/infiniband/hw/hfi1/Kconfig"
89
88endif # INFINIBAND 90endif # INFINIBAND
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 26987d9d7e1c..edaae9f9853c 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -1,8 +1,7 @@
1infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_cm.o 1infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_cm.o
2user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o 2user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o
3 3
4obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ 4obj-$(CONFIG_INFINIBAND) += ib_core.o ib_cm.o iw_cm.o \
5 ib_cm.o iw_cm.o ib_addr.o \
6 $(infiniband-y) 5 $(infiniband-y)
7obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o 6obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
8obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ 7obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
@@ -10,14 +9,11 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
10 9
11ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ 10ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
12 device.o fmr_pool.o cache.o netlink.o \ 11 device.o fmr_pool.o cache.o netlink.o \
13 roce_gid_mgmt.o mr_pool.o 12 roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
13 multicast.o mad.o smi.o agent.o mad_rmpp.o
14ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o 14ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
15ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o 15ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
16 16
17ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
18
19ib_sa-y := sa_query.o multicast.o
20
21ib_cm-y := cm.o 17ib_cm-y := cm.o
22 18
23iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o 19iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o
@@ -28,8 +24,6 @@ rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o
28 24
29rdma_ucm-y := ucma.o 25rdma_ucm-y := ucma.o
30 26
31ib_addr-y := addr.o
32
33ib_umad-y := user_mad.o 27ib_umad-y := user_mad.o
34 28
35ib_ucm-y := ucm.o 29ib_ucm-y := ucm.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 337353d86cfa..1374541a4528 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -46,10 +46,10 @@
46#include <net/ip6_route.h> 46#include <net/ip6_route.h>
47#include <rdma/ib_addr.h> 47#include <rdma/ib_addr.h>
48#include <rdma/ib.h> 48#include <rdma/ib.h>
49#include <rdma/rdma_netlink.h>
50#include <net/netlink.h>
49 51
50MODULE_AUTHOR("Sean Hefty"); 52#include "core_priv.h"
51MODULE_DESCRIPTION("IB Address Translation");
52MODULE_LICENSE("Dual BSD/GPL");
53 53
54struct addr_req { 54struct addr_req {
55 struct list_head list; 55 struct list_head list;
@@ -62,8 +62,11 @@ struct addr_req {
62 struct rdma_dev_addr *addr, void *context); 62 struct rdma_dev_addr *addr, void *context);
63 unsigned long timeout; 63 unsigned long timeout;
64 int status; 64 int status;
65 u32 seq;
65}; 66};
66 67
68static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0);
69
67static void process_req(struct work_struct *work); 70static void process_req(struct work_struct *work);
68 71
69static DEFINE_MUTEX(lock); 72static DEFINE_MUTEX(lock);
@@ -71,6 +74,126 @@ static LIST_HEAD(req_list);
71static DECLARE_DELAYED_WORK(work, process_req); 74static DECLARE_DELAYED_WORK(work, process_req);
72static struct workqueue_struct *addr_wq; 75static struct workqueue_struct *addr_wq;
73 76
77static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
78 [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY,
79 .len = sizeof(struct rdma_nla_ls_gid)},
80};
81
82static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
83{
84 struct nlattr *tb[LS_NLA_TYPE_MAX] = {};
85 int ret;
86
87 if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
88 return false;
89
90 ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
91 nlmsg_len(nlh), ib_nl_addr_policy);
92 if (ret)
93 return false;
94
95 return true;
96}
97
98static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
99{
100 const struct nlattr *head, *curr;
101 union ib_gid gid;
102 struct addr_req *req;
103 int len, rem;
104 int found = 0;
105
106 head = (const struct nlattr *)nlmsg_data(nlh);
107 len = nlmsg_len(nlh);
108
109 nla_for_each_attr(curr, head, len, rem) {
110 if (curr->nla_type == LS_NLA_TYPE_DGID)
111 memcpy(&gid, nla_data(curr), nla_len(curr));
112 }
113
114 mutex_lock(&lock);
115 list_for_each_entry(req, &req_list, list) {
116 if (nlh->nlmsg_seq != req->seq)
117 continue;
118 /* We set the DGID part, the rest was set earlier */
119 rdma_addr_set_dgid(req->addr, &gid);
120 req->status = 0;
121 found = 1;
122 break;
123 }
124 mutex_unlock(&lock);
125
126 if (!found)
127 pr_info("Couldn't find request waiting for DGID: %pI6\n",
128 &gid);
129}
130
131int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
132 struct netlink_callback *cb)
133{
134 const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
135
136 if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
137 !(NETLINK_CB(skb).sk) ||
138 !netlink_capable(skb, CAP_NET_ADMIN))
139 return -EPERM;
140
141 if (ib_nl_is_good_ip_resp(nlh))
142 ib_nl_process_good_ip_rsep(nlh);
143
144 return skb->len;
145}
146
147static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
148 const void *daddr,
149 u32 seq, u16 family)
150{
151 struct sk_buff *skb = NULL;
152 struct nlmsghdr *nlh;
153 struct rdma_ls_ip_resolve_header *header;
154 void *data;
155 size_t size;
156 int attrtype;
157 int len;
158
159 if (family == AF_INET) {
160 size = sizeof(struct in_addr);
161 attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV4;
162 } else {
163 size = sizeof(struct in6_addr);
164 attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6;
165 }
166
167 len = nla_total_size(sizeof(size));
168 len += NLMSG_ALIGN(sizeof(*header));
169
170 skb = nlmsg_new(len, GFP_KERNEL);
171 if (!skb)
172 return -ENOMEM;
173
174 data = ibnl_put_msg(skb, &nlh, seq, 0, RDMA_NL_LS,
175 RDMA_NL_LS_OP_IP_RESOLVE, NLM_F_REQUEST);
176 if (!data) {
177 nlmsg_free(skb);
178 return -ENODATA;
179 }
180
181 /* Construct the family header first */
182 header = (struct rdma_ls_ip_resolve_header *)
183 skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
184 header->ifindex = dev_addr->bound_dev_if;
185 nla_put(skb, attrtype, size, daddr);
186
187 /* Repair the nlmsg header length */
188 nlmsg_end(skb, nlh);
189 ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL);
190
191 /* Make the request retry, so when we get the response from userspace
192 * we will have something.
193 */
194 return -ENODATA;
195}
196
74int rdma_addr_size(struct sockaddr *addr) 197int rdma_addr_size(struct sockaddr *addr)
75{ 198{
76 switch (addr->sa_family) { 199 switch (addr->sa_family) {
@@ -199,6 +322,17 @@ static void queue_req(struct addr_req *req)
199 mutex_unlock(&lock); 322 mutex_unlock(&lock);
200} 323}
201 324
325static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
326 const void *daddr, u32 seq, u16 family)
327{
328 if (ibnl_chk_listeners(RDMA_NL_GROUP_LS))
329 return -EADDRNOTAVAIL;
330
331 /* We fill in what we can, the response will fill the rest */
332 rdma_copy_addr(dev_addr, dst->dev, NULL);
333 return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
334}
335
202static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, 336static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
203 const void *daddr) 337 const void *daddr)
204{ 338{
@@ -223,6 +357,39 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
223 return ret; 357 return ret;
224} 358}
225 359
360static bool has_gateway(struct dst_entry *dst, sa_family_t family)
361{
362 struct rtable *rt;
363 struct rt6_info *rt6;
364
365 if (family == AF_INET) {
366 rt = container_of(dst, struct rtable, dst);
367 return rt->rt_uses_gateway;
368 }
369
370 rt6 = container_of(dst, struct rt6_info, dst);
371 return rt6->rt6i_flags & RTF_GATEWAY;
372}
373
374static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
375 const struct sockaddr *dst_in, u32 seq)
376{
377 const struct sockaddr_in *dst_in4 =
378 (const struct sockaddr_in *)dst_in;
379 const struct sockaddr_in6 *dst_in6 =
380 (const struct sockaddr_in6 *)dst_in;
381 const void *daddr = (dst_in->sa_family == AF_INET) ?
382 (const void *)&dst_in4->sin_addr.s_addr :
383 (const void *)&dst_in6->sin6_addr;
384 sa_family_t family = dst_in->sa_family;
385
386 /* Gateway + ARPHRD_INFINIBAND -> IB router */
387 if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND)
388 return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family);
389 else
390 return dst_fetch_ha(dst, dev_addr, daddr);
391}
392
226static int addr4_resolve(struct sockaddr_in *src_in, 393static int addr4_resolve(struct sockaddr_in *src_in,
227 const struct sockaddr_in *dst_in, 394 const struct sockaddr_in *dst_in,
228 struct rdma_dev_addr *addr, 395 struct rdma_dev_addr *addr,
@@ -246,10 +413,11 @@ static int addr4_resolve(struct sockaddr_in *src_in,
246 src_in->sin_family = AF_INET; 413 src_in->sin_family = AF_INET;
247 src_in->sin_addr.s_addr = fl4.saddr; 414 src_in->sin_addr.s_addr = fl4.saddr;
248 415
249 /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't 416 /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
250 * routable) and we could set the network type accordingly. 417 * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
418 * type accordingly.
251 */ 419 */
252 if (rt->rt_uses_gateway) 420 if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND)
253 addr->network = RDMA_NETWORK_IPV4; 421 addr->network = RDMA_NETWORK_IPV4;
254 422
255 addr->hoplimit = ip4_dst_hoplimit(&rt->dst); 423 addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
@@ -291,10 +459,12 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
291 src_in->sin6_addr = fl6.saddr; 459 src_in->sin6_addr = fl6.saddr;
292 } 460 }
293 461
294 /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't 462 /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
295 * routable) and we could set the network type accordingly. 463 * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
464 * type accordingly.
296 */ 465 */
297 if (rt->rt6i_flags & RTF_GATEWAY) 466 if (rt->rt6i_flags & RTF_GATEWAY &&
467 ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND)
298 addr->network = RDMA_NETWORK_IPV6; 468 addr->network = RDMA_NETWORK_IPV6;
299 469
300 addr->hoplimit = ip6_dst_hoplimit(dst); 470 addr->hoplimit = ip6_dst_hoplimit(dst);
@@ -317,7 +487,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
317 487
318static int addr_resolve_neigh(struct dst_entry *dst, 488static int addr_resolve_neigh(struct dst_entry *dst,
319 const struct sockaddr *dst_in, 489 const struct sockaddr *dst_in,
320 struct rdma_dev_addr *addr) 490 struct rdma_dev_addr *addr,
491 u32 seq)
321{ 492{
322 if (dst->dev->flags & IFF_LOOPBACK) { 493 if (dst->dev->flags & IFF_LOOPBACK) {
323 int ret; 494 int ret;
@@ -331,17 +502,8 @@ static int addr_resolve_neigh(struct dst_entry *dst,
331 } 502 }
332 503
333 /* If the device doesn't do ARP internally */ 504 /* If the device doesn't do ARP internally */
334 if (!(dst->dev->flags & IFF_NOARP)) { 505 if (!(dst->dev->flags & IFF_NOARP))
335 const struct sockaddr_in *dst_in4 = 506 return fetch_ha(dst, addr, dst_in, seq);
336 (const struct sockaddr_in *)dst_in;
337 const struct sockaddr_in6 *dst_in6 =
338 (const struct sockaddr_in6 *)dst_in;
339
340 return dst_fetch_ha(dst, addr,
341 dst_in->sa_family == AF_INET ?
342 (const void *)&dst_in4->sin_addr.s_addr :
343 (const void *)&dst_in6->sin6_addr);
344 }
345 507
346 return rdma_copy_addr(addr, dst->dev, NULL); 508 return rdma_copy_addr(addr, dst->dev, NULL);
347} 509}
@@ -349,7 +511,8 @@ static int addr_resolve_neigh(struct dst_entry *dst,
349static int addr_resolve(struct sockaddr *src_in, 511static int addr_resolve(struct sockaddr *src_in,
350 const struct sockaddr *dst_in, 512 const struct sockaddr *dst_in,
351 struct rdma_dev_addr *addr, 513 struct rdma_dev_addr *addr,
352 bool resolve_neigh) 514 bool resolve_neigh,
515 u32 seq)
353{ 516{
354 struct net_device *ndev; 517 struct net_device *ndev;
355 struct dst_entry *dst; 518 struct dst_entry *dst;
@@ -366,7 +529,7 @@ static int addr_resolve(struct sockaddr *src_in,
366 return ret; 529 return ret;
367 530
368 if (resolve_neigh) 531 if (resolve_neigh)
369 ret = addr_resolve_neigh(&rt->dst, dst_in, addr); 532 ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq);
370 533
371 ndev = rt->dst.dev; 534 ndev = rt->dst.dev;
372 dev_hold(ndev); 535 dev_hold(ndev);
@@ -383,7 +546,7 @@ static int addr_resolve(struct sockaddr *src_in,
383 return ret; 546 return ret;
384 547
385 if (resolve_neigh) 548 if (resolve_neigh)
386 ret = addr_resolve_neigh(dst, dst_in, addr); 549 ret = addr_resolve_neigh(dst, dst_in, addr, seq);
387 550
388 ndev = dst->dev; 551 ndev = dst->dev;
389 dev_hold(ndev); 552 dev_hold(ndev);
@@ -412,7 +575,7 @@ static void process_req(struct work_struct *work)
412 src_in = (struct sockaddr *) &req->src_addr; 575 src_in = (struct sockaddr *) &req->src_addr;
413 dst_in = (struct sockaddr *) &req->dst_addr; 576 dst_in = (struct sockaddr *) &req->dst_addr;
414 req->status = addr_resolve(src_in, dst_in, req->addr, 577 req->status = addr_resolve(src_in, dst_in, req->addr,
415 true); 578 true, req->seq);
416 if (req->status && time_after_eq(jiffies, req->timeout)) 579 if (req->status && time_after_eq(jiffies, req->timeout))
417 req->status = -ETIMEDOUT; 580 req->status = -ETIMEDOUT;
418 else if (req->status == -ENODATA) 581 else if (req->status == -ENODATA)
@@ -471,8 +634,9 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
471 req->context = context; 634 req->context = context;
472 req->client = client; 635 req->client = client;
473 atomic_inc(&client->refcount); 636 atomic_inc(&client->refcount);
637 req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);
474 638
475 req->status = addr_resolve(src_in, dst_in, addr, true); 639 req->status = addr_resolve(src_in, dst_in, addr, true, req->seq);
476 switch (req->status) { 640 switch (req->status) {
477 case 0: 641 case 0:
478 req->timeout = jiffies; 642 req->timeout = jiffies;
@@ -510,7 +674,7 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr,
510 src_in->sa_family = dst_addr->sa_family; 674 src_in->sa_family = dst_addr->sa_family;
511 } 675 }
512 676
513 return addr_resolve(src_in, dst_addr, addr, false); 677 return addr_resolve(src_in, dst_addr, addr, false, 0);
514} 678}
515EXPORT_SYMBOL(rdma_resolve_ip_route); 679EXPORT_SYMBOL(rdma_resolve_ip_route);
516 680
@@ -634,7 +798,7 @@ static struct notifier_block nb = {
634 .notifier_call = netevent_callback 798 .notifier_call = netevent_callback
635}; 799};
636 800
637static int __init addr_init(void) 801int addr_init(void)
638{ 802{
639 addr_wq = create_singlethread_workqueue("ib_addr"); 803 addr_wq = create_singlethread_workqueue("ib_addr");
640 if (!addr_wq) 804 if (!addr_wq)
@@ -642,15 +806,13 @@ static int __init addr_init(void)
642 806
643 register_netevent_notifier(&nb); 807 register_netevent_notifier(&nb);
644 rdma_addr_register_client(&self); 808 rdma_addr_register_client(&self);
809
645 return 0; 810 return 0;
646} 811}
647 812
648static void __exit addr_cleanup(void) 813void addr_cleanup(void)
649{ 814{
650 rdma_addr_unregister_client(&self); 815 rdma_addr_unregister_client(&self);
651 unregister_netevent_notifier(&nb); 816 unregister_netevent_notifier(&nb);
652 destroy_workqueue(addr_wq); 817 destroy_workqueue(addr_wq);
653} 818}
654
655module_init(addr_init);
656module_exit(addr_cleanup);
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index eab32215756b..19d499dcab76 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -137,4 +137,20 @@ static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
137 return _upper == upper; 137 return _upper == upper;
138} 138}
139 139
140int addr_init(void);
141void addr_cleanup(void);
142
143int ib_mad_init(void);
144void ib_mad_cleanup(void);
145
146int ib_sa_init(void);
147void ib_sa_cleanup(void);
148
149int ib_nl_handle_resolve_resp(struct sk_buff *skb,
150 struct netlink_callback *cb);
151int ib_nl_handle_set_timeout(struct sk_buff *skb,
152 struct netlink_callback *cb);
153int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
154 struct netlink_callback *cb);
155
140#endif /* _CORE_PRIV_H */ 156#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 10979844026a..5516fb070344 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -955,6 +955,29 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
955} 955}
956EXPORT_SYMBOL(ib_get_net_dev_by_params); 956EXPORT_SYMBOL(ib_get_net_dev_by_params);
957 957
958static struct ibnl_client_cbs ibnl_ls_cb_table[] = {
959 [RDMA_NL_LS_OP_RESOLVE] = {
960 .dump = ib_nl_handle_resolve_resp,
961 .module = THIS_MODULE },
962 [RDMA_NL_LS_OP_SET_TIMEOUT] = {
963 .dump = ib_nl_handle_set_timeout,
964 .module = THIS_MODULE },
965 [RDMA_NL_LS_OP_IP_RESOLVE] = {
966 .dump = ib_nl_handle_ip_res_resp,
967 .module = THIS_MODULE },
968};
969
970static int ib_add_ibnl_clients(void)
971{
972 return ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ibnl_ls_cb_table),
973 ibnl_ls_cb_table);
974}
975
976static void ib_remove_ibnl_clients(void)
977{
978 ibnl_remove_client(RDMA_NL_LS);
979}
980
958static int __init ib_core_init(void) 981static int __init ib_core_init(void)
959{ 982{
960 int ret; 983 int ret;
@@ -983,10 +1006,41 @@ static int __init ib_core_init(void)
983 goto err_sysfs; 1006 goto err_sysfs;
984 } 1007 }
985 1008
1009 ret = addr_init();
1010 if (ret) {
1011 pr_warn("Could't init IB address resolution\n");
1012 goto err_ibnl;
1013 }
1014
1015 ret = ib_mad_init();
1016 if (ret) {
1017 pr_warn("Couldn't init IB MAD\n");
1018 goto err_addr;
1019 }
1020
1021 ret = ib_sa_init();
1022 if (ret) {
1023 pr_warn("Couldn't init SA\n");
1024 goto err_mad;
1025 }
1026
1027 if (ib_add_ibnl_clients()) {
1028 pr_warn("Couldn't register ibnl clients\n");
1029 goto err_sa;
1030 }
1031
986 ib_cache_setup(); 1032 ib_cache_setup();
987 1033
988 return 0; 1034 return 0;
989 1035
1036err_sa:
1037 ib_sa_cleanup();
1038err_mad:
1039 ib_mad_cleanup();
1040err_addr:
1041 addr_cleanup();
1042err_ibnl:
1043 ibnl_cleanup();
990err_sysfs: 1044err_sysfs:
991 class_unregister(&ib_class); 1045 class_unregister(&ib_class);
992err_comp: 1046err_comp:
@@ -999,6 +1053,10 @@ err:
999static void __exit ib_core_cleanup(void) 1053static void __exit ib_core_cleanup(void)
1000{ 1054{
1001 ib_cache_cleanup(); 1055 ib_cache_cleanup();
1056 ib_remove_ibnl_clients();
1057 ib_sa_cleanup();
1058 ib_mad_cleanup();
1059 addr_cleanup();
1002 ibnl_cleanup(); 1060 ibnl_cleanup();
1003 class_unregister(&ib_class); 1061 class_unregister(&ib_class);
1004 destroy_workqueue(ib_comp_wq); 1062 destroy_workqueue(ib_comp_wq);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 9fa5bf33f5a3..82fb511112da 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -47,11 +47,7 @@
47#include "smi.h" 47#include "smi.h"
48#include "opa_smi.h" 48#include "opa_smi.h"
49#include "agent.h" 49#include "agent.h"
50 50#include "core_priv.h"
51MODULE_LICENSE("Dual BSD/GPL");
52MODULE_DESCRIPTION("kernel IB MAD API");
53MODULE_AUTHOR("Hal Rosenstock");
54MODULE_AUTHOR("Sean Hefty");
55 51
56static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; 52static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
57static int mad_recvq_size = IB_MAD_QP_RECV_SIZE; 53static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
@@ -3316,7 +3312,7 @@ static struct ib_client mad_client = {
3316 .remove = ib_mad_remove_device 3312 .remove = ib_mad_remove_device
3317}; 3313};
3318 3314
3319static int __init ib_mad_init_module(void) 3315int ib_mad_init(void)
3320{ 3316{
3321 mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE); 3317 mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE);
3322 mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE); 3318 mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE);
@@ -3334,10 +3330,7 @@ static int __init ib_mad_init_module(void)
3334 return 0; 3330 return 0;
3335} 3331}
3336 3332
3337static void __exit ib_mad_cleanup_module(void) 3333void ib_mad_cleanup(void)
3338{ 3334{
3339 ib_unregister_client(&mad_client); 3335 ib_unregister_client(&mad_client);
3340} 3336}
3341
3342module_init(ib_mad_init_module);
3343module_exit(ib_mad_cleanup_module);
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 250937cb9a1a..a83ec28a147b 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -93,6 +93,18 @@ enum {
93 93
94struct mcast_member; 94struct mcast_member;
95 95
96/*
97* There are 4 types of join states:
98* FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember.
99*/
100enum {
101 FULLMEMBER_JOIN,
102 NONMEMBER_JOIN,
103 SENDONLY_NONMEBER_JOIN,
104 SENDONLY_FULLMEMBER_JOIN,
105 NUM_JOIN_MEMBERSHIP_TYPES,
106};
107
96struct mcast_group { 108struct mcast_group {
97 struct ib_sa_mcmember_rec rec; 109 struct ib_sa_mcmember_rec rec;
98 struct rb_node node; 110 struct rb_node node;
@@ -102,7 +114,7 @@ struct mcast_group {
102 struct list_head pending_list; 114 struct list_head pending_list;
103 struct list_head active_list; 115 struct list_head active_list;
104 struct mcast_member *last_join; 116 struct mcast_member *last_join;
105 int members[3]; 117 int members[NUM_JOIN_MEMBERSHIP_TYPES];
106 atomic_t refcount; 118 atomic_t refcount;
107 enum mcast_group_state state; 119 enum mcast_group_state state;
108 struct ib_sa_query *query; 120 struct ib_sa_query *query;
@@ -220,8 +232,9 @@ static void queue_join(struct mcast_member *member)
220} 232}
221 233
222/* 234/*
223 * A multicast group has three types of members: full member, non member, and 235 * A multicast group has four types of members: full member, non member,
224 * send only member. We need to keep track of the number of members of each 236 * sendonly non member and sendonly full member.
237 * We need to keep track of the number of members of each
225 * type based on their join state. Adjust the number of members the belong to 238 * type based on their join state. Adjust the number of members the belong to
226 * the specified join states. 239 * the specified join states.
227 */ 240 */
@@ -229,7 +242,7 @@ static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
229{ 242{
230 int i; 243 int i;
231 244
232 for (i = 0; i < 3; i++, join_state >>= 1) 245 for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++, join_state >>= 1)
233 if (join_state & 0x1) 246 if (join_state & 0x1)
234 group->members[i] += inc; 247 group->members[i] += inc;
235} 248}
@@ -245,7 +258,7 @@ static u8 get_leave_state(struct mcast_group *group)
245 u8 leave_state = 0; 258 u8 leave_state = 0;
246 int i; 259 int i;
247 260
248 for (i = 0; i < 3; i++) 261 for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++)
249 if (!group->members[i]) 262 if (!group->members[i])
250 leave_state |= (0x1 << i); 263 leave_state |= (0x1 << i);
251 264
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 3ebd108bcc5f..e95538650dc6 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -53,10 +53,6 @@
53#include "sa.h" 53#include "sa.h"
54#include "core_priv.h" 54#include "core_priv.h"
55 55
56MODULE_AUTHOR("Roland Dreier");
57MODULE_DESCRIPTION("InfiniBand subnet administration query support");
58MODULE_LICENSE("Dual BSD/GPL");
59
60#define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100 56#define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100
61#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000 57#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000
62#define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000 58#define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000
@@ -119,6 +115,12 @@ struct ib_sa_guidinfo_query {
119 struct ib_sa_query sa_query; 115 struct ib_sa_query sa_query;
120}; 116};
121 117
118struct ib_sa_classport_info_query {
119 void (*callback)(int, struct ib_class_port_info *, void *);
120 void *context;
121 struct ib_sa_query sa_query;
122};
123
122struct ib_sa_mcmember_query { 124struct ib_sa_mcmember_query {
123 void (*callback)(int, struct ib_sa_mcmember_rec *, void *); 125 void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
124 void *context; 126 void *context;
@@ -392,6 +394,82 @@ static const struct ib_field service_rec_table[] = {
392 .size_bits = 2*64 }, 394 .size_bits = 2*64 },
393}; 395};
394 396
397#define CLASSPORTINFO_REC_FIELD(field) \
398 .struct_offset_bytes = offsetof(struct ib_class_port_info, field), \
399 .struct_size_bytes = sizeof((struct ib_class_port_info *)0)->field, \
400 .field_name = "ib_class_port_info:" #field
401
402static const struct ib_field classport_info_rec_table[] = {
403 { CLASSPORTINFO_REC_FIELD(base_version),
404 .offset_words = 0,
405 .offset_bits = 0,
406 .size_bits = 8 },
407 { CLASSPORTINFO_REC_FIELD(class_version),
408 .offset_words = 0,
409 .offset_bits = 8,
410 .size_bits = 8 },
411 { CLASSPORTINFO_REC_FIELD(capability_mask),
412 .offset_words = 0,
413 .offset_bits = 16,
414 .size_bits = 16 },
415 { CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
416 .offset_words = 1,
417 .offset_bits = 0,
418 .size_bits = 32 },
419 { CLASSPORTINFO_REC_FIELD(redirect_gid),
420 .offset_words = 2,
421 .offset_bits = 0,
422 .size_bits = 128 },
423 { CLASSPORTINFO_REC_FIELD(redirect_tcslfl),
424 .offset_words = 6,
425 .offset_bits = 0,
426 .size_bits = 32 },
427 { CLASSPORTINFO_REC_FIELD(redirect_lid),
428 .offset_words = 7,
429 .offset_bits = 0,
430 .size_bits = 16 },
431 { CLASSPORTINFO_REC_FIELD(redirect_pkey),
432 .offset_words = 7,
433 .offset_bits = 16,
434 .size_bits = 16 },
435
436 { CLASSPORTINFO_REC_FIELD(redirect_qp),
437 .offset_words = 8,
438 .offset_bits = 0,
439 .size_bits = 32 },
440 { CLASSPORTINFO_REC_FIELD(redirect_qkey),
441 .offset_words = 9,
442 .offset_bits = 0,
443 .size_bits = 32 },
444
445 { CLASSPORTINFO_REC_FIELD(trap_gid),
446 .offset_words = 10,
447 .offset_bits = 0,
448 .size_bits = 128 },
449 { CLASSPORTINFO_REC_FIELD(trap_tcslfl),
450 .offset_words = 14,
451 .offset_bits = 0,
452 .size_bits = 32 },
453
454 { CLASSPORTINFO_REC_FIELD(trap_lid),
455 .offset_words = 15,
456 .offset_bits = 0,
457 .size_bits = 16 },
458 { CLASSPORTINFO_REC_FIELD(trap_pkey),
459 .offset_words = 15,
460 .offset_bits = 16,
461 .size_bits = 16 },
462
463 { CLASSPORTINFO_REC_FIELD(trap_hlqp),
464 .offset_words = 16,
465 .offset_bits = 0,
466 .size_bits = 32 },
467 { CLASSPORTINFO_REC_FIELD(trap_qkey),
468 .offset_words = 17,
469 .offset_bits = 0,
470 .size_bits = 32 },
471};
472
395#define GUIDINFO_REC_FIELD(field) \ 473#define GUIDINFO_REC_FIELD(field) \
396 .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \ 474 .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \
397 .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \ 475 .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \
@@ -705,8 +783,8 @@ static void ib_nl_request_timeout(struct work_struct *work)
705 spin_unlock_irqrestore(&ib_nl_request_lock, flags); 783 spin_unlock_irqrestore(&ib_nl_request_lock, flags);
706} 784}
707 785
708static int ib_nl_handle_set_timeout(struct sk_buff *skb, 786int ib_nl_handle_set_timeout(struct sk_buff *skb,
709 struct netlink_callback *cb) 787 struct netlink_callback *cb)
710{ 788{
711 const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh; 789 const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
712 int timeout, delta, abs_delta; 790 int timeout, delta, abs_delta;
@@ -782,8 +860,8 @@ static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
782 return 1; 860 return 1;
783} 861}
784 862
785static int ib_nl_handle_resolve_resp(struct sk_buff *skb, 863int ib_nl_handle_resolve_resp(struct sk_buff *skb,
786 struct netlink_callback *cb) 864 struct netlink_callback *cb)
787{ 865{
788 const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh; 866 const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
789 unsigned long flags; 867 unsigned long flags;
@@ -838,15 +916,6 @@ resp_out:
838 return skb->len; 916 return skb->len;
839} 917}
840 918
841static struct ibnl_client_cbs ib_sa_cb_table[] = {
842 [RDMA_NL_LS_OP_RESOLVE] = {
843 .dump = ib_nl_handle_resolve_resp,
844 .module = THIS_MODULE },
845 [RDMA_NL_LS_OP_SET_TIMEOUT] = {
846 .dump = ib_nl_handle_set_timeout,
847 .module = THIS_MODULE },
848};
849
850static void free_sm_ah(struct kref *kref) 919static void free_sm_ah(struct kref *kref)
851{ 920{
852 struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); 921 struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
@@ -1645,6 +1714,97 @@ err1:
1645} 1714}
1646EXPORT_SYMBOL(ib_sa_guid_info_rec_query); 1715EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
1647 1716
1717/* Support get SA ClassPortInfo */
1718static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
1719 int status,
1720 struct ib_sa_mad *mad)
1721{
1722 struct ib_sa_classport_info_query *query =
1723 container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
1724
1725 if (mad) {
1726 struct ib_class_port_info rec;
1727
1728 ib_unpack(classport_info_rec_table,
1729 ARRAY_SIZE(classport_info_rec_table),
1730 mad->data, &rec);
1731 query->callback(status, &rec, query->context);
1732 } else {
1733 query->callback(status, NULL, query->context);
1734 }
1735}
1736
1737static void ib_sa_portclass_info_rec_release(struct ib_sa_query *sa_query)
1738{
1739 kfree(container_of(sa_query, struct ib_sa_classport_info_query,
1740 sa_query));
1741}
1742
1743int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
1744 struct ib_device *device, u8 port_num,
1745 int timeout_ms, gfp_t gfp_mask,
1746 void (*callback)(int status,
1747 struct ib_class_port_info *resp,
1748 void *context),
1749 void *context,
1750 struct ib_sa_query **sa_query)
1751{
1752 struct ib_sa_classport_info_query *query;
1753 struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1754 struct ib_sa_port *port;
1755 struct ib_mad_agent *agent;
1756 struct ib_sa_mad *mad;
1757 int ret;
1758
1759 if (!sa_dev)
1760 return -ENODEV;
1761
1762 port = &sa_dev->port[port_num - sa_dev->start_port];
1763 agent = port->agent;
1764
1765 query = kzalloc(sizeof(*query), gfp_mask);
1766 if (!query)
1767 return -ENOMEM;
1768
1769 query->sa_query.port = port;
1770 ret = alloc_mad(&query->sa_query, gfp_mask);
1771 if (ret)
1772 goto err1;
1773
1774 ib_sa_client_get(client);
1775 query->sa_query.client = client;
1776 query->callback = callback;
1777 query->context = context;
1778
1779 mad = query->sa_query.mad_buf->mad;
1780 init_mad(mad, agent);
1781
1782 query->sa_query.callback = callback ? ib_sa_classport_info_rec_callback : NULL;
1783
1784 query->sa_query.release = ib_sa_portclass_info_rec_release;
1785 /* support GET only */
1786 mad->mad_hdr.method = IB_MGMT_METHOD_GET;
1787 mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO);
1788 mad->sa_hdr.comp_mask = 0;
1789 *sa_query = &query->sa_query;
1790
1791 ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1792 if (ret < 0)
1793 goto err2;
1794
1795 return ret;
1796
1797err2:
1798 *sa_query = NULL;
1799 ib_sa_client_put(query->sa_query.client);
1800 free_mad(&query->sa_query);
1801
1802err1:
1803 kfree(query);
1804 return ret;
1805}
1806EXPORT_SYMBOL(ib_sa_classport_info_rec_query);
1807
1648static void send_handler(struct ib_mad_agent *agent, 1808static void send_handler(struct ib_mad_agent *agent,
1649 struct ib_mad_send_wc *mad_send_wc) 1809 struct ib_mad_send_wc *mad_send_wc)
1650{ 1810{
@@ -1794,7 +1954,7 @@ static void ib_sa_remove_one(struct ib_device *device, void *client_data)
1794 kfree(sa_dev); 1954 kfree(sa_dev);
1795} 1955}
1796 1956
1797static int __init ib_sa_init(void) 1957int ib_sa_init(void)
1798{ 1958{
1799 int ret; 1959 int ret;
1800 1960
@@ -1820,17 +1980,10 @@ static int __init ib_sa_init(void)
1820 goto err3; 1980 goto err3;
1821 } 1981 }
1822 1982
1823 if (ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ib_sa_cb_table),
1824 ib_sa_cb_table)) {
1825 pr_err("Failed to add netlink callback\n");
1826 ret = -EINVAL;
1827 goto err4;
1828 }
1829 INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout); 1983 INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout);
1830 1984
1831 return 0; 1985 return 0;
1832err4: 1986
1833 destroy_workqueue(ib_nl_wq);
1834err3: 1987err3:
1835 mcast_cleanup(); 1988 mcast_cleanup();
1836err2: 1989err2:
@@ -1839,9 +1992,8 @@ err1:
1839 return ret; 1992 return ret;
1840} 1993}
1841 1994
1842static void __exit ib_sa_cleanup(void) 1995void ib_sa_cleanup(void)
1843{ 1996{
1844 ibnl_remove_client(RDMA_NL_LS);
1845 cancel_delayed_work(&ib_nl_timed_work); 1997 cancel_delayed_work(&ib_nl_timed_work);
1846 flush_workqueue(ib_nl_wq); 1998 flush_workqueue(ib_nl_wq);
1847 destroy_workqueue(ib_nl_wq); 1999 destroy_workqueue(ib_nl_wq);
@@ -1849,6 +2001,3 @@ static void __exit ib_sa_cleanup(void)
1849 ib_unregister_client(&sa_client); 2001 ib_unregister_client(&sa_client);
1850 idr_destroy(&query_idr); 2002 idr_destroy(&query_idr);
1851} 2003}
1852
1853module_init(ib_sa_init);
1854module_exit(ib_sa_cleanup);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 14606afbfaa8..5e573bb18660 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -56,8 +56,10 @@ struct ib_port {
56 struct gid_attr_group *gid_attr_group; 56 struct gid_attr_group *gid_attr_group;
57 struct attribute_group gid_group; 57 struct attribute_group gid_group;
58 struct attribute_group pkey_group; 58 struct attribute_group pkey_group;
59 u8 port_num;
60 struct attribute_group *pma_table; 59 struct attribute_group *pma_table;
60 struct attribute_group *hw_stats_ag;
61 struct rdma_hw_stats *hw_stats;
62 u8 port_num;
61}; 63};
62 64
63struct port_attribute { 65struct port_attribute {
@@ -80,6 +82,18 @@ struct port_table_attribute {
80 __be16 attr_id; 82 __be16 attr_id;
81}; 83};
82 84
85struct hw_stats_attribute {
86 struct attribute attr;
87 ssize_t (*show)(struct kobject *kobj,
88 struct attribute *attr, char *buf);
89 ssize_t (*store)(struct kobject *kobj,
90 struct attribute *attr,
91 const char *buf,
92 size_t count);
93 int index;
94 u8 port_num;
95};
96
83static ssize_t port_attr_show(struct kobject *kobj, 97static ssize_t port_attr_show(struct kobject *kobj,
84 struct attribute *attr, char *buf) 98 struct attribute *attr, char *buf)
85{ 99{
@@ -733,6 +747,212 @@ static struct attribute_group *get_counter_table(struct ib_device *dev,
733 return &pma_group; 747 return &pma_group;
734} 748}
735 749
750static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats,
751 u8 port_num, int index)
752{
753 int ret;
754
755 if (time_is_after_eq_jiffies(stats->timestamp + stats->lifespan))
756 return 0;
757 ret = dev->get_hw_stats(dev, stats, port_num, index);
758 if (ret < 0)
759 return ret;
760 if (ret == stats->num_counters)
761 stats->timestamp = jiffies;
762
763 return 0;
764}
765
766static ssize_t print_hw_stat(struct rdma_hw_stats *stats, int index, char *buf)
767{
768 return sprintf(buf, "%llu\n", stats->value[index]);
769}
770
771static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr,
772 char *buf)
773{
774 struct ib_device *dev;
775 struct ib_port *port;
776 struct hw_stats_attribute *hsa;
777 struct rdma_hw_stats *stats;
778 int ret;
779
780 hsa = container_of(attr, struct hw_stats_attribute, attr);
781 if (!hsa->port_num) {
782 dev = container_of((struct device *)kobj,
783 struct ib_device, dev);
784 stats = dev->hw_stats;
785 } else {
786 port = container_of(kobj, struct ib_port, kobj);
787 dev = port->ibdev;
788 stats = port->hw_stats;
789 }
790 ret = update_hw_stats(dev, stats, hsa->port_num, hsa->index);
791 if (ret)
792 return ret;
793 return print_hw_stat(stats, hsa->index, buf);
794}
795
796static ssize_t show_stats_lifespan(struct kobject *kobj,
797 struct attribute *attr,
798 char *buf)
799{
800 struct hw_stats_attribute *hsa;
801 int msecs;
802
803 hsa = container_of(attr, struct hw_stats_attribute, attr);
804 if (!hsa->port_num) {
805 struct ib_device *dev = container_of((struct device *)kobj,
806 struct ib_device, dev);
807 msecs = jiffies_to_msecs(dev->hw_stats->lifespan);
808 } else {
809 struct ib_port *p = container_of(kobj, struct ib_port, kobj);
810 msecs = jiffies_to_msecs(p->hw_stats->lifespan);
811 }
812 return sprintf(buf, "%d\n", msecs);
813}
814
815static ssize_t set_stats_lifespan(struct kobject *kobj,
816 struct attribute *attr,
817 const char *buf, size_t count)
818{
819 struct hw_stats_attribute *hsa;
820 int msecs;
821 int jiffies;
822 int ret;
823
824 ret = kstrtoint(buf, 10, &msecs);
825 if (ret)
826 return ret;
827 if (msecs < 0 || msecs > 10000)
828 return -EINVAL;
829 jiffies = msecs_to_jiffies(msecs);
830 hsa = container_of(attr, struct hw_stats_attribute, attr);
831 if (!hsa->port_num) {
832 struct ib_device *dev = container_of((struct device *)kobj,
833 struct ib_device, dev);
834 dev->hw_stats->lifespan = jiffies;
835 } else {
836 struct ib_port *p = container_of(kobj, struct ib_port, kobj);
837 p->hw_stats->lifespan = jiffies;
838 }
839 return count;
840}
841
842static void free_hsag(struct kobject *kobj, struct attribute_group *attr_group)
843{
844 struct attribute **attr;
845
846 sysfs_remove_group(kobj, attr_group);
847
848 for (attr = attr_group->attrs; *attr; attr++)
849 kfree(*attr);
850 kfree(attr_group);
851}
852
853static struct attribute *alloc_hsa(int index, u8 port_num, const char *name)
854{
855 struct hw_stats_attribute *hsa;
856
857 hsa = kmalloc(sizeof(*hsa), GFP_KERNEL);
858 if (!hsa)
859 return NULL;
860
861 hsa->attr.name = (char *)name;
862 hsa->attr.mode = S_IRUGO;
863 hsa->show = show_hw_stats;
864 hsa->store = NULL;
865 hsa->index = index;
866 hsa->port_num = port_num;
867
868 return &hsa->attr;
869}
870
871static struct attribute *alloc_hsa_lifespan(char *name, u8 port_num)
872{
873 struct hw_stats_attribute *hsa;
874
875 hsa = kmalloc(sizeof(*hsa), GFP_KERNEL);
876 if (!hsa)
877 return NULL;
878
879 hsa->attr.name = name;
880 hsa->attr.mode = S_IWUSR | S_IRUGO;
881 hsa->show = show_stats_lifespan;
882 hsa->store = set_stats_lifespan;
883 hsa->index = 0;
884 hsa->port_num = port_num;
885
886 return &hsa->attr;
887}
888
889static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
890 u8 port_num)
891{
892 struct attribute_group *hsag = NULL;
893 struct rdma_hw_stats *stats;
894 int i = 0, ret;
895
896 stats = device->alloc_hw_stats(device, port_num);
897
898 if (!stats)
899 return;
900
901 if (!stats->names || stats->num_counters <= 0)
902 goto err;
903
904 hsag = kzalloc(sizeof(*hsag) +
905 // 1 extra for the lifespan config entry
906 sizeof(void *) * (stats->num_counters + 1),
907 GFP_KERNEL);
908 if (!hsag)
909 return;
910
911 ret = device->get_hw_stats(device, stats, port_num,
912 stats->num_counters);
913 if (ret != stats->num_counters)
914 goto err;
915
916 stats->timestamp = jiffies;
917
918 hsag->name = "hw_counters";
919 hsag->attrs = (void *)hsag + sizeof(*hsag);
920
921 for (i = 0; i < stats->num_counters; i++) {
922 hsag->attrs[i] = alloc_hsa(i, port_num, stats->names[i]);
923 if (!hsag->attrs[i])
924 goto err;
925 }
926
927 /* treat an error here as non-fatal */
928 hsag->attrs[i] = alloc_hsa_lifespan("lifespan", port_num);
929
930 if (port) {
931 struct kobject *kobj = &port->kobj;
932 ret = sysfs_create_group(kobj, hsag);
933 if (ret)
934 goto err;
935 port->hw_stats_ag = hsag;
936 port->hw_stats = stats;
937 } else {
938 struct kobject *kobj = &device->dev.kobj;
939 ret = sysfs_create_group(kobj, hsag);
940 if (ret)
941 goto err;
942 device->hw_stats_ag = hsag;
943 device->hw_stats = stats;
944 }
945
946 return;
947
948err:
949 kfree(stats);
950 for (; i >= 0; i--)
951 kfree(hsag->attrs[i]);
952 kfree(hsag);
953 return;
954}
955
736static int add_port(struct ib_device *device, int port_num, 956static int add_port(struct ib_device *device, int port_num,
737 int (*port_callback)(struct ib_device *, 957 int (*port_callback)(struct ib_device *,
738 u8, struct kobject *)) 958 u8, struct kobject *))
@@ -835,6 +1055,14 @@ static int add_port(struct ib_device *device, int port_num,
835 goto err_remove_pkey; 1055 goto err_remove_pkey;
836 } 1056 }
837 1057
1058 /*
1059 * If port == 0, it means we have only one port and the parent
1060 * device, not this port device, should be the holder of the
1061 * hw_counters
1062 */
1063 if (device->alloc_hw_stats && port_num)
1064 setup_hw_stats(device, p, port_num);
1065
838 list_add_tail(&p->kobj.entry, &device->port_list); 1066 list_add_tail(&p->kobj.entry, &device->port_list);
839 1067
840 kobject_uevent(&p->kobj, KOBJ_ADD); 1068 kobject_uevent(&p->kobj, KOBJ_ADD);
@@ -972,120 +1200,6 @@ static struct device_attribute *ib_class_attributes[] = {
972 &dev_attr_node_desc 1200 &dev_attr_node_desc
973}; 1201};
974 1202
975/* Show a given an attribute in the statistics group */
976static ssize_t show_protocol_stat(const struct device *device,
977 struct device_attribute *attr, char *buf,
978 unsigned offset)
979{
980 struct ib_device *dev = container_of(device, struct ib_device, dev);
981 union rdma_protocol_stats stats;
982 ssize_t ret;
983
984 ret = dev->get_protocol_stats(dev, &stats);
985 if (ret)
986 return ret;
987
988 return sprintf(buf, "%llu\n",
989 (unsigned long long) ((u64 *) &stats)[offset]);
990}
991
992/* generate a read-only iwarp statistics attribute */
993#define IW_STATS_ENTRY(name) \
994static ssize_t show_##name(struct device *device, \
995 struct device_attribute *attr, char *buf) \
996{ \
997 return show_protocol_stat(device, attr, buf, \
998 offsetof(struct iw_protocol_stats, name) / \
999 sizeof (u64)); \
1000} \
1001static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
1002
1003IW_STATS_ENTRY(ipInReceives);
1004IW_STATS_ENTRY(ipInHdrErrors);
1005IW_STATS_ENTRY(ipInTooBigErrors);
1006IW_STATS_ENTRY(ipInNoRoutes);
1007IW_STATS_ENTRY(ipInAddrErrors);
1008IW_STATS_ENTRY(ipInUnknownProtos);
1009IW_STATS_ENTRY(ipInTruncatedPkts);
1010IW_STATS_ENTRY(ipInDiscards);
1011IW_STATS_ENTRY(ipInDelivers);
1012IW_STATS_ENTRY(ipOutForwDatagrams);
1013IW_STATS_ENTRY(ipOutRequests);
1014IW_STATS_ENTRY(ipOutDiscards);
1015IW_STATS_ENTRY(ipOutNoRoutes);
1016IW_STATS_ENTRY(ipReasmTimeout);
1017IW_STATS_ENTRY(ipReasmReqds);
1018IW_STATS_ENTRY(ipReasmOKs);
1019IW_STATS_ENTRY(ipReasmFails);
1020IW_STATS_ENTRY(ipFragOKs);
1021IW_STATS_ENTRY(ipFragFails);
1022IW_STATS_ENTRY(ipFragCreates);
1023IW_STATS_ENTRY(ipInMcastPkts);
1024IW_STATS_ENTRY(ipOutMcastPkts);
1025IW_STATS_ENTRY(ipInBcastPkts);
1026IW_STATS_ENTRY(ipOutBcastPkts);
1027IW_STATS_ENTRY(tcpRtoAlgorithm);
1028IW_STATS_ENTRY(tcpRtoMin);
1029IW_STATS_ENTRY(tcpRtoMax);
1030IW_STATS_ENTRY(tcpMaxConn);
1031IW_STATS_ENTRY(tcpActiveOpens);
1032IW_STATS_ENTRY(tcpPassiveOpens);
1033IW_STATS_ENTRY(tcpAttemptFails);
1034IW_STATS_ENTRY(tcpEstabResets);
1035IW_STATS_ENTRY(tcpCurrEstab);
1036IW_STATS_ENTRY(tcpInSegs);
1037IW_STATS_ENTRY(tcpOutSegs);
1038IW_STATS_ENTRY(tcpRetransSegs);
1039IW_STATS_ENTRY(tcpInErrs);
1040IW_STATS_ENTRY(tcpOutRsts);
1041
1042static struct attribute *iw_proto_stats_attrs[] = {
1043 &dev_attr_ipInReceives.attr,
1044 &dev_attr_ipInHdrErrors.attr,
1045 &dev_attr_ipInTooBigErrors.attr,
1046 &dev_attr_ipInNoRoutes.attr,
1047 &dev_attr_ipInAddrErrors.attr,
1048 &dev_attr_ipInUnknownProtos.attr,
1049 &dev_attr_ipInTruncatedPkts.attr,
1050 &dev_attr_ipInDiscards.attr,
1051 &dev_attr_ipInDelivers.attr,
1052 &dev_attr_ipOutForwDatagrams.attr,
1053 &dev_attr_ipOutRequests.attr,
1054 &dev_attr_ipOutDiscards.attr,
1055 &dev_attr_ipOutNoRoutes.attr,
1056 &dev_attr_ipReasmTimeout.attr,
1057 &dev_attr_ipReasmReqds.attr,
1058 &dev_attr_ipReasmOKs.attr,
1059 &dev_attr_ipReasmFails.attr,
1060 &dev_attr_ipFragOKs.attr,
1061 &dev_attr_ipFragFails.attr,
1062 &dev_attr_ipFragCreates.attr,
1063 &dev_attr_ipInMcastPkts.attr,
1064 &dev_attr_ipOutMcastPkts.attr,
1065 &dev_attr_ipInBcastPkts.attr,
1066 &dev_attr_ipOutBcastPkts.attr,
1067 &dev_attr_tcpRtoAlgorithm.attr,
1068 &dev_attr_tcpRtoMin.attr,
1069 &dev_attr_tcpRtoMax.attr,
1070 &dev_attr_tcpMaxConn.attr,
1071 &dev_attr_tcpActiveOpens.attr,
1072 &dev_attr_tcpPassiveOpens.attr,
1073 &dev_attr_tcpAttemptFails.attr,
1074 &dev_attr_tcpEstabResets.attr,
1075 &dev_attr_tcpCurrEstab.attr,
1076 &dev_attr_tcpInSegs.attr,
1077 &dev_attr_tcpOutSegs.attr,
1078 &dev_attr_tcpRetransSegs.attr,
1079 &dev_attr_tcpInErrs.attr,
1080 &dev_attr_tcpOutRsts.attr,
1081 NULL
1082};
1083
1084static struct attribute_group iw_stats_group = {
1085 .name = "proto_stats",
1086 .attrs = iw_proto_stats_attrs,
1087};
1088
1089static void free_port_list_attributes(struct ib_device *device) 1203static void free_port_list_attributes(struct ib_device *device)
1090{ 1204{
1091 struct kobject *p, *t; 1205 struct kobject *p, *t;
@@ -1093,6 +1207,10 @@ static void free_port_list_attributes(struct ib_device *device)
1093 list_for_each_entry_safe(p, t, &device->port_list, entry) { 1207 list_for_each_entry_safe(p, t, &device->port_list, entry) {
1094 struct ib_port *port = container_of(p, struct ib_port, kobj); 1208 struct ib_port *port = container_of(p, struct ib_port, kobj);
1095 list_del(&p->entry); 1209 list_del(&p->entry);
1210 if (port->hw_stats) {
1211 kfree(port->hw_stats);
1212 free_hsag(&port->kobj, port->hw_stats_ag);
1213 }
1096 sysfs_remove_group(p, port->pma_table); 1214 sysfs_remove_group(p, port->pma_table);
1097 sysfs_remove_group(p, &port->pkey_group); 1215 sysfs_remove_group(p, &port->pkey_group);
1098 sysfs_remove_group(p, &port->gid_group); 1216 sysfs_remove_group(p, &port->gid_group);
@@ -1149,11 +1267,8 @@ int ib_device_register_sysfs(struct ib_device *device,
1149 } 1267 }
1150 } 1268 }
1151 1269
1152 if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) { 1270 if (device->alloc_hw_stats)
1153 ret = sysfs_create_group(&class_dev->kobj, &iw_stats_group); 1271 setup_hw_stats(device, NULL, 0);
1154 if (ret)
1155 goto err_put;
1156 }
1157 1272
1158 return 0; 1273 return 0;
1159 1274
@@ -1169,15 +1284,18 @@ err:
1169 1284
1170void ib_device_unregister_sysfs(struct ib_device *device) 1285void ib_device_unregister_sysfs(struct ib_device *device)
1171{ 1286{
1172 /* Hold kobject until ib_dealloc_device() */
1173 struct kobject *kobj_dev = kobject_get(&device->dev.kobj);
1174 int i; 1287 int i;
1175 1288
1176 if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) 1289 /* Hold kobject until ib_dealloc_device() */
1177 sysfs_remove_group(kobj_dev, &iw_stats_group); 1290 kobject_get(&device->dev.kobj);
1178 1291
1179 free_port_list_attributes(device); 1292 free_port_list_attributes(device);
1180 1293
1294 if (device->hw_stats) {
1295 kfree(device->hw_stats);
1296 free_hsag(&device->dev.kobj, device->hw_stats_ag);
1297 }
1298
1181 for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) 1299 for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i)
1182 device_remove_file(&device->dev, ib_class_attributes[i]); 1300 device_remove_file(&device->dev, ib_class_attributes[i]);
1183 1301
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index c7ad0a4c8b15..c0c7cf8af3f4 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/
8obj-$(CONFIG_INFINIBAND_NES) += nes/ 8obj-$(CONFIG_INFINIBAND_NES) += nes/
9obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/ 9obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/
10obj-$(CONFIG_INFINIBAND_USNIC) += usnic/ 10obj-$(CONFIG_INFINIBAND_USNIC) += usnic/
11obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index de1c61b417d6..ada2e5009c86 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -327,7 +327,7 @@ int cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
327 kfree(cq->sw_queue); 327 kfree(cq->sw_queue);
328 dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), 328 dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
329 (1UL << (cq->size_log2)) 329 (1UL << (cq->size_log2))
330 * sizeof(struct t3_cqe), cq->queue, 330 * sizeof(struct t3_cqe) + 1, cq->queue,
331 dma_unmap_addr(cq, mapping)); 331 dma_unmap_addr(cq, mapping));
332 cxio_hal_put_cqid(rdev_p->rscp, cq->cqid); 332 cxio_hal_put_cqid(rdev_p->rscp, cq->cqid);
333 return err; 333 return err;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 47cb927a0dd6..bb1a839d4d6d 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1218,59 +1218,119 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
1218 iwch_dev->rdev.rnic_info.pdev->device); 1218 iwch_dev->rdev.rnic_info.pdev->device);
1219} 1219}
1220 1220
1221static int iwch_get_mib(struct ib_device *ibdev, 1221enum counters {
1222 union rdma_protocol_stats *stats) 1222 IPINRECEIVES,
1223 IPINHDRERRORS,
1224 IPINADDRERRORS,
1225 IPINUNKNOWNPROTOS,
1226 IPINDISCARDS,
1227 IPINDELIVERS,
1228 IPOUTREQUESTS,
1229 IPOUTDISCARDS,
1230 IPOUTNOROUTES,
1231 IPREASMTIMEOUT,
1232 IPREASMREQDS,
1233 IPREASMOKS,
1234 IPREASMFAILS,
1235 TCPACTIVEOPENS,
1236 TCPPASSIVEOPENS,
1237 TCPATTEMPTFAILS,
1238 TCPESTABRESETS,
1239 TCPCURRESTAB,
1240 TCPINSEGS,
1241 TCPOUTSEGS,
1242 TCPRETRANSSEGS,
1243 TCPINERRS,
1244 TCPOUTRSTS,
1245 TCPRTOMIN,
1246 TCPRTOMAX,
1247 NR_COUNTERS
1248};
1249
1250static const char * const names[] = {
1251 [IPINRECEIVES] = "ipInReceives",
1252 [IPINHDRERRORS] = "ipInHdrErrors",
1253 [IPINADDRERRORS] = "ipInAddrErrors",
1254 [IPINUNKNOWNPROTOS] = "ipInUnknownProtos",
1255 [IPINDISCARDS] = "ipInDiscards",
1256 [IPINDELIVERS] = "ipInDelivers",
1257 [IPOUTREQUESTS] = "ipOutRequests",
1258 [IPOUTDISCARDS] = "ipOutDiscards",
1259 [IPOUTNOROUTES] = "ipOutNoRoutes",
1260 [IPREASMTIMEOUT] = "ipReasmTimeout",
1261 [IPREASMREQDS] = "ipReasmReqds",
1262 [IPREASMOKS] = "ipReasmOKs",
1263 [IPREASMFAILS] = "ipReasmFails",
1264 [TCPACTIVEOPENS] = "tcpActiveOpens",
1265 [TCPPASSIVEOPENS] = "tcpPassiveOpens",
1266 [TCPATTEMPTFAILS] = "tcpAttemptFails",
1267 [TCPESTABRESETS] = "tcpEstabResets",
1268 [TCPCURRESTAB] = "tcpCurrEstab",
1269 [TCPINSEGS] = "tcpInSegs",
1270 [TCPOUTSEGS] = "tcpOutSegs",
1271 [TCPRETRANSSEGS] = "tcpRetransSegs",
1272 [TCPINERRS] = "tcpInErrs",
1273 [TCPOUTRSTS] = "tcpOutRsts",
1274 [TCPRTOMIN] = "tcpRtoMin",
1275 [TCPRTOMAX] = "tcpRtoMax",
1276};
1277
1278static struct rdma_hw_stats *iwch_alloc_stats(struct ib_device *ibdev,
1279 u8 port_num)
1280{
1281 BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS);
1282
1283 /* Our driver only supports device level stats */
1284 if (port_num != 0)
1285 return NULL;
1286
1287 return rdma_alloc_hw_stats_struct(names, NR_COUNTERS,
1288 RDMA_HW_STATS_DEFAULT_LIFESPAN);
1289}
1290
1291static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats,
1292 u8 port, int index)
1223{ 1293{
1224 struct iwch_dev *dev; 1294 struct iwch_dev *dev;
1225 struct tp_mib_stats m; 1295 struct tp_mib_stats m;
1226 int ret; 1296 int ret;
1227 1297
1298 if (port != 0 || !stats)
1299 return -ENOSYS;
1300
1228 PDBG("%s ibdev %p\n", __func__, ibdev); 1301 PDBG("%s ibdev %p\n", __func__, ibdev);
1229 dev = to_iwch_dev(ibdev); 1302 dev = to_iwch_dev(ibdev);
1230 ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m); 1303 ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m);
1231 if (ret) 1304 if (ret)
1232 return -ENOSYS; 1305 return -ENOSYS;
1233 1306
1234 memset(stats, 0, sizeof *stats); 1307 stats->value[IPINRECEIVES] = ((u64)m.ipInReceive_hi << 32) + m.ipInReceive_lo;
1235 stats->iw.ipInReceives = ((u64) m.ipInReceive_hi << 32) + 1308 stats->value[IPINHDRERRORS] = ((u64)m.ipInHdrErrors_hi << 32) + m.ipInHdrErrors_lo;
1236 m.ipInReceive_lo; 1309 stats->value[IPINADDRERRORS] = ((u64)m.ipInAddrErrors_hi << 32) + m.ipInAddrErrors_lo;
1237 stats->iw.ipInHdrErrors = ((u64) m.ipInHdrErrors_hi << 32) + 1310 stats->value[IPINUNKNOWNPROTOS] = ((u64)m.ipInUnknownProtos_hi << 32) + m.ipInUnknownProtos_lo;
1238 m.ipInHdrErrors_lo; 1311 stats->value[IPINDISCARDS] = ((u64)m.ipInDiscards_hi << 32) + m.ipInDiscards_lo;
1239 stats->iw.ipInAddrErrors = ((u64) m.ipInAddrErrors_hi << 32) + 1312 stats->value[IPINDELIVERS] = ((u64)m.ipInDelivers_hi << 32) + m.ipInDelivers_lo;
1240 m.ipInAddrErrors_lo; 1313 stats->value[IPOUTREQUESTS] = ((u64)m.ipOutRequests_hi << 32) + m.ipOutRequests_lo;
1241 stats->iw.ipInUnknownProtos = ((u64) m.ipInUnknownProtos_hi << 32) + 1314 stats->value[IPOUTDISCARDS] = ((u64)m.ipOutDiscards_hi << 32) + m.ipOutDiscards_lo;
1242 m.ipInUnknownProtos_lo; 1315 stats->value[IPOUTNOROUTES] = ((u64)m.ipOutNoRoutes_hi << 32) + m.ipOutNoRoutes_lo;
1243 stats->iw.ipInDiscards = ((u64) m.ipInDiscards_hi << 32) + 1316 stats->value[IPREASMTIMEOUT] = m.ipReasmTimeout;
1244 m.ipInDiscards_lo; 1317 stats->value[IPREASMREQDS] = m.ipReasmReqds;
1245 stats->iw.ipInDelivers = ((u64) m.ipInDelivers_hi << 32) + 1318 stats->value[IPREASMOKS] = m.ipReasmOKs;
1246 m.ipInDelivers_lo; 1319 stats->value[IPREASMFAILS] = m.ipReasmFails;
1247 stats->iw.ipOutRequests = ((u64) m.ipOutRequests_hi << 32) + 1320 stats->value[TCPACTIVEOPENS] = m.tcpActiveOpens;
1248 m.ipOutRequests_lo; 1321 stats->value[TCPPASSIVEOPENS] = m.tcpPassiveOpens;
1249 stats->iw.ipOutDiscards = ((u64) m.ipOutDiscards_hi << 32) + 1322 stats->value[TCPATTEMPTFAILS] = m.tcpAttemptFails;
1250 m.ipOutDiscards_lo; 1323 stats->value[TCPESTABRESETS] = m.tcpEstabResets;
1251 stats->iw.ipOutNoRoutes = ((u64) m.ipOutNoRoutes_hi << 32) + 1324 stats->value[TCPCURRESTAB] = m.tcpOutRsts;
1252 m.ipOutNoRoutes_lo; 1325 stats->value[TCPINSEGS] = m.tcpCurrEstab;
1253 stats->iw.ipReasmTimeout = (u64) m.ipReasmTimeout; 1326 stats->value[TCPOUTSEGS] = ((u64)m.tcpInSegs_hi << 32) + m.tcpInSegs_lo;
1254 stats->iw.ipReasmReqds = (u64) m.ipReasmReqds; 1327 stats->value[TCPRETRANSSEGS] = ((u64)m.tcpOutSegs_hi << 32) + m.tcpOutSegs_lo;
1255 stats->iw.ipReasmOKs = (u64) m.ipReasmOKs; 1328 stats->value[TCPINERRS] = ((u64)m.tcpRetransSeg_hi << 32) + m.tcpRetransSeg_lo,
1256 stats->iw.ipReasmFails = (u64) m.ipReasmFails; 1329 stats->value[TCPOUTRSTS] = ((u64)m.tcpInErrs_hi << 32) + m.tcpInErrs_lo;
1257 stats->iw.tcpActiveOpens = (u64) m.tcpActiveOpens; 1330 stats->value[TCPRTOMIN] = m.tcpRtoMin;
1258 stats->iw.tcpPassiveOpens = (u64) m.tcpPassiveOpens; 1331 stats->value[TCPRTOMAX] = m.tcpRtoMax;
1259 stats->iw.tcpAttemptFails = (u64) m.tcpAttemptFails; 1332
1260 stats->iw.tcpEstabResets = (u64) m.tcpEstabResets; 1333 return stats->num_counters;
1261 stats->iw.tcpOutRsts = (u64) m.tcpOutRsts;
1262 stats->iw.tcpCurrEstab = (u64) m.tcpCurrEstab;
1263 stats->iw.tcpInSegs = ((u64) m.tcpInSegs_hi << 32) +
1264 m.tcpInSegs_lo;
1265 stats->iw.tcpOutSegs = ((u64) m.tcpOutSegs_hi << 32) +
1266 m.tcpOutSegs_lo;
1267 stats->iw.tcpRetransSegs = ((u64) m.tcpRetransSeg_hi << 32) +
1268 m.tcpRetransSeg_lo;
1269 stats->iw.tcpInErrs = ((u64) m.tcpInErrs_hi << 32) +
1270 m.tcpInErrs_lo;
1271 stats->iw.tcpRtoMin = (u64) m.tcpRtoMin;
1272 stats->iw.tcpRtoMax = (u64) m.tcpRtoMax;
1273 return 0;
1274} 1334}
1275 1335
1276static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 1336static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
@@ -1373,7 +1433,8 @@ int iwch_register_device(struct iwch_dev *dev)
1373 dev->ibdev.req_notify_cq = iwch_arm_cq; 1433 dev->ibdev.req_notify_cq = iwch_arm_cq;
1374 dev->ibdev.post_send = iwch_post_send; 1434 dev->ibdev.post_send = iwch_post_send;
1375 dev->ibdev.post_recv = iwch_post_receive; 1435 dev->ibdev.post_recv = iwch_post_receive;
1376 dev->ibdev.get_protocol_stats = iwch_get_mib; 1436 dev->ibdev.alloc_hw_stats = iwch_alloc_stats;
1437 dev->ibdev.get_hw_stats = iwch_get_mib;
1377 dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; 1438 dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION;
1378 dev->ibdev.get_port_immutable = iwch_port_immutable; 1439 dev->ibdev.get_port_immutable = iwch_port_immutable;
1379 1440
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 7574f394fdac..dd8a86b726d2 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -446,20 +446,59 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
446 c4iw_dev->rdev.lldi.pdev->device); 446 c4iw_dev->rdev.lldi.pdev->device);
447} 447}
448 448
449enum counters {
450 IP4INSEGS,
451 IP4OUTSEGS,
452 IP4RETRANSSEGS,
453 IP4OUTRSTS,
454 IP6INSEGS,
455 IP6OUTSEGS,
456 IP6RETRANSSEGS,
457 IP6OUTRSTS,
458 NR_COUNTERS
459};
460
461static const char * const names[] = {
462 [IP4INSEGS] = "ip4InSegs",
463 [IP4OUTSEGS] = "ip4OutSegs",
464 [IP4RETRANSSEGS] = "ip4RetransSegs",
465 [IP4OUTRSTS] = "ip4OutRsts",
466 [IP6INSEGS] = "ip6InSegs",
467 [IP6OUTSEGS] = "ip6OutSegs",
468 [IP6RETRANSSEGS] = "ip6RetransSegs",
469 [IP6OUTRSTS] = "ip6OutRsts"
470};
471
472static struct rdma_hw_stats *c4iw_alloc_stats(struct ib_device *ibdev,
473 u8 port_num)
474{
475 BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS);
476
477 if (port_num != 0)
478 return NULL;
479
480 return rdma_alloc_hw_stats_struct(names, NR_COUNTERS,
481 RDMA_HW_STATS_DEFAULT_LIFESPAN);
482}
483
449static int c4iw_get_mib(struct ib_device *ibdev, 484static int c4iw_get_mib(struct ib_device *ibdev,
450 union rdma_protocol_stats *stats) 485 struct rdma_hw_stats *stats,
486 u8 port, int index)
451{ 487{
452 struct tp_tcp_stats v4, v6; 488 struct tp_tcp_stats v4, v6;
453 struct c4iw_dev *c4iw_dev = to_c4iw_dev(ibdev); 489 struct c4iw_dev *c4iw_dev = to_c4iw_dev(ibdev);
454 490
455 cxgb4_get_tcp_stats(c4iw_dev->rdev.lldi.pdev, &v4, &v6); 491 cxgb4_get_tcp_stats(c4iw_dev->rdev.lldi.pdev, &v4, &v6);
456 memset(stats, 0, sizeof *stats); 492 stats->value[IP4INSEGS] = v4.tcp_in_segs;
457 stats->iw.tcpInSegs = v4.tcp_in_segs + v6.tcp_in_segs; 493 stats->value[IP4OUTSEGS] = v4.tcp_out_segs;
458 stats->iw.tcpOutSegs = v4.tcp_out_segs + v6.tcp_out_segs; 494 stats->value[IP4RETRANSSEGS] = v4.tcp_retrans_segs;
459 stats->iw.tcpRetransSegs = v4.tcp_retrans_segs + v6.tcp_retrans_segs; 495 stats->value[IP4OUTRSTS] = v4.tcp_out_rsts;
460 stats->iw.tcpOutRsts = v4.tcp_out_rsts + v6.tcp_out_rsts; 496 stats->value[IP6INSEGS] = v6.tcp_in_segs;
461 497 stats->value[IP6OUTSEGS] = v6.tcp_out_segs;
462 return 0; 498 stats->value[IP6RETRANSSEGS] = v6.tcp_retrans_segs;
499 stats->value[IP6OUTRSTS] = v6.tcp_out_rsts;
500
501 return stats->num_counters;
463} 502}
464 503
465static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 504static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
@@ -562,7 +601,8 @@ int c4iw_register_device(struct c4iw_dev *dev)
562 dev->ibdev.req_notify_cq = c4iw_arm_cq; 601 dev->ibdev.req_notify_cq = c4iw_arm_cq;
563 dev->ibdev.post_send = c4iw_post_send; 602 dev->ibdev.post_send = c4iw_post_send;
564 dev->ibdev.post_recv = c4iw_post_receive; 603 dev->ibdev.post_recv = c4iw_post_receive;
565 dev->ibdev.get_protocol_stats = c4iw_get_mib; 604 dev->ibdev.alloc_hw_stats = c4iw_alloc_stats;
605 dev->ibdev.get_hw_stats = c4iw_get_mib;
566 dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; 606 dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
567 dev->ibdev.get_port_immutable = c4iw_port_immutable; 607 dev->ibdev.get_port_immutable = c4iw_port_immutable;
568 dev->ibdev.drain_sq = c4iw_drain_sq; 608 dev->ibdev.drain_sq = c4iw_drain_sq;
diff --git a/drivers/staging/rdma/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig
index a925fb0db706..a925fb0db706 100644
--- a/drivers/staging/rdma/hfi1/Kconfig
+++ b/drivers/infiniband/hw/hfi1/Kconfig
diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index 8dc59382ee96..9b5382c94b0c 100644
--- a/drivers/staging/rdma/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -7,7 +7,7 @@
7# 7#
8obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o 8obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
9 9
10hfi1-y := affinity.o chip.o device.o diag.o driver.o efivar.o \ 10hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
11 eprom.o file_ops.o firmware.o \ 11 eprom.o file_ops.o firmware.o \
12 init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \ 12 init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
13 qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \ 13 qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \
diff --git a/drivers/staging/rdma/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 6e7050ab9e16..6e7050ab9e16 100644
--- a/drivers/staging/rdma/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
diff --git a/drivers/staging/rdma/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index 20f52fe74091..20f52fe74091 100644
--- a/drivers/staging/rdma/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
diff --git a/drivers/staging/rdma/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h
index 0d58fe3b49b5..0d58fe3b49b5 100644
--- a/drivers/staging/rdma/hfi1/aspm.h
+++ b/drivers/infiniband/hw/hfi1/aspm.h
diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index dcae8e723f98..3b876da745a1 100644
--- a/drivers/staging/rdma/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1037,6 +1037,7 @@ static void dc_shutdown(struct hfi1_devdata *);
1037static void dc_start(struct hfi1_devdata *); 1037static void dc_start(struct hfi1_devdata *);
1038static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, 1038static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
1039 unsigned int *np); 1039 unsigned int *np);
1040static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd);
1040 1041
1041/* 1042/*
1042 * Error interrupt table entry. This is used as input to the interrupt 1043 * Error interrupt table entry. This is used as input to the interrupt
@@ -6105,7 +6106,7 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
6105 } 6106 }
6106 6107
6107 /* this access is valid only when the link is up */ 6108 /* this access is valid only when the link is up */
6108 if ((ppd->host_link_state & HLS_UP) == 0) { 6109 if (ppd->host_link_state & HLS_DOWN) {
6109 dd_dev_info(dd, "%s: link state %s not up\n", 6110 dd_dev_info(dd, "%s: link state %s not up\n",
6110 __func__, link_state_name(ppd->host_link_state)); 6111 __func__, link_state_name(ppd->host_link_state));
6111 ret = -EBUSY; 6112 ret = -EBUSY;
@@ -6961,6 +6962,8 @@ void handle_link_down(struct work_struct *work)
6961 } 6962 }
6962 6963
6963 reset_neighbor_info(ppd); 6964 reset_neighbor_info(ppd);
6965 if (ppd->mgmt_allowed)
6966 remove_full_mgmt_pkey(ppd);
6964 6967
6965 /* disable the port */ 6968 /* disable the port */
6966 clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); 6969 clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
@@ -7069,6 +7072,12 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
7069 (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); 7072 (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
7070} 7073}
7071 7074
7075static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd)
7076{
7077 ppd->pkeys[2] = 0;
7078 (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
7079}
7080
7072/* 7081/*
7073 * Convert the given link width to the OPA link width bitmask. 7082 * Convert the given link width to the OPA link width bitmask.
7074 */ 7083 */
@@ -7429,7 +7438,7 @@ void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths)
7429retry: 7438retry:
7430 mutex_lock(&ppd->hls_lock); 7439 mutex_lock(&ppd->hls_lock);
7431 /* only apply if the link is up */ 7440 /* only apply if the link is up */
7432 if (!(ppd->host_link_state & HLS_UP)) { 7441 if (ppd->host_link_state & HLS_DOWN) {
7433 /* still going up..wait and retry */ 7442 /* still going up..wait and retry */
7434 if (ppd->host_link_state & HLS_GOING_UP) { 7443 if (ppd->host_link_state & HLS_GOING_UP) {
7435 if (++tries < 1000) { 7444 if (++tries < 1000) {
@@ -9212,9 +9221,6 @@ void reset_qsfp(struct hfi1_pportdata *ppd)
9212 9221
9213 /* Reset the QSFP */ 9222 /* Reset the QSFP */
9214 mask = (u64)QSFP_HFI0_RESET_N; 9223 mask = (u64)QSFP_HFI0_RESET_N;
9215 qsfp_mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
9216 qsfp_mask |= mask;
9217 write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE, qsfp_mask);
9218 9224
9219 qsfp_mask = read_csr(dd, 9225 qsfp_mask = read_csr(dd,
9220 dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT); 9226 dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
@@ -9252,6 +9258,12 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
9252 dd_dev_info(dd, "%s: QSFP cable temperature too low\n", 9258 dd_dev_info(dd, "%s: QSFP cable temperature too low\n",
9253 __func__); 9259 __func__);
9254 9260
9261 /*
9262 * The remaining alarms/warnings don't matter if the link is down.
9263 */
9264 if (ppd->host_link_state & HLS_DOWN)
9265 return 0;
9266
9255 if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) || 9267 if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
9256 (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING)) 9268 (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
9257 dd_dev_info(dd, "%s: QSFP supply voltage too high\n", 9269 dd_dev_info(dd, "%s: QSFP supply voltage too high\n",
@@ -9346,9 +9358,8 @@ void qsfp_event(struct work_struct *work)
9346 return; 9358 return;
9347 9359
9348 /* 9360 /*
9349 * Turn DC back on after cables has been 9361 * Turn DC back on after cable has been re-inserted. Up until
9350 * re-inserted. Up until now, the DC has been in 9362 * now, the DC has been in reset to save power.
9351 * reset to save power.
9352 */ 9363 */
9353 dc_start(dd); 9364 dc_start(dd);
9354 9365
@@ -9480,7 +9491,15 @@ int bringup_serdes(struct hfi1_pportdata *ppd)
9480 return ret; 9491 return ret;
9481 } 9492 }
9482 9493
9483 /* tune the SERDES to a ballpark setting for 9494 get_port_type(ppd);
9495 if (ppd->port_type == PORT_TYPE_QSFP) {
9496 set_qsfp_int_n(ppd, 0);
9497 wait_for_qsfp_init(ppd);
9498 set_qsfp_int_n(ppd, 1);
9499 }
9500
9501 /*
9502 * Tune the SerDes to a ballpark setting for
9484 * optimal signal and bit error rate 9503 * optimal signal and bit error rate
9485 * Needs to be done before starting the link 9504 * Needs to be done before starting the link
9486 */ 9505 */
@@ -10074,7 +10093,7 @@ u32 driver_physical_state(struct hfi1_pportdata *ppd)
10074 */ 10093 */
10075u32 driver_logical_state(struct hfi1_pportdata *ppd) 10094u32 driver_logical_state(struct hfi1_pportdata *ppd)
10076{ 10095{
10077 if (ppd->host_link_state && !(ppd->host_link_state & HLS_UP)) 10096 if (ppd->host_link_state && (ppd->host_link_state & HLS_DOWN))
10078 return IB_PORT_DOWN; 10097 return IB_PORT_DOWN;
10079 10098
10080 switch (ppd->host_link_state & HLS_UP) { 10099 switch (ppd->host_link_state & HLS_UP) {
@@ -14578,7 +14597,7 @@ u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
14578 (reason), (ret)) 14597 (reason), (ret))
14579 14598
14580/* 14599/*
14581 * Initialize the Avago Thermal sensor. 14600 * Initialize the thermal sensor.
14582 * 14601 *
14583 * After initialization, enable polling of thermal sensor through 14602 * After initialization, enable polling of thermal sensor through
14584 * SBus interface. In order for this to work, the SBus Master 14603 * SBus interface. In order for this to work, the SBus Master
diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 1948706fff1a..66a327978739 100644
--- a/drivers/staging/rdma/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -398,6 +398,12 @@
398/* Lane ID for general configuration registers */ 398/* Lane ID for general configuration registers */
399#define GENERAL_CONFIG 4 399#define GENERAL_CONFIG 4
400 400
401/* LINK_TUNING_PARAMETERS fields */
402#define TUNING_METHOD_SHIFT 24
403
404/* LINK_OPTIMIZATION_SETTINGS fields */
405#define ENABLE_EXT_DEV_CONFIG_SHIFT 24
406
401/* LOAD_DATA 8051 command shifts and fields */ 407/* LOAD_DATA 8051 command shifts and fields */
402#define LOAD_DATA_FIELD_ID_SHIFT 40 408#define LOAD_DATA_FIELD_ID_SHIFT 40
403#define LOAD_DATA_FIELD_ID_MASK 0xfull 409#define LOAD_DATA_FIELD_ID_MASK 0xfull
diff --git a/drivers/staging/rdma/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index 8744de6667c2..8744de6667c2 100644
--- a/drivers/staging/rdma/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
diff --git a/drivers/staging/rdma/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index e9b6bb322025..fcc9c217a97a 100644
--- a/drivers/staging/rdma/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -178,7 +178,8 @@
178 HFI1_CAP_PKEY_CHECK | \ 178 HFI1_CAP_PKEY_CHECK | \
179 HFI1_CAP_NO_INTEGRITY) 179 HFI1_CAP_NO_INTEGRITY)
180 180
181#define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << 16) | HFI1_USER_SWMINOR) 181#define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << HFI1_SWMAJOR_SHIFT) | \
182 HFI1_USER_SWMINOR)
182 183
183#ifndef HFI1_KERN_TYPE 184#ifndef HFI1_KERN_TYPE
184#define HFI1_KERN_TYPE 0 185#define HFI1_KERN_TYPE 0
@@ -349,6 +350,8 @@ struct hfi1_message_header {
349#define HFI1_BECN_MASK 1 350#define HFI1_BECN_MASK 1
350#define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT) 351#define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT)
351 352
353#define HFI1_PSM_IOC_BASE_SEQ 0x0
354
352static inline __u64 rhf_to_cpu(const __le32 *rbuf) 355static inline __u64 rhf_to_cpu(const __le32 *rbuf)
353{ 356{
354 return __le64_to_cpu(*((__le64 *)rbuf)); 357 return __le64_to_cpu(*((__le64 *)rbuf));
diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index dbab9d9cc288..dbab9d9cc288 100644
--- a/drivers/staging/rdma/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
diff --git a/drivers/staging/rdma/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h
index b6fb6814f1b8..b6fb6814f1b8 100644
--- a/drivers/staging/rdma/hfi1/debugfs.h
+++ b/drivers/infiniband/hw/hfi1/debugfs.h
diff --git a/drivers/staging/rdma/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c
index c05c39da83b1..bf64b5a7bfd7 100644
--- a/drivers/staging/rdma/hfi1/device.c
+++ b/drivers/infiniband/hw/hfi1/device.c
@@ -60,7 +60,8 @@ static dev_t hfi1_dev;
60int hfi1_cdev_init(int minor, const char *name, 60int hfi1_cdev_init(int minor, const char *name,
61 const struct file_operations *fops, 61 const struct file_operations *fops,
62 struct cdev *cdev, struct device **devp, 62 struct cdev *cdev, struct device **devp,
63 bool user_accessible) 63 bool user_accessible,
64 struct kobject *parent)
64{ 65{
65 const dev_t dev = MKDEV(MAJOR(hfi1_dev), minor); 66 const dev_t dev = MKDEV(MAJOR(hfi1_dev), minor);
66 struct device *device = NULL; 67 struct device *device = NULL;
@@ -68,6 +69,7 @@ int hfi1_cdev_init(int minor, const char *name,
68 69
69 cdev_init(cdev, fops); 70 cdev_init(cdev, fops);
70 cdev->owner = THIS_MODULE; 71 cdev->owner = THIS_MODULE;
72 cdev->kobj.parent = parent;
71 kobject_set_name(&cdev->kobj, name); 73 kobject_set_name(&cdev->kobj, name);
72 74
73 ret = cdev_add(cdev, dev, 1); 75 ret = cdev_add(cdev, dev, 1);
@@ -82,13 +84,13 @@ int hfi1_cdev_init(int minor, const char *name,
82 else 84 else
83 device = device_create(class, NULL, dev, NULL, "%s", name); 85 device = device_create(class, NULL, dev, NULL, "%s", name);
84 86
85 if (!IS_ERR(device)) 87 if (IS_ERR(device)) {
86 goto done; 88 ret = PTR_ERR(device);
87 ret = PTR_ERR(device); 89 device = NULL;
88 device = NULL; 90 pr_err("Could not create device for minor %d, %s (err %d)\n",
89 pr_err("Could not create device for minor %d, %s (err %d)\n", 91 minor, name, -ret);
90 minor, name, -ret); 92 cdev_del(cdev);
91 cdev_del(cdev); 93 }
92done: 94done:
93 *devp = device; 95 *devp = device;
94 return ret; 96 return ret;
diff --git a/drivers/staging/rdma/hfi1/device.h b/drivers/infiniband/hw/hfi1/device.h
index 5bb3e83cf2da..c3ec19cb0ac9 100644
--- a/drivers/staging/rdma/hfi1/device.h
+++ b/drivers/infiniband/hw/hfi1/device.h
@@ -50,7 +50,8 @@
50int hfi1_cdev_init(int minor, const char *name, 50int hfi1_cdev_init(int minor, const char *name,
51 const struct file_operations *fops, 51 const struct file_operations *fops,
52 struct cdev *cdev, struct device **devp, 52 struct cdev *cdev, struct device **devp,
53 bool user_accessible); 53 bool user_accessible,
54 struct kobject *parent);
54void hfi1_cdev_cleanup(struct cdev *cdev, struct device **devp); 55void hfi1_cdev_cleanup(struct cdev *cdev, struct device **devp);
55const char *class_name(void); 56const char *class_name(void);
56int __init dev_init(void); 57int __init dev_init(void);
diff --git a/drivers/staging/rdma/hfi1/dma.c b/drivers/infiniband/hw/hfi1/dma.c
index 7e8dab892848..7e8dab892848 100644
--- a/drivers/staging/rdma/hfi1/dma.c
+++ b/drivers/infiniband/hw/hfi1/dma.c
diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 700c6fa3a633..c75b0ae688f8 100644
--- a/drivers/staging/rdma/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1161,7 +1161,7 @@ int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc)
1161 ppd->lmc = lmc; 1161 ppd->lmc = lmc;
1162 hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LIDLMC, 0); 1162 hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LIDLMC, 0);
1163 1163
1164 dd_dev_info(dd, "IB%u:%u got a lid: 0x%x\n", dd->unit, ppd->port, lid); 1164 dd_dev_info(dd, "port %u: got a lid: 0x%x\n", ppd->port, lid);
1165 1165
1166 return 0; 1166 return 0;
1167} 1167}
diff --git a/drivers/staging/rdma/hfi1/efivar.c b/drivers/infiniband/hw/hfi1/efivar.c
index 106349fc1fb9..106349fc1fb9 100644
--- a/drivers/staging/rdma/hfi1/efivar.c
+++ b/drivers/infiniband/hw/hfi1/efivar.c
diff --git a/drivers/staging/rdma/hfi1/efivar.h b/drivers/infiniband/hw/hfi1/efivar.h
index 94e9e70de568..94e9e70de568 100644
--- a/drivers/staging/rdma/hfi1/efivar.h
+++ b/drivers/infiniband/hw/hfi1/efivar.h
diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c
new file mode 100644
index 000000000000..36b77943cbfd
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/eprom.c
@@ -0,0 +1,102 @@
1/*
2 * Copyright(c) 2015, 2016 Intel Corporation.
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of version 2 of the GNU General Public License as
11 * published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * BSD LICENSE
19 *
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions
22 * are met:
23 *
24 * - Redistributions of source code must retain the above copyright
25 * notice, this list of conditions and the following disclaimer.
26 * - Redistributions in binary form must reproduce the above copyright
27 * notice, this list of conditions and the following disclaimer in
28 * the documentation and/or other materials provided with the
29 * distribution.
30 * - Neither the name of Intel Corporation nor the names of its
31 * contributors may be used to endorse or promote products derived
32 * from this software without specific prior written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 *
46 */
47#include <linux/delay.h>
48#include "hfi.h"
49#include "common.h"
50#include "eprom.h"
51
52#define CMD_SHIFT 24
53#define CMD_RELEASE_POWERDOWN_NOID ((0xab << CMD_SHIFT))
54
55/* controller interface speeds */
56#define EP_SPEED_FULL 0x2 /* full speed */
57
58/*
59 * How long to wait for the EPROM to become available, in ms.
60 * The spec 32 Mb EPROM takes around 40s to erase then write.
61 * Double it for safety.
62 */
63#define EPROM_TIMEOUT 80000 /* ms */
64/*
65 * Initialize the EPROM handler.
66 */
67int eprom_init(struct hfi1_devdata *dd)
68{
69 int ret = 0;
70
71 /* only the discrete chip has an EPROM */
72 if (dd->pcidev->device != PCI_DEVICE_ID_INTEL0)
73 return 0;
74
75 /*
76 * It is OK if both HFIs reset the EPROM as long as they don't
77 * do it at the same time.
78 */
79 ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
80 if (ret) {
81 dd_dev_err(dd,
82 "%s: unable to acquire EPROM resource, no EPROM support\n",
83 __func__);
84 goto done_asic;
85 }
86
87 /* reset EPROM to be sure it is in a good state */
88
89 /* set reset */
90 write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_EP_RESET_SMASK);
91 /* clear reset, set speed */
92 write_csr(dd, ASIC_EEP_CTL_STAT,
93 EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT);
94
95 /* wake the device with command "release powerdown NoID" */
96 write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_RELEASE_POWERDOWN_NOID);
97
98 dd->eprom_available = true;
99 release_chip_resource(dd, CR_EPROM);
100done_asic:
101 return ret;
102}
diff --git a/drivers/staging/rdma/hfi1/eprom.h b/drivers/infiniband/hw/hfi1/eprom.h
index d41f0b1afb15..d41f0b1afb15 100644
--- a/drivers/staging/rdma/hfi1/eprom.h
+++ b/drivers/infiniband/hw/hfi1/eprom.h
diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index c1c5bf82addb..7a5b0e676cc7 100644
--- a/drivers/staging/rdma/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -72,8 +72,6 @@
72 */ 72 */
73static int hfi1_file_open(struct inode *, struct file *); 73static int hfi1_file_open(struct inode *, struct file *);
74static int hfi1_file_close(struct inode *, struct file *); 74static int hfi1_file_close(struct inode *, struct file *);
75static ssize_t hfi1_file_write(struct file *, const char __user *,
76 size_t, loff_t *);
77static ssize_t hfi1_write_iter(struct kiocb *, struct iov_iter *); 75static ssize_t hfi1_write_iter(struct kiocb *, struct iov_iter *);
78static unsigned int hfi1_poll(struct file *, struct poll_table_struct *); 76static unsigned int hfi1_poll(struct file *, struct poll_table_struct *);
79static int hfi1_file_mmap(struct file *, struct vm_area_struct *); 77static int hfi1_file_mmap(struct file *, struct vm_area_struct *);
@@ -86,8 +84,7 @@ static int get_ctxt_info(struct file *, void __user *, __u32);
86static int get_base_info(struct file *, void __user *, __u32); 84static int get_base_info(struct file *, void __user *, __u32);
87static int setup_ctxt(struct file *); 85static int setup_ctxt(struct file *);
88static int setup_subctxt(struct hfi1_ctxtdata *); 86static int setup_subctxt(struct hfi1_ctxtdata *);
89static int get_user_context(struct file *, struct hfi1_user_info *, 87static int get_user_context(struct file *, struct hfi1_user_info *, int);
90 int, unsigned);
91static int find_shared_ctxt(struct file *, const struct hfi1_user_info *); 88static int find_shared_ctxt(struct file *, const struct hfi1_user_info *);
92static int allocate_ctxt(struct file *, struct hfi1_devdata *, 89static int allocate_ctxt(struct file *, struct hfi1_devdata *,
93 struct hfi1_user_info *); 90 struct hfi1_user_info *);
@@ -97,13 +94,15 @@ static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long);
97static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16); 94static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16);
98static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int); 95static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int);
99static int vma_fault(struct vm_area_struct *, struct vm_fault *); 96static int vma_fault(struct vm_area_struct *, struct vm_fault *);
97static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
98 unsigned long arg);
100 99
101static const struct file_operations hfi1_file_ops = { 100static const struct file_operations hfi1_file_ops = {
102 .owner = THIS_MODULE, 101 .owner = THIS_MODULE,
103 .write = hfi1_file_write,
104 .write_iter = hfi1_write_iter, 102 .write_iter = hfi1_write_iter,
105 .open = hfi1_file_open, 103 .open = hfi1_file_open,
106 .release = hfi1_file_close, 104 .release = hfi1_file_close,
105 .unlocked_ioctl = hfi1_file_ioctl,
107 .poll = hfi1_poll, 106 .poll = hfi1_poll,
108 .mmap = hfi1_file_mmap, 107 .mmap = hfi1_file_mmap,
109 .llseek = noop_llseek, 108 .llseek = noop_llseek,
@@ -169,6 +168,13 @@ static inline int is_valid_mmap(u64 token)
169 168
170static int hfi1_file_open(struct inode *inode, struct file *fp) 169static int hfi1_file_open(struct inode *inode, struct file *fp)
171{ 170{
171 struct hfi1_devdata *dd = container_of(inode->i_cdev,
172 struct hfi1_devdata,
173 user_cdev);
174
175 /* Just take a ref now. Not all opens result in a context assign */
176 kobject_get(&dd->kobj);
177
172 /* The real work is performed later in assign_ctxt() */ 178 /* The real work is performed later in assign_ctxt() */
173 fp->private_data = kzalloc(sizeof(struct hfi1_filedata), GFP_KERNEL); 179 fp->private_data = kzalloc(sizeof(struct hfi1_filedata), GFP_KERNEL);
174 if (fp->private_data) /* no cpu affinity by default */ 180 if (fp->private_data) /* no cpu affinity by default */
@@ -176,127 +182,59 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
176 return fp->private_data ? 0 : -ENOMEM; 182 return fp->private_data ? 0 : -ENOMEM;
177} 183}
178 184
179static ssize_t hfi1_file_write(struct file *fp, const char __user *data, 185static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
180 size_t count, loff_t *offset) 186 unsigned long arg)
181{ 187{
182 const struct hfi1_cmd __user *ucmd;
183 struct hfi1_filedata *fd = fp->private_data; 188 struct hfi1_filedata *fd = fp->private_data;
184 struct hfi1_ctxtdata *uctxt = fd->uctxt; 189 struct hfi1_ctxtdata *uctxt = fd->uctxt;
185 struct hfi1_cmd cmd;
186 struct hfi1_user_info uinfo; 190 struct hfi1_user_info uinfo;
187 struct hfi1_tid_info tinfo; 191 struct hfi1_tid_info tinfo;
192 int ret = 0;
188 unsigned long addr; 193 unsigned long addr;
189 ssize_t consumed = 0, copy = 0, ret = 0; 194 int uval = 0;
190 void *dest = NULL; 195 unsigned long ul_uval = 0;
191 __u64 user_val = 0; 196 u16 uval16 = 0;
192 int uctxt_required = 1; 197
193 int must_be_root = 0; 198 hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd);
194 199 if (cmd != HFI1_IOCTL_ASSIGN_CTXT &&
195 /* FIXME: This interface cannot continue out of staging */ 200 cmd != HFI1_IOCTL_GET_VERS &&
196 if (WARN_ON_ONCE(!ib_safe_file_access(fp))) 201 !uctxt)
197 return -EACCES; 202 return -EINVAL;
198
199 if (count < sizeof(cmd)) {
200 ret = -EINVAL;
201 goto bail;
202 }
203
204 ucmd = (const struct hfi1_cmd __user *)data;
205 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) {
206 ret = -EFAULT;
207 goto bail;
208 }
209
210 consumed = sizeof(cmd);
211
212 switch (cmd.type) {
213 case HFI1_CMD_ASSIGN_CTXT:
214 uctxt_required = 0; /* assigned user context not required */
215 copy = sizeof(uinfo);
216 dest = &uinfo;
217 break;
218 case HFI1_CMD_SDMA_STATUS_UPD:
219 case HFI1_CMD_CREDIT_UPD:
220 copy = 0;
221 break;
222 case HFI1_CMD_TID_UPDATE:
223 case HFI1_CMD_TID_FREE:
224 case HFI1_CMD_TID_INVAL_READ:
225 copy = sizeof(tinfo);
226 dest = &tinfo;
227 break;
228 case HFI1_CMD_USER_INFO:
229 case HFI1_CMD_RECV_CTRL:
230 case HFI1_CMD_POLL_TYPE:
231 case HFI1_CMD_ACK_EVENT:
232 case HFI1_CMD_CTXT_INFO:
233 case HFI1_CMD_SET_PKEY:
234 case HFI1_CMD_CTXT_RESET:
235 copy = 0;
236 user_val = cmd.addr;
237 break;
238 case HFI1_CMD_EP_INFO:
239 case HFI1_CMD_EP_ERASE_CHIP:
240 case HFI1_CMD_EP_ERASE_RANGE:
241 case HFI1_CMD_EP_READ_RANGE:
242 case HFI1_CMD_EP_WRITE_RANGE:
243 uctxt_required = 0; /* assigned user context not required */
244 must_be_root = 1; /* validate user */
245 copy = 0;
246 break;
247 default:
248 ret = -EINVAL;
249 goto bail;
250 }
251
252 /* If the command comes with user data, copy it. */
253 if (copy) {
254 if (copy_from_user(dest, (void __user *)cmd.addr, copy)) {
255 ret = -EFAULT;
256 goto bail;
257 }
258 consumed += copy;
259 }
260
261 /*
262 * Make sure there is a uctxt when needed.
263 */
264 if (uctxt_required && !uctxt) {
265 ret = -EINVAL;
266 goto bail;
267 }
268 203
269 /* only root can do these operations */ 204 switch (cmd) {
270 if (must_be_root && !capable(CAP_SYS_ADMIN)) { 205 case HFI1_IOCTL_ASSIGN_CTXT:
271 ret = -EPERM; 206 if (copy_from_user(&uinfo,
272 goto bail; 207 (struct hfi1_user_info __user *)arg,
273 } 208 sizeof(uinfo)))
209 return -EFAULT;
274 210
275 switch (cmd.type) {
276 case HFI1_CMD_ASSIGN_CTXT:
277 ret = assign_ctxt(fp, &uinfo); 211 ret = assign_ctxt(fp, &uinfo);
278 if (ret < 0) 212 if (ret < 0)
279 goto bail; 213 return ret;
280 ret = setup_ctxt(fp); 214 setup_ctxt(fp);
281 if (ret) 215 if (ret)
282 goto bail; 216 return ret;
283 ret = user_init(fp); 217 ret = user_init(fp);
284 break; 218 break;
285 case HFI1_CMD_CTXT_INFO: 219 case HFI1_IOCTL_CTXT_INFO:
286 ret = get_ctxt_info(fp, (void __user *)(unsigned long) 220 ret = get_ctxt_info(fp, (void __user *)(unsigned long)arg,
287 user_val, cmd.len); 221 sizeof(struct hfi1_ctxt_info));
288 break;
289 case HFI1_CMD_USER_INFO:
290 ret = get_base_info(fp, (void __user *)(unsigned long)
291 user_val, cmd.len);
292 break; 222 break;
293 case HFI1_CMD_SDMA_STATUS_UPD: 223 case HFI1_IOCTL_USER_INFO:
224 ret = get_base_info(fp, (void __user *)(unsigned long)arg,
225 sizeof(struct hfi1_base_info));
294 break; 226 break;
295 case HFI1_CMD_CREDIT_UPD: 227 case HFI1_IOCTL_CREDIT_UPD:
296 if (uctxt && uctxt->sc) 228 if (uctxt && uctxt->sc)
297 sc_return_credits(uctxt->sc); 229 sc_return_credits(uctxt->sc);
298 break; 230 break;
299 case HFI1_CMD_TID_UPDATE: 231
232 case HFI1_IOCTL_TID_UPDATE:
233 if (copy_from_user(&tinfo,
234 (struct hfi11_tid_info __user *)arg,
235 sizeof(tinfo)))
236 return -EFAULT;
237
300 ret = hfi1_user_exp_rcv_setup(fp, &tinfo); 238 ret = hfi1_user_exp_rcv_setup(fp, &tinfo);
301 if (!ret) { 239 if (!ret) {
302 /* 240 /*
@@ -305,57 +243,82 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
305 * These fields are adjacent in the structure so 243 * These fields are adjacent in the structure so
306 * we can copy them at the same time. 244 * we can copy them at the same time.
307 */ 245 */
308 addr = (unsigned long)cmd.addr + 246 addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
309 offsetof(struct hfi1_tid_info, tidcnt);
310 if (copy_to_user((void __user *)addr, &tinfo.tidcnt, 247 if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
311 sizeof(tinfo.tidcnt) + 248 sizeof(tinfo.tidcnt) +
312 sizeof(tinfo.length))) 249 sizeof(tinfo.length)))
313 ret = -EFAULT; 250 ret = -EFAULT;
314 } 251 }
315 break; 252 break;
316 case HFI1_CMD_TID_INVAL_READ: 253
317 ret = hfi1_user_exp_rcv_invalid(fp, &tinfo); 254 case HFI1_IOCTL_TID_FREE:
255 if (copy_from_user(&tinfo,
256 (struct hfi11_tid_info __user *)arg,
257 sizeof(tinfo)))
258 return -EFAULT;
259
260 ret = hfi1_user_exp_rcv_clear(fp, &tinfo);
318 if (ret) 261 if (ret)
319 break; 262 break;
320 addr = (unsigned long)cmd.addr + 263 addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
321 offsetof(struct hfi1_tid_info, tidcnt);
322 if (copy_to_user((void __user *)addr, &tinfo.tidcnt, 264 if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
323 sizeof(tinfo.tidcnt))) 265 sizeof(tinfo.tidcnt)))
324 ret = -EFAULT; 266 ret = -EFAULT;
325 break; 267 break;
326 case HFI1_CMD_TID_FREE: 268
327 ret = hfi1_user_exp_rcv_clear(fp, &tinfo); 269 case HFI1_IOCTL_TID_INVAL_READ:
270 if (copy_from_user(&tinfo,
271 (struct hfi11_tid_info __user *)arg,
272 sizeof(tinfo)))
273 return -EFAULT;
274
275 ret = hfi1_user_exp_rcv_invalid(fp, &tinfo);
328 if (ret) 276 if (ret)
329 break; 277 break;
330 addr = (unsigned long)cmd.addr + 278 addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
331 offsetof(struct hfi1_tid_info, tidcnt);
332 if (copy_to_user((void __user *)addr, &tinfo.tidcnt, 279 if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
333 sizeof(tinfo.tidcnt))) 280 sizeof(tinfo.tidcnt)))
334 ret = -EFAULT; 281 ret = -EFAULT;
335 break; 282 break;
336 case HFI1_CMD_RECV_CTRL: 283
337 ret = manage_rcvq(uctxt, fd->subctxt, (int)user_val); 284 case HFI1_IOCTL_RECV_CTRL:
285 ret = get_user(uval, (int __user *)arg);
286 if (ret != 0)
287 return -EFAULT;
288 ret = manage_rcvq(uctxt, fd->subctxt, uval);
338 break; 289 break;
339 case HFI1_CMD_POLL_TYPE: 290
340 uctxt->poll_type = (typeof(uctxt->poll_type))user_val; 291 case HFI1_IOCTL_POLL_TYPE:
292 ret = get_user(uval, (int __user *)arg);
293 if (ret != 0)
294 return -EFAULT;
295 uctxt->poll_type = (typeof(uctxt->poll_type))uval;
341 break; 296 break;
342 case HFI1_CMD_ACK_EVENT: 297
343 ret = user_event_ack(uctxt, fd->subctxt, user_val); 298 case HFI1_IOCTL_ACK_EVENT:
299 ret = get_user(ul_uval, (unsigned long __user *)arg);
300 if (ret != 0)
301 return -EFAULT;
302 ret = user_event_ack(uctxt, fd->subctxt, ul_uval);
344 break; 303 break;
345 case HFI1_CMD_SET_PKEY: 304
305 case HFI1_IOCTL_SET_PKEY:
306 ret = get_user(uval16, (u16 __user *)arg);
307 if (ret != 0)
308 return -EFAULT;
346 if (HFI1_CAP_IS_USET(PKEY_CHECK)) 309 if (HFI1_CAP_IS_USET(PKEY_CHECK))
347 ret = set_ctxt_pkey(uctxt, fd->subctxt, user_val); 310 ret = set_ctxt_pkey(uctxt, fd->subctxt, uval16);
348 else 311 else
349 ret = -EPERM; 312 return -EPERM;
350 break; 313 break;
351 case HFI1_CMD_CTXT_RESET: { 314
315 case HFI1_IOCTL_CTXT_RESET: {
352 struct send_context *sc; 316 struct send_context *sc;
353 struct hfi1_devdata *dd; 317 struct hfi1_devdata *dd;
354 318
355 if (!uctxt || !uctxt->dd || !uctxt->sc) { 319 if (!uctxt || !uctxt->dd || !uctxt->sc)
356 ret = -EINVAL; 320 return -EINVAL;
357 break; 321
358 }
359 /* 322 /*
360 * There is no protection here. User level has to 323 * There is no protection here. User level has to
361 * guarantee that no one will be writing to the send 324 * guarantee that no one will be writing to the send
@@ -373,10 +336,9 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
373 wait_event_interruptible_timeout( 336 wait_event_interruptible_timeout(
374 sc->halt_wait, (sc->flags & SCF_HALTED), 337 sc->halt_wait, (sc->flags & SCF_HALTED),
375 msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); 338 msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
376 if (!(sc->flags & SCF_HALTED)) { 339 if (!(sc->flags & SCF_HALTED))
377 ret = -ENOLCK; 340 return -ENOLCK;
378 break; 341
379 }
380 /* 342 /*
381 * If the send context was halted due to a Freeze, 343 * If the send context was halted due to a Freeze,
382 * wait until the device has been "unfrozen" before 344 * wait until the device has been "unfrozen" before
@@ -387,18 +349,16 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
387 dd->event_queue, 349 dd->event_queue,
388 !(ACCESS_ONCE(dd->flags) & HFI1_FROZEN), 350 !(ACCESS_ONCE(dd->flags) & HFI1_FROZEN),
389 msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); 351 msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
390 if (dd->flags & HFI1_FROZEN) { 352 if (dd->flags & HFI1_FROZEN)
391 ret = -ENOLCK; 353 return -ENOLCK;
392 break; 354
393 } 355 if (dd->flags & HFI1_FORCED_FREEZE)
394 if (dd->flags & HFI1_FORCED_FREEZE) {
395 /* 356 /*
396 * Don't allow context reset if we are into 357 * Don't allow context reset if we are into
397 * forced freeze 358 * forced freeze
398 */ 359 */
399 ret = -ENODEV; 360 return -ENODEV;
400 break; 361
401 }
402 sc_disable(sc); 362 sc_disable(sc);
403 ret = sc_enable(sc); 363 ret = sc_enable(sc);
404 hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, 364 hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB,
@@ -410,18 +370,17 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
410 sc_return_credits(sc); 370 sc_return_credits(sc);
411 break; 371 break;
412 } 372 }
413 case HFI1_CMD_EP_INFO: 373
414 case HFI1_CMD_EP_ERASE_CHIP: 374 case HFI1_IOCTL_GET_VERS:
415 case HFI1_CMD_EP_ERASE_RANGE: 375 uval = HFI1_USER_SWVERSION;
416 case HFI1_CMD_EP_READ_RANGE: 376 if (put_user(uval, (int __user *)arg))
417 case HFI1_CMD_EP_WRITE_RANGE: 377 return -EFAULT;
418 ret = handle_eprom_command(fp, &cmd);
419 break; 378 break;
379
380 default:
381 return -EINVAL;
420 } 382 }
421 383
422 if (ret >= 0)
423 ret = consumed;
424bail:
425 return ret; 384 return ret;
426} 385}
427 386
@@ -738,7 +697,9 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
738{ 697{
739 struct hfi1_filedata *fdata = fp->private_data; 698 struct hfi1_filedata *fdata = fp->private_data;
740 struct hfi1_ctxtdata *uctxt = fdata->uctxt; 699 struct hfi1_ctxtdata *uctxt = fdata->uctxt;
741 struct hfi1_devdata *dd; 700 struct hfi1_devdata *dd = container_of(inode->i_cdev,
701 struct hfi1_devdata,
702 user_cdev);
742 unsigned long flags, *ev; 703 unsigned long flags, *ev;
743 704
744 fp->private_data = NULL; 705 fp->private_data = NULL;
@@ -747,7 +708,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
747 goto done; 708 goto done;
748 709
749 hfi1_cdbg(PROC, "freeing ctxt %u:%u", uctxt->ctxt, fdata->subctxt); 710 hfi1_cdbg(PROC, "freeing ctxt %u:%u", uctxt->ctxt, fdata->subctxt);
750 dd = uctxt->dd;
751 mutex_lock(&hfi1_mutex); 711 mutex_lock(&hfi1_mutex);
752 712
753 flush_wc(); 713 flush_wc();
@@ -813,6 +773,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
813 mutex_unlock(&hfi1_mutex); 773 mutex_unlock(&hfi1_mutex);
814 hfi1_free_ctxtdata(dd, uctxt); 774 hfi1_free_ctxtdata(dd, uctxt);
815done: 775done:
776 kobject_put(&dd->kobj);
816 kfree(fdata); 777 kfree(fdata);
817 return 0; 778 return 0;
818} 779}
@@ -836,7 +797,7 @@ static u64 kvirt_to_phys(void *addr)
836static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) 797static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
837{ 798{
838 int i_minor, ret = 0; 799 int i_minor, ret = 0;
839 unsigned swmajor, swminor, alg = HFI1_ALG_ACROSS; 800 unsigned int swmajor, swminor;
840 801
841 swmajor = uinfo->userversion >> 16; 802 swmajor = uinfo->userversion >> 16;
842 if (swmajor != HFI1_USER_SWMAJOR) { 803 if (swmajor != HFI1_USER_SWMAJOR) {
@@ -846,9 +807,6 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
846 807
847 swminor = uinfo->userversion & 0xffff; 808 swminor = uinfo->userversion & 0xffff;
848 809
849 if (uinfo->hfi1_alg < HFI1_ALG_COUNT)
850 alg = uinfo->hfi1_alg;
851
852 mutex_lock(&hfi1_mutex); 810 mutex_lock(&hfi1_mutex);
853 /* First, lets check if we need to setup a shared context? */ 811 /* First, lets check if we need to setup a shared context? */
854 if (uinfo->subctxt_cnt) { 812 if (uinfo->subctxt_cnt) {
@@ -868,7 +826,7 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
868 */ 826 */
869 if (!ret) { 827 if (!ret) {
870 i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE; 828 i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
871 ret = get_user_context(fp, uinfo, i_minor - 1, alg); 829 ret = get_user_context(fp, uinfo, i_minor);
872 } 830 }
873done_unlock: 831done_unlock:
874 mutex_unlock(&hfi1_mutex); 832 mutex_unlock(&hfi1_mutex);
@@ -876,71 +834,26 @@ done:
876 return ret; 834 return ret;
877} 835}
878 836
879/* return true if the device available for general use */
880static int usable_device(struct hfi1_devdata *dd)
881{
882 struct hfi1_pportdata *ppd = dd->pport;
883
884 return driver_lstate(ppd) == IB_PORT_ACTIVE;
885}
886
887static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo, 837static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo,
888 int devno, unsigned alg) 838 int devno)
889{ 839{
890 struct hfi1_devdata *dd = NULL; 840 struct hfi1_devdata *dd = NULL;
891 int ret = 0, devmax, npresent, nup, dev; 841 int devmax, npresent, nup;
892 842
893 devmax = hfi1_count_units(&npresent, &nup); 843 devmax = hfi1_count_units(&npresent, &nup);
894 if (!npresent) { 844 if (!npresent)
895 ret = -ENXIO; 845 return -ENXIO;
896 goto done; 846
897 } 847 if (!nup)
898 if (!nup) { 848 return -ENETDOWN;
899 ret = -ENETDOWN; 849
900 goto done; 850 dd = hfi1_lookup(devno);
901 } 851 if (!dd)
902 if (devno >= 0) { 852 return -ENODEV;
903 dd = hfi1_lookup(devno); 853 else if (!dd->freectxts)
904 if (!dd) 854 return -EBUSY;
905 ret = -ENODEV; 855
906 else if (!dd->freectxts) 856 return allocate_ctxt(fp, dd, uinfo);
907 ret = -EBUSY;
908 } else {
909 struct hfi1_devdata *pdd;
910
911 if (alg == HFI1_ALG_ACROSS) {
912 unsigned free = 0U;
913
914 for (dev = 0; dev < devmax; dev++) {
915 pdd = hfi1_lookup(dev);
916 if (!pdd)
917 continue;
918 if (!usable_device(pdd))
919 continue;
920 if (pdd->freectxts &&
921 pdd->freectxts > free) {
922 dd = pdd;
923 free = pdd->freectxts;
924 }
925 }
926 } else {
927 for (dev = 0; dev < devmax; dev++) {
928 pdd = hfi1_lookup(dev);
929 if (!pdd)
930 continue;
931 if (!usable_device(pdd))
932 continue;
933 if (pdd->freectxts) {
934 dd = pdd;
935 break;
936 }
937 }
938 }
939 if (!dd)
940 ret = -EBUSY;
941 }
942done:
943 return ret ? ret : allocate_ctxt(fp, dd, uinfo);
944} 857}
945 858
946static int find_shared_ctxt(struct file *fp, 859static int find_shared_ctxt(struct file *fp,
@@ -1546,170 +1459,10 @@ done:
1546 return ret; 1459 return ret;
1547} 1460}
1548 1461
1549static int ui_open(struct inode *inode, struct file *filp)
1550{
1551 struct hfi1_devdata *dd;
1552
1553 dd = container_of(inode->i_cdev, struct hfi1_devdata, ui_cdev);
1554 filp->private_data = dd; /* for other methods */
1555 return 0;
1556}
1557
1558static int ui_release(struct inode *inode, struct file *filp)
1559{
1560 /* nothing to do */
1561 return 0;
1562}
1563
1564static loff_t ui_lseek(struct file *filp, loff_t offset, int whence)
1565{
1566 struct hfi1_devdata *dd = filp->private_data;
1567
1568 return fixed_size_llseek(filp, offset, whence,
1569 (dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE);
1570}
1571
1572/* NOTE: assumes unsigned long is 8 bytes */
1573static ssize_t ui_read(struct file *filp, char __user *buf, size_t count,
1574 loff_t *f_pos)
1575{
1576 struct hfi1_devdata *dd = filp->private_data;
1577 void __iomem *base = dd->kregbase;
1578 unsigned long total, csr_off,
1579 barlen = (dd->kregend - dd->kregbase);
1580 u64 data;
1581
1582 /* only read 8 byte quantities */
1583 if ((count % 8) != 0)
1584 return -EINVAL;
1585 /* offset must be 8-byte aligned */
1586 if ((*f_pos % 8) != 0)
1587 return -EINVAL;
1588 /* destination buffer must be 8-byte aligned */
1589 if ((unsigned long)buf % 8 != 0)
1590 return -EINVAL;
1591 /* must be in range */
1592 if (*f_pos + count > (barlen + DC8051_DATA_MEM_SIZE))
1593 return -EINVAL;
1594 /* only set the base if we are not starting past the BAR */
1595 if (*f_pos < barlen)
1596 base += *f_pos;
1597 csr_off = *f_pos;
1598 for (total = 0; total < count; total += 8, csr_off += 8) {
1599 /* accessing LCB CSRs requires more checks */
1600 if (is_lcb_offset(csr_off)) {
1601 if (read_lcb_csr(dd, csr_off, (u64 *)&data))
1602 break; /* failed */
1603 }
1604 /*
1605 * Cannot read ASIC GPIO/QSFP* clear and force CSRs without a
1606 * false parity error. Avoid the whole issue by not reading
1607 * them. These registers are defined as having a read value
1608 * of 0.
1609 */
1610 else if (csr_off == ASIC_GPIO_CLEAR ||
1611 csr_off == ASIC_GPIO_FORCE ||
1612 csr_off == ASIC_QSFP1_CLEAR ||
1613 csr_off == ASIC_QSFP1_FORCE ||
1614 csr_off == ASIC_QSFP2_CLEAR ||
1615 csr_off == ASIC_QSFP2_FORCE)
1616 data = 0;
1617 else if (csr_off >= barlen) {
1618 /*
1619 * read_8051_data can read more than just 8 bytes at
1620 * a time. However, folding this into the loop and
1621 * handling the reads in 8 byte increments allows us
1622 * to smoothly transition from chip memory to 8051
1623 * memory.
1624 */
1625 if (read_8051_data(dd,
1626 (u32)(csr_off - barlen),
1627 sizeof(data), &data))
1628 break; /* failed */
1629 } else
1630 data = readq(base + total);
1631 if (put_user(data, (unsigned long __user *)(buf + total)))
1632 break;
1633 }
1634 *f_pos += total;
1635 return total;
1636}
1637
1638/* NOTE: assumes unsigned long is 8 bytes */
1639static ssize_t ui_write(struct file *filp, const char __user *buf,
1640 size_t count, loff_t *f_pos)
1641{
1642 struct hfi1_devdata *dd = filp->private_data;
1643 void __iomem *base;
1644 unsigned long total, data, csr_off;
1645 int in_lcb;
1646
1647 /* only write 8 byte quantities */
1648 if ((count % 8) != 0)
1649 return -EINVAL;
1650 /* offset must be 8-byte aligned */
1651 if ((*f_pos % 8) != 0)
1652 return -EINVAL;
1653 /* source buffer must be 8-byte aligned */
1654 if ((unsigned long)buf % 8 != 0)
1655 return -EINVAL;
1656 /* must be in range */
1657 if (*f_pos + count > dd->kregend - dd->kregbase)
1658 return -EINVAL;
1659
1660 base = (void __iomem *)dd->kregbase + *f_pos;
1661 csr_off = *f_pos;
1662 in_lcb = 0;
1663 for (total = 0; total < count; total += 8, csr_off += 8) {
1664 if (get_user(data, (unsigned long __user *)(buf + total)))
1665 break;
1666 /* accessing LCB CSRs requires a special procedure */
1667 if (is_lcb_offset(csr_off)) {
1668 if (!in_lcb) {
1669 int ret = acquire_lcb_access(dd, 1);
1670
1671 if (ret)
1672 break;
1673 in_lcb = 1;
1674 }
1675 } else {
1676 if (in_lcb) {
1677 release_lcb_access(dd, 1);
1678 in_lcb = 0;
1679 }
1680 }
1681 writeq(data, base + total);
1682 }
1683 if (in_lcb)
1684 release_lcb_access(dd, 1);
1685 *f_pos += total;
1686 return total;
1687}
1688
1689static const struct file_operations ui_file_ops = {
1690 .owner = THIS_MODULE,
1691 .llseek = ui_lseek,
1692 .read = ui_read,
1693 .write = ui_write,
1694 .open = ui_open,
1695 .release = ui_release,
1696};
1697
1698#define UI_OFFSET 192 /* device minor offset for UI devices */
1699static int create_ui = 1;
1700
1701static struct cdev wildcard_cdev;
1702static struct device *wildcard_device;
1703
1704static atomic_t user_count = ATOMIC_INIT(0);
1705
1706static void user_remove(struct hfi1_devdata *dd) 1462static void user_remove(struct hfi1_devdata *dd)
1707{ 1463{
1708 if (atomic_dec_return(&user_count) == 0)
1709 hfi1_cdev_cleanup(&wildcard_cdev, &wildcard_device);
1710 1464
1711 hfi1_cdev_cleanup(&dd->user_cdev, &dd->user_device); 1465 hfi1_cdev_cleanup(&dd->user_cdev, &dd->user_device);
1712 hfi1_cdev_cleanup(&dd->ui_cdev, &dd->ui_device);
1713} 1466}
1714 1467
1715static int user_add(struct hfi1_devdata *dd) 1468static int user_add(struct hfi1_devdata *dd)
@@ -1717,34 +1470,13 @@ static int user_add(struct hfi1_devdata *dd)
1717 char name[10]; 1470 char name[10];
1718 int ret; 1471 int ret;
1719 1472
1720 if (atomic_inc_return(&user_count) == 1) {
1721 ret = hfi1_cdev_init(0, class_name(), &hfi1_file_ops,
1722 &wildcard_cdev, &wildcard_device,
1723 true);
1724 if (ret)
1725 goto done;
1726 }
1727
1728 snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit); 1473 snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit);
1729 ret = hfi1_cdev_init(dd->unit + 1, name, &hfi1_file_ops, 1474 ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops,
1730 &dd->user_cdev, &dd->user_device, 1475 &dd->user_cdev, &dd->user_device,
1731 true); 1476 true, &dd->kobj);
1732 if (ret) 1477 if (ret)
1733 goto done; 1478 user_remove(dd);
1734 1479
1735 if (create_ui) {
1736 snprintf(name, sizeof(name),
1737 "%s_ui%d", class_name(), dd->unit);
1738 ret = hfi1_cdev_init(dd->unit + UI_OFFSET, name, &ui_file_ops,
1739 &dd->ui_cdev, &dd->ui_device,
1740 false);
1741 if (ret)
1742 goto done;
1743 }
1744
1745 return 0;
1746done:
1747 user_remove(dd);
1748 return ret; 1480 return ret;
1749} 1481}
1750 1482
@@ -1753,13 +1485,7 @@ done:
1753 */ 1485 */
1754int hfi1_device_create(struct hfi1_devdata *dd) 1486int hfi1_device_create(struct hfi1_devdata *dd)
1755{ 1487{
1756 int r, ret; 1488 return user_add(dd);
1757
1758 r = user_add(dd);
1759 ret = hfi1_diag_add(dd);
1760 if (r && !ret)
1761 ret = r;
1762 return ret;
1763} 1489}
1764 1490
1765/* 1491/*
@@ -1769,5 +1495,4 @@ int hfi1_device_create(struct hfi1_devdata *dd)
1769void hfi1_device_remove(struct hfi1_devdata *dd) 1495void hfi1_device_remove(struct hfi1_devdata *dd)
1770{ 1496{
1771 user_remove(dd); 1497 user_remove(dd);
1772 hfi1_diag_remove(dd);
1773} 1498}
diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index ed680fda611d..ed680fda611d 100644
--- a/drivers/staging/rdma/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 7b78d56de7f5..4417a0fd3ef9 100644
--- a/drivers/staging/rdma/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -453,6 +453,7 @@ struct rvt_sge_state;
453#define HLS_LINK_COOLDOWN BIT(__HLS_LINK_COOLDOWN_BP) 453#define HLS_LINK_COOLDOWN BIT(__HLS_LINK_COOLDOWN_BP)
454 454
455#define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE) 455#define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE)
456#define HLS_DOWN ~(HLS_UP)
456 457
457/* use this MTU size if none other is given */ 458/* use this MTU size if none other is given */
458#define HFI1_DEFAULT_ACTIVE_MTU 10240 459#define HFI1_DEFAULT_ACTIVE_MTU 10240
@@ -1168,6 +1169,7 @@ struct hfi1_devdata {
1168 atomic_t aspm_disabled_cnt; 1169 atomic_t aspm_disabled_cnt;
1169 1170
1170 struct hfi1_affinity *affinity; 1171 struct hfi1_affinity *affinity;
1172 struct kobject kobj;
1171}; 1173};
1172 1174
1173/* 8051 firmware version helper */ 1175/* 8051 firmware version helper */
@@ -1882,9 +1884,8 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
1882 get_unit_name((dd)->unit), ##__VA_ARGS__) 1884 get_unit_name((dd)->unit), ##__VA_ARGS__)
1883 1885
1884#define hfi1_dev_porterr(dd, port, fmt, ...) \ 1886#define hfi1_dev_porterr(dd, port, fmt, ...) \
1885 dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ 1887 dev_err(&(dd)->pcidev->dev, "%s: port %u: " fmt, \
1886 get_unit_name((dd)->unit), (dd)->unit, (port), \ 1888 get_unit_name((dd)->unit), (port), ##__VA_ARGS__)
1887 ##__VA_ARGS__)
1888 1889
1889/* 1890/*
1890 * this is used for formatting hw error messages... 1891 * this is used for formatting hw error messages...
diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 502b7cf4647d..5cc492e5776d 100644
--- a/drivers/staging/rdma/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -732,12 +732,12 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
732 lastfail = hfi1_create_rcvhdrq(dd, rcd); 732 lastfail = hfi1_create_rcvhdrq(dd, rcd);
733 if (!lastfail) 733 if (!lastfail)
734 lastfail = hfi1_setup_eagerbufs(rcd); 734 lastfail = hfi1_setup_eagerbufs(rcd);
735 if (lastfail) 735 if (lastfail) {
736 dd_dev_err(dd, 736 dd_dev_err(dd,
737 "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); 737 "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
738 ret = lastfail;
739 }
738 } 740 }
739 if (lastfail)
740 ret = lastfail;
741 741
742 /* Allocate enough memory for user event notification. */ 742 /* Allocate enough memory for user event notification. */
743 len = PAGE_ALIGN(dd->chip_rcv_contexts * HFI1_MAX_SHARED_CTXTS * 743 len = PAGE_ALIGN(dd->chip_rcv_contexts * HFI1_MAX_SHARED_CTXTS *
@@ -989,8 +989,10 @@ static void release_asic_data(struct hfi1_devdata *dd)
989 dd->asic_data = NULL; 989 dd->asic_data = NULL;
990} 990}
991 991
992void hfi1_free_devdata(struct hfi1_devdata *dd) 992static void __hfi1_free_devdata(struct kobject *kobj)
993{ 993{
994 struct hfi1_devdata *dd =
995 container_of(kobj, struct hfi1_devdata, kobj);
994 unsigned long flags; 996 unsigned long flags;
995 997
996 spin_lock_irqsave(&hfi1_devs_lock, flags); 998 spin_lock_irqsave(&hfi1_devs_lock, flags);
@@ -1007,6 +1009,15 @@ void hfi1_free_devdata(struct hfi1_devdata *dd)
1007 rvt_dealloc_device(&dd->verbs_dev.rdi); 1009 rvt_dealloc_device(&dd->verbs_dev.rdi);
1008} 1010}
1009 1011
1012static struct kobj_type hfi1_devdata_type = {
1013 .release = __hfi1_free_devdata,
1014};
1015
1016void hfi1_free_devdata(struct hfi1_devdata *dd)
1017{
1018 kobject_put(&dd->kobj);
1019}
1020
1010/* 1021/*
1011 * Allocate our primary per-unit data structure. Must be done via verbs 1022 * Allocate our primary per-unit data structure. Must be done via verbs
1012 * allocator, because the verbs cleanup process both does cleanup and 1023 * allocator, because the verbs cleanup process both does cleanup and
@@ -1102,6 +1113,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
1102 &pdev->dev, 1113 &pdev->dev,
1103 "Could not alloc cpulist info, cpu affinity might be wrong\n"); 1114 "Could not alloc cpulist info, cpu affinity might be wrong\n");
1104 } 1115 }
1116 kobject_init(&dd->kobj, &hfi1_devdata_type);
1105 return dd; 1117 return dd;
1106 1118
1107bail: 1119bail:
@@ -1300,7 +1312,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
1300 1312
1301 spin_lock(&ppd->cc_state_lock); 1313 spin_lock(&ppd->cc_state_lock);
1302 cc_state = get_cc_state(ppd); 1314 cc_state = get_cc_state(ppd);
1303 rcu_assign_pointer(ppd->cc_state, NULL); 1315 RCU_INIT_POINTER(ppd->cc_state, NULL);
1304 spin_unlock(&ppd->cc_state_lock); 1316 spin_unlock(&ppd->cc_state_lock);
1305 1317
1306 if (cc_state) 1318 if (cc_state)
diff --git a/drivers/staging/rdma/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c
index 65348d16ab2f..65348d16ab2f 100644
--- a/drivers/staging/rdma/hfi1/intr.c
+++ b/drivers/infiniband/hw/hfi1/intr.c
diff --git a/drivers/staging/rdma/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index 2ec6ef38d389..2ec6ef38d389 100644
--- a/drivers/staging/rdma/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index ed58cf21e790..219029576ba0 100644
--- a/drivers/staging/rdma/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1403,6 +1403,12 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
1403 if (key == okey) 1403 if (key == okey)
1404 continue; 1404 continue;
1405 /* 1405 /*
1406 * Don't update pkeys[2], if an HFI port without MgmtAllowed
1407 * by neighbor is a switch.
1408 */
1409 if (i == 2 && !ppd->mgmt_allowed && ppd->neighbor_type == 1)
1410 continue;
1411 /*
1406 * The SM gives us the complete PKey table. We have 1412 * The SM gives us the complete PKey table. We have
1407 * to ensure that we put the PKeys in the matching 1413 * to ensure that we put the PKeys in the matching
1408 * slots. 1414 * slots.
@@ -3363,6 +3369,50 @@ static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
3363 return reply((struct ib_mad_hdr *)smp); 3369 return reply((struct ib_mad_hdr *)smp);
3364} 3370}
3365 3371
3372/*
3373 * Apply congestion control information stored in the ppd to the
3374 * active structure.
3375 */
3376static void apply_cc_state(struct hfi1_pportdata *ppd)
3377{
3378 struct cc_state *old_cc_state, *new_cc_state;
3379
3380 new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
3381 if (!new_cc_state)
3382 return;
3383
3384 /*
3385 * Hold the lock for updating *and* to prevent ppd information
3386 * from changing during the update.
3387 */
3388 spin_lock(&ppd->cc_state_lock);
3389
3390 old_cc_state = get_cc_state(ppd);
3391 if (!old_cc_state) {
3392 /* never active, or shutting down */
3393 spin_unlock(&ppd->cc_state_lock);
3394 kfree(new_cc_state);
3395 return;
3396 }
3397
3398 *new_cc_state = *old_cc_state;
3399
3400 new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1;
3401 memcpy(new_cc_state->cct.entries, ppd->ccti_entries,
3402 ppd->total_cct_entry * sizeof(struct ib_cc_table_entry));
3403
3404 new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED;
3405 new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map;
3406 memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries,
3407 OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry));
3408
3409 rcu_assign_pointer(ppd->cc_state, new_cc_state);
3410
3411 spin_unlock(&ppd->cc_state_lock);
3412
3413 call_rcu(&old_cc_state->rcu, cc_state_reclaim);
3414}
3415
3366static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, 3416static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
3367 struct ib_device *ibdev, u8 port, 3417 struct ib_device *ibdev, u8 port,
3368 u32 *resp_len) 3418 u32 *resp_len)
@@ -3374,6 +3424,11 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
3374 struct opa_congestion_setting_entry_shadow *entries; 3424 struct opa_congestion_setting_entry_shadow *entries;
3375 int i; 3425 int i;
3376 3426
3427 /*
3428 * Save details from packet into the ppd. Hold the cc_state_lock so
3429 * our information is consistent with anyone trying to apply the state.
3430 */
3431 spin_lock(&ppd->cc_state_lock);
3377 ppd->cc_sl_control_map = be32_to_cpu(p->control_map); 3432 ppd->cc_sl_control_map = be32_to_cpu(p->control_map);
3378 3433
3379 entries = ppd->congestion_entries; 3434 entries = ppd->congestion_entries;
@@ -3384,6 +3439,10 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
3384 p->entries[i].trigger_threshold; 3439 p->entries[i].trigger_threshold;
3385 entries[i].ccti_min = p->entries[i].ccti_min; 3440 entries[i].ccti_min = p->entries[i].ccti_min;
3386 } 3441 }
3442 spin_unlock(&ppd->cc_state_lock);
3443
3444 /* now apply the information */
3445 apply_cc_state(ppd);
3387 3446
3388 return __subn_get_opa_cong_setting(smp, am, data, ibdev, port, 3447 return __subn_get_opa_cong_setting(smp, am, data, ibdev, port,
3389 resp_len); 3448 resp_len);
@@ -3526,7 +3585,6 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3526 int i, j; 3585 int i, j;
3527 u32 sentry, eentry; 3586 u32 sentry, eentry;
3528 u16 ccti_limit; 3587 u16 ccti_limit;
3529 struct cc_state *old_cc_state, *new_cc_state;
3530 3588
3531 /* sanity check n_blocks, start_block */ 3589 /* sanity check n_blocks, start_block */
3532 if (n_blocks == 0 || 3590 if (n_blocks == 0 ||
@@ -3546,45 +3604,20 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3546 return reply((struct ib_mad_hdr *)smp); 3604 return reply((struct ib_mad_hdr *)smp);
3547 } 3605 }
3548 3606
3549 new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL); 3607 /*
3550 if (!new_cc_state) 3608 * Save details from packet into the ppd. Hold the cc_state_lock so
3551 goto getit; 3609 * our information is consistent with anyone trying to apply the state.
3552 3610 */
3553 spin_lock(&ppd->cc_state_lock); 3611 spin_lock(&ppd->cc_state_lock);
3554
3555 old_cc_state = get_cc_state(ppd);
3556
3557 if (!old_cc_state) {
3558 spin_unlock(&ppd->cc_state_lock);
3559 kfree(new_cc_state);
3560 return reply((struct ib_mad_hdr *)smp);
3561 }
3562
3563 *new_cc_state = *old_cc_state;
3564
3565 new_cc_state->cct.ccti_limit = ccti_limit;
3566
3567 entries = ppd->ccti_entries;
3568 ppd->total_cct_entry = ccti_limit + 1; 3612 ppd->total_cct_entry = ccti_limit + 1;
3569 3613 entries = ppd->ccti_entries;
3570 for (j = 0, i = sentry; i < eentry; j++, i++) 3614 for (j = 0, i = sentry; i < eentry; j++, i++)
3571 entries[i].entry = be16_to_cpu(p->ccti_entries[j].entry); 3615 entries[i].entry = be16_to_cpu(p->ccti_entries[j].entry);
3572
3573 memcpy(new_cc_state->cct.entries, entries,
3574 eentry * sizeof(struct ib_cc_table_entry));
3575
3576 new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED;
3577 new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map;
3578 memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries,
3579 OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry));
3580
3581 rcu_assign_pointer(ppd->cc_state, new_cc_state);
3582
3583 spin_unlock(&ppd->cc_state_lock); 3616 spin_unlock(&ppd->cc_state_lock);
3584 3617
3585 call_rcu(&old_cc_state->rcu, cc_state_reclaim); 3618 /* now apply the information */
3619 apply_cc_state(ppd);
3586 3620
3587getit:
3588 return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len); 3621 return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len);
3589} 3622}
3590 3623
diff --git a/drivers/staging/rdma/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h
index 55ee08675333..55ee08675333 100644
--- a/drivers/staging/rdma/hfi1/mad.h
+++ b/drivers/infiniband/hw/hfi1/mad.h
diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index 2b0e91d3093d..b7a80aa1ae30 100644
--- a/drivers/staging/rdma/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -45,6 +45,7 @@
45 * 45 *
46 */ 46 */
47#include <linux/list.h> 47#include <linux/list.h>
48#include <linux/rculist.h>
48#include <linux/mmu_notifier.h> 49#include <linux/mmu_notifier.h>
49#include <linux/interval_tree_generic.h> 50#include <linux/interval_tree_generic.h>
50 51
@@ -97,7 +98,6 @@ static unsigned long mmu_node_last(struct mmu_rb_node *node)
97int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops) 98int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops)
98{ 99{
99 struct mmu_rb_handler *handlr; 100 struct mmu_rb_handler *handlr;
100 unsigned long flags;
101 101
102 if (!ops->invalidate) 102 if (!ops->invalidate)
103 return -EINVAL; 103 return -EINVAL;
@@ -111,9 +111,9 @@ int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops)
111 INIT_HLIST_NODE(&handlr->mn.hlist); 111 INIT_HLIST_NODE(&handlr->mn.hlist);
112 spin_lock_init(&handlr->lock); 112 spin_lock_init(&handlr->lock);
113 handlr->mn.ops = &mn_opts; 113 handlr->mn.ops = &mn_opts;
114 spin_lock_irqsave(&mmu_rb_lock, flags); 114 spin_lock(&mmu_rb_lock);
115 list_add_tail(&handlr->list, &mmu_rb_handlers); 115 list_add_tail_rcu(&handlr->list, &mmu_rb_handlers);
116 spin_unlock_irqrestore(&mmu_rb_lock, flags); 116 spin_unlock(&mmu_rb_lock);
117 117
118 return mmu_notifier_register(&handlr->mn, current->mm); 118 return mmu_notifier_register(&handlr->mn, current->mm);
119} 119}
@@ -130,9 +130,10 @@ void hfi1_mmu_rb_unregister(struct rb_root *root)
130 if (current->mm) 130 if (current->mm)
131 mmu_notifier_unregister(&handler->mn, current->mm); 131 mmu_notifier_unregister(&handler->mn, current->mm);
132 132
133 spin_lock_irqsave(&mmu_rb_lock, flags); 133 spin_lock(&mmu_rb_lock);
134 list_del(&handler->list); 134 list_del_rcu(&handler->list);
135 spin_unlock_irqrestore(&mmu_rb_lock, flags); 135 spin_unlock(&mmu_rb_lock);
136 synchronize_rcu();
136 137
137 spin_lock_irqsave(&handler->lock, flags); 138 spin_lock_irqsave(&handler->lock, flags);
138 if (!RB_EMPTY_ROOT(root)) { 139 if (!RB_EMPTY_ROOT(root)) {
@@ -271,16 +272,15 @@ void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node)
271static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root) 272static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root)
272{ 273{
273 struct mmu_rb_handler *handler; 274 struct mmu_rb_handler *handler;
274 unsigned long flags;
275 275
276 spin_lock_irqsave(&mmu_rb_lock, flags); 276 rcu_read_lock();
277 list_for_each_entry(handler, &mmu_rb_handlers, list) { 277 list_for_each_entry_rcu(handler, &mmu_rb_handlers, list) {
278 if (handler->root == root) 278 if (handler->root == root)
279 goto unlock; 279 goto unlock;
280 } 280 }
281 handler = NULL; 281 handler = NULL;
282unlock: 282unlock:
283 spin_unlock_irqrestore(&mmu_rb_lock, flags); 283 rcu_read_unlock();
284 return handler; 284 return handler;
285} 285}
286 286
diff --git a/drivers/staging/rdma/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h
index 7a57b9c49d27..7a57b9c49d27 100644
--- a/drivers/staging/rdma/hfi1/mmu_rb.h
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.h
diff --git a/drivers/staging/rdma/hfi1/opa_compat.h b/drivers/infiniband/hw/hfi1/opa_compat.h
index 6ef3c1cbdcd7..6ef3c1cbdcd7 100644
--- a/drivers/staging/rdma/hfi1/opa_compat.h
+++ b/drivers/infiniband/hw/hfi1/opa_compat.h
diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 0bac21e6a658..0bac21e6a658 100644
--- a/drivers/staging/rdma/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index c67b9ad3fcf4..d5edb1afbb8f 100644
--- a/drivers/staging/rdma/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1835,8 +1835,7 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts)
1835 struct pio_vl_map *oldmap, *newmap; 1835 struct pio_vl_map *oldmap, *newmap;
1836 1836
1837 if (!vl_scontexts) { 1837 if (!vl_scontexts) {
1838 /* send context 0 reserved for VL15 */ 1838 for (i = 0; i < dd->num_send_contexts; i++)
1839 for (i = 1; i < dd->num_send_contexts; i++)
1840 if (dd->send_contexts[i].type == SC_KERNEL) 1839 if (dd->send_contexts[i].type == SC_KERNEL)
1841 num_kernel_send_contexts++; 1840 num_kernel_send_contexts++;
1842 /* truncate divide */ 1841 /* truncate divide */
diff --git a/drivers/staging/rdma/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index 53a08edb7f64..464cbd27b975 100644
--- a/drivers/staging/rdma/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -49,10 +49,10 @@
49 49
50/* send context types */ 50/* send context types */
51#define SC_KERNEL 0 51#define SC_KERNEL 0
52#define SC_ACK 1 52#define SC_VL15 1
53#define SC_USER 2 53#define SC_ACK 2
54#define SC_VL15 3 54#define SC_USER 3 /* must be the last one: it may take all left */
55#define SC_MAX 4 55#define SC_MAX 4 /* count of send context types */
56 56
57/* invalid send context index */ 57/* invalid send context index */
58#define INVALID_SCI 0xff 58#define INVALID_SCI 0xff
diff --git a/drivers/staging/rdma/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 8c25e1b58849..8c25e1b58849 100644
--- a/drivers/staging/rdma/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index 8fe8a205b5bb..03df9322f862 100644
--- a/drivers/staging/rdma/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -87,6 +87,17 @@ void free_platform_config(struct hfi1_devdata *dd)
87 */ 87 */
88} 88}
89 89
90void get_port_type(struct hfi1_pportdata *ppd)
91{
92 int ret;
93
94 ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
95 PORT_TABLE_PORT_TYPE, &ppd->port_type,
96 4);
97 if (ret)
98 ppd->port_type = PORT_TYPE_UNKNOWN;
99}
100
90int set_qsfp_tx(struct hfi1_pportdata *ppd, int on) 101int set_qsfp_tx(struct hfi1_pportdata *ppd, int on)
91{ 102{
92 u8 tx_ctrl_byte = on ? 0x0 : 0xF; 103 u8 tx_ctrl_byte = on ? 0x0 : 0xF;
@@ -529,7 +540,8 @@ static void apply_tunings(
529 /* Enable external device config if channel is limiting active */ 540 /* Enable external device config if channel is limiting active */
530 read_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS, 541 read_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS,
531 GENERAL_CONFIG, &config_data); 542 GENERAL_CONFIG, &config_data);
532 config_data |= limiting_active; 543 config_data &= ~(0xff << ENABLE_EXT_DEV_CONFIG_SHIFT);
544 config_data |= ((u32)limiting_active << ENABLE_EXT_DEV_CONFIG_SHIFT);
533 ret = load_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS, 545 ret = load_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS,
534 GENERAL_CONFIG, config_data); 546 GENERAL_CONFIG, config_data);
535 if (ret != HCMD_SUCCESS) 547 if (ret != HCMD_SUCCESS)
@@ -542,7 +554,8 @@ static void apply_tunings(
542 /* Pass tuning method to 8051 */ 554 /* Pass tuning method to 8051 */
543 read_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG, 555 read_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG,
544 &config_data); 556 &config_data);
545 config_data |= tuning_method; 557 config_data &= ~(0xff << TUNING_METHOD_SHIFT);
558 config_data |= ((u32)tuning_method << TUNING_METHOD_SHIFT);
546 ret = load_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG, 559 ret = load_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG,
547 config_data); 560 config_data);
548 if (ret != HCMD_SUCCESS) 561 if (ret != HCMD_SUCCESS)
@@ -564,8 +577,8 @@ static void apply_tunings(
564 ret = read_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS, 577 ret = read_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS,
565 GENERAL_CONFIG, &config_data); 578 GENERAL_CONFIG, &config_data);
566 /* Clear, then set the external device config field */ 579 /* Clear, then set the external device config field */
567 config_data &= ~(0xFF << 24); 580 config_data &= ~(u32)0xFF;
568 config_data |= (external_device_config << 24); 581 config_data |= external_device_config;
569 ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS, 582 ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS,
570 GENERAL_CONFIG, config_data); 583 GENERAL_CONFIG, config_data);
571 if (ret != HCMD_SUCCESS) 584 if (ret != HCMD_SUCCESS)
@@ -784,12 +797,6 @@ void tune_serdes(struct hfi1_pportdata *ppd)
784 return; 797 return;
785 } 798 }
786 799
787 ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
788 PORT_TABLE_PORT_TYPE, &ppd->port_type,
789 4);
790 if (ret)
791 ppd->port_type = PORT_TYPE_UNKNOWN;
792
793 switch (ppd->port_type) { 800 switch (ppd->port_type) {
794 case PORT_TYPE_DISCONNECTED: 801 case PORT_TYPE_DISCONNECTED:
795 ppd->offline_disabled_reason = 802 ppd->offline_disabled_reason =
diff --git a/drivers/staging/rdma/hfi1/platform.h b/drivers/infiniband/hw/hfi1/platform.h
index 19620cf546d5..e2c21613c326 100644
--- a/drivers/staging/rdma/hfi1/platform.h
+++ b/drivers/infiniband/hw/hfi1/platform.h
@@ -298,6 +298,7 @@ enum link_tuning_encoding {
298/* platform.c */ 298/* platform.c */
299void get_platform_config(struct hfi1_devdata *dd); 299void get_platform_config(struct hfi1_devdata *dd);
300void free_platform_config(struct hfi1_devdata *dd); 300void free_platform_config(struct hfi1_devdata *dd);
301void get_port_type(struct hfi1_pportdata *ppd);
301int set_qsfp_tx(struct hfi1_pportdata *ppd, int on); 302int set_qsfp_tx(struct hfi1_pportdata *ppd, int on);
302void tune_serdes(struct hfi1_pportdata *ppd); 303void tune_serdes(struct hfi1_pportdata *ppd);
303 304
diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 91eb42316df9..1a942ffba4cb 100644
--- a/drivers/staging/rdma/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -49,7 +49,6 @@
49#include <linux/vmalloc.h> 49#include <linux/vmalloc.h>
50#include <linux/hash.h> 50#include <linux/hash.h>
51#include <linux/module.h> 51#include <linux/module.h>
52#include <linux/random.h>
53#include <linux/seq_file.h> 52#include <linux/seq_file.h>
54#include <rdma/rdma_vt.h> 53#include <rdma/rdma_vt.h>
55#include <rdma/rdmavt_qp.h> 54#include <rdma/rdmavt_qp.h>
@@ -161,9 +160,6 @@ static inline int opa_mtu_enum_to_int(int mtu)
161 * This function is what we would push to the core layer if we wanted to be a 160 * This function is what we would push to the core layer if we wanted to be a
162 * "first class citizen". Instead we hide this here and rely on Verbs ULPs 161 * "first class citizen". Instead we hide this here and rely on Verbs ULPs
163 * to blindly pass the MTU enum value from the PathRecord to us. 162 * to blindly pass the MTU enum value from the PathRecord to us.
164 *
165 * The actual flag used to determine "8k MTU" will change and is currently
166 * unknown.
167 */ 163 */
168static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu) 164static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
169{ 165{
@@ -516,6 +512,7 @@ static void iowait_wakeup(struct iowait *wait, int reason)
516static void iowait_sdma_drained(struct iowait *wait) 512static void iowait_sdma_drained(struct iowait *wait)
517{ 513{
518 struct rvt_qp *qp = iowait_to_qp(wait); 514 struct rvt_qp *qp = iowait_to_qp(wait);
515 unsigned long flags;
519 516
520 /* 517 /*
521 * This happens when the send engine notes 518 * This happens when the send engine notes
@@ -523,12 +520,12 @@ static void iowait_sdma_drained(struct iowait *wait)
523 * do the flush work until that QP's 520 * do the flush work until that QP's
524 * sdma work has finished. 521 * sdma work has finished.
525 */ 522 */
526 spin_lock(&qp->s_lock); 523 spin_lock_irqsave(&qp->s_lock, flags);
527 if (qp->s_flags & RVT_S_WAIT_DMA) { 524 if (qp->s_flags & RVT_S_WAIT_DMA) {
528 qp->s_flags &= ~RVT_S_WAIT_DMA; 525 qp->s_flags &= ~RVT_S_WAIT_DMA;
529 hfi1_schedule_send(qp); 526 hfi1_schedule_send(qp);
530 } 527 }
531 spin_unlock(&qp->s_lock); 528 spin_unlock_irqrestore(&qp->s_lock, flags);
532} 529}
533 530
534/** 531/**
diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index e7bc8d6cf681..e7bc8d6cf681 100644
--- a/drivers/staging/rdma/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index 2441669f0817..2441669f0817 100644
--- a/drivers/staging/rdma/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
diff --git a/drivers/staging/rdma/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h
index dadc66c442b9..dadc66c442b9 100644
--- a/drivers/staging/rdma/hfi1/qsfp.h
+++ b/drivers/infiniband/hw/hfi1/qsfp.h
diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 792f15eb8efe..792f15eb8efe 100644
--- a/drivers/staging/rdma/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index a659aec3c3c6..a659aec3c3c6 100644
--- a/drivers/staging/rdma/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index abb8ebc1fcac..f9befc05b349 100644
--- a/drivers/staging/rdma/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -134,6 +134,7 @@ static const char * const sdma_state_names[] = {
134 [sdma_state_s99_running] = "s99_Running", 134 [sdma_state_s99_running] = "s99_Running",
135}; 135};
136 136
137#ifdef CONFIG_SDMA_VERBOSITY
137static const char * const sdma_event_names[] = { 138static const char * const sdma_event_names[] = {
138 [sdma_event_e00_go_hw_down] = "e00_GoHwDown", 139 [sdma_event_e00_go_hw_down] = "e00_GoHwDown",
139 [sdma_event_e10_go_hw_start] = "e10_GoHwStart", 140 [sdma_event_e10_go_hw_start] = "e10_GoHwStart",
@@ -150,6 +151,7 @@ static const char * const sdma_event_names[] = {
150 [sdma_event_e85_link_down] = "e85_LinkDown", 151 [sdma_event_e85_link_down] = "e85_LinkDown",
151 [sdma_event_e90_sw_halted] = "e90_SwHalted", 152 [sdma_event_e90_sw_halted] = "e90_SwHalted",
152}; 153};
154#endif
153 155
154static const struct sdma_set_state_action sdma_action_table[] = { 156static const struct sdma_set_state_action sdma_action_table[] = {
155 [sdma_state_s00_hw_down] = { 157 [sdma_state_s00_hw_down] = {
@@ -376,7 +378,7 @@ static inline void complete_tx(struct sdma_engine *sde,
376 sdma_txclean(sde->dd, tx); 378 sdma_txclean(sde->dd, tx);
377 if (complete) 379 if (complete)
378 (*complete)(tx, res); 380 (*complete)(tx, res);
379 if (iowait_sdma_dec(wait) && wait) 381 if (wait && iowait_sdma_dec(wait))
380 iowait_drain_wakeup(wait); 382 iowait_drain_wakeup(wait);
381} 383}
382 384
diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 8f50c99fe711..8f50c99fe711 100644
--- a/drivers/staging/rdma/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
diff --git a/drivers/staging/rdma/hfi1/sdma_txreq.h b/drivers/infiniband/hw/hfi1/sdma_txreq.h
index bf7d777d756e..bf7d777d756e 100644
--- a/drivers/staging/rdma/hfi1/sdma_txreq.h
+++ b/drivers/infiniband/hw/hfi1/sdma_txreq.h
diff --git a/drivers/staging/rdma/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 8cd6df8634ad..91fc2aed6aed 100644
--- a/drivers/staging/rdma/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -721,8 +721,8 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
721 } 721 }
722 722
723 dd_dev_info(dd, 723 dd_dev_info(dd,
724 "IB%u: Congestion Control Agent enabled for port %d\n", 724 "Congestion Control Agent enabled for port %d\n",
725 dd->unit, port_num); 725 port_num);
726 726
727 return 0; 727 return 0;
728 728
diff --git a/drivers/staging/rdma/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 8b62fefcf903..79b2952c0dfb 100644
--- a/drivers/staging/rdma/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -66,6 +66,7 @@ u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr)
66#define RETH_PRN "reth vaddr 0x%.16llx rkey 0x%.8x dlen 0x%.8x" 66#define RETH_PRN "reth vaddr 0x%.16llx rkey 0x%.8x dlen 0x%.8x"
67#define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x" 67#define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x"
68#define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x" 68#define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x"
69#define IETH_PRN "ieth rkey 0x%.8x"
69#define ATOMICACKETH_PRN "origdata %lld" 70#define ATOMICACKETH_PRN "origdata %lld"
70#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %lld cdata %lld" 71#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %lld cdata %lld"
71 72
@@ -166,6 +167,12 @@ const char *parse_everbs_hdrs(
166 be32_to_cpu(eh->ud.deth[0]), 167 be32_to_cpu(eh->ud.deth[0]),
167 be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK); 168 be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK);
168 break; 169 break;
170 /* ieth */
171 case OP(RC, SEND_LAST_WITH_INVALIDATE):
172 case OP(RC, SEND_ONLY_WITH_INVALIDATE):
173 trace_seq_printf(p, IETH_PRN,
174 be32_to_cpu(eh->ieth));
175 break;
169 } 176 }
170 trace_seq_putc(p, 0); 177 trace_seq_putc(p, 0);
171 return ret; 178 return ret;
@@ -233,3 +240,4 @@ __hfi1_trace_fn(FIRMWARE);
233__hfi1_trace_fn(RCVCTRL); 240__hfi1_trace_fn(RCVCTRL);
234__hfi1_trace_fn(TID); 241__hfi1_trace_fn(TID);
235__hfi1_trace_fn(MMU); 242__hfi1_trace_fn(MMU);
243__hfi1_trace_fn(IOCTL);
diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h
index 963dc948c38a..28c1d0832886 100644
--- a/drivers/staging/rdma/hfi1/trace.h
+++ b/drivers/infiniband/hw/hfi1/trace.h
@@ -74,8 +74,8 @@ __print_symbolic(etype, \
74 74
75TRACE_EVENT(hfi1_rcvhdr, 75TRACE_EVENT(hfi1_rcvhdr,
76 TP_PROTO(struct hfi1_devdata *dd, 76 TP_PROTO(struct hfi1_devdata *dd,
77 u64 eflags,
78 u32 ctxt, 77 u32 ctxt,
78 u64 eflags,
79 u32 etype, 79 u32 etype,
80 u32 hlen, 80 u32 hlen,
81 u32 tlen, 81 u32 tlen,
@@ -392,6 +392,8 @@ __print_symbolic(opcode, \
392 ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ 392 ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
393 ib_opcode_name(RC_COMPARE_SWAP), \ 393 ib_opcode_name(RC_COMPARE_SWAP), \
394 ib_opcode_name(RC_FETCH_ADD), \ 394 ib_opcode_name(RC_FETCH_ADD), \
395 ib_opcode_name(RC_SEND_LAST_WITH_INVALIDATE), \
396 ib_opcode_name(RC_SEND_ONLY_WITH_INVALIDATE), \
395 ib_opcode_name(UC_SEND_FIRST), \ 397 ib_opcode_name(UC_SEND_FIRST), \
396 ib_opcode_name(UC_SEND_MIDDLE), \ 398 ib_opcode_name(UC_SEND_MIDDLE), \
397 ib_opcode_name(UC_SEND_LAST), \ 399 ib_opcode_name(UC_SEND_LAST), \
@@ -1341,6 +1343,7 @@ __hfi1_trace_def(FIRMWARE);
1341__hfi1_trace_def(RCVCTRL); 1343__hfi1_trace_def(RCVCTRL);
1342__hfi1_trace_def(TID); 1344__hfi1_trace_def(TID);
1343__hfi1_trace_def(MMU); 1345__hfi1_trace_def(MMU);
1346__hfi1_trace_def(IOCTL);
1344 1347
1345#define hfi1_cdbg(which, fmt, ...) \ 1348#define hfi1_cdbg(which, fmt, ...) \
1346 __hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__) 1349 __hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__)
diff --git a/drivers/staging/rdma/hfi1/twsi.c b/drivers/infiniband/hw/hfi1/twsi.c
index e82e52a63d35..e82e52a63d35 100644
--- a/drivers/staging/rdma/hfi1/twsi.c
+++ b/drivers/infiniband/hw/hfi1/twsi.c
diff --git a/drivers/staging/rdma/hfi1/twsi.h b/drivers/infiniband/hw/hfi1/twsi.h
index 5b8a5b5e7eae..5b8a5b5e7eae 100644
--- a/drivers/staging/rdma/hfi1/twsi.h
+++ b/drivers/infiniband/hw/hfi1/twsi.h
diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index df773d433297..df773d433297 100644
--- a/drivers/staging/rdma/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 1e503ad0bebb..1e503ad0bebb 100644
--- a/drivers/staging/rdma/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 1b640a35b3fe..1b640a35b3fe 100644
--- a/drivers/staging/rdma/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
index 9bc8d9fba87e..9bc8d9fba87e 100644
--- a/drivers/staging/rdma/hfi1/user_exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
diff --git a/drivers/staging/rdma/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 88e10b5f55f1..88e10b5f55f1 100644
--- a/drivers/staging/rdma/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 0014c9c0e967..29f4795f866c 100644
--- a/drivers/staging/rdma/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -166,6 +166,8 @@ static unsigned initial_pkt_count = 8;
166 166
167#define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */ 167#define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */
168 168
169struct sdma_mmu_node;
170
169struct user_sdma_iovec { 171struct user_sdma_iovec {
170 struct list_head list; 172 struct list_head list;
171 struct iovec iov; 173 struct iovec iov;
@@ -178,6 +180,7 @@ struct user_sdma_iovec {
178 * which we last left off. 180 * which we last left off.
179 */ 181 */
180 u64 offset; 182 u64 offset;
183 struct sdma_mmu_node *node;
181}; 184};
182 185
183#define SDMA_CACHE_NODE_EVICT BIT(0) 186#define SDMA_CACHE_NODE_EVICT BIT(0)
@@ -507,6 +510,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
507 struct sdma_req_info info; 510 struct sdma_req_info info;
508 struct user_sdma_request *req; 511 struct user_sdma_request *req;
509 u8 opcode, sc, vl; 512 u8 opcode, sc, vl;
513 int req_queued = 0;
510 514
511 if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) { 515 if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) {
512 hfi1_cdbg( 516 hfi1_cdbg(
@@ -703,6 +707,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
703 707
704 set_comp_state(pq, cq, info.comp_idx, QUEUED, 0); 708 set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
705 atomic_inc(&pq->n_reqs); 709 atomic_inc(&pq->n_reqs);
710 req_queued = 1;
706 /* Send the first N packets in the request to buy us some time */ 711 /* Send the first N packets in the request to buy us some time */
707 ret = user_sdma_send_pkts(req, pcount); 712 ret = user_sdma_send_pkts(req, pcount);
708 if (unlikely(ret < 0 && ret != -EBUSY)) { 713 if (unlikely(ret < 0 && ret != -EBUSY)) {
@@ -747,7 +752,8 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
747 return 0; 752 return 0;
748free_req: 753free_req:
749 user_sdma_free_request(req, true); 754 user_sdma_free_request(req, true);
750 pq_update(pq); 755 if (req_queued)
756 pq_update(pq);
751 set_comp_state(pq, cq, info.comp_idx, ERROR, req->status); 757 set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
752 return ret; 758 return ret;
753} 759}
@@ -1153,6 +1159,7 @@ retry:
1153 } 1159 }
1154 iovec->pages = node->pages; 1160 iovec->pages = node->pages;
1155 iovec->npages = npages; 1161 iovec->npages = npages;
1162 iovec->node = node;
1156 1163
1157 ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb); 1164 ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb);
1158 if (ret) { 1165 if (ret) {
@@ -1519,18 +1526,13 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
1519 } 1526 }
1520 if (req->data_iovs) { 1527 if (req->data_iovs) {
1521 struct sdma_mmu_node *node; 1528 struct sdma_mmu_node *node;
1522 struct mmu_rb_node *mnode;
1523 int i; 1529 int i;
1524 1530
1525 for (i = 0; i < req->data_iovs; i++) { 1531 for (i = 0; i < req->data_iovs; i++) {
1526 mnode = hfi1_mmu_rb_search( 1532 node = req->iovs[i].node;
1527 &req->pq->sdma_rb_root, 1533 if (!node)
1528 (unsigned long)req->iovs[i].iov.iov_base,
1529 req->iovs[i].iov.iov_len);
1530 if (!mnode || IS_ERR(mnode))
1531 continue; 1534 continue;
1532 1535
1533 node = container_of(mnode, struct sdma_mmu_node, rb);
1534 if (unpin) 1536 if (unpin)
1535 hfi1_mmu_rb_remove(&req->pq->sdma_rb_root, 1537 hfi1_mmu_rb_remove(&req->pq->sdma_rb_root,
1536 &node->rb); 1538 &node->rb);
diff --git a/drivers/staging/rdma/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index b9240e351161..b9240e351161 100644
--- a/drivers/staging/rdma/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 9cdc85fa366f..849c4b9399d4 100644
--- a/drivers/staging/rdma/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -52,7 +52,6 @@
52#include <linux/utsname.h> 52#include <linux/utsname.h>
53#include <linux/rculist.h> 53#include <linux/rculist.h>
54#include <linux/mm.h> 54#include <linux/mm.h>
55#include <linux/random.h>
56#include <linux/vmalloc.h> 55#include <linux/vmalloc.h>
57 56
58#include "hfi.h" 57#include "hfi.h"
@@ -336,6 +335,8 @@ const u8 hdr_len_by_opcode[256] = {
336 [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4, 335 [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4,
337 [IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28, 336 [IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28,
338 [IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28, 337 [IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28,
338 [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4,
339 [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4,
339 /* UC */ 340 /* UC */
340 [IB_OPCODE_UC_SEND_FIRST] = 12 + 8, 341 [IB_OPCODE_UC_SEND_FIRST] = 12 + 8,
341 [IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8, 342 [IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8,
@@ -946,7 +947,6 @@ static int pio_wait(struct rvt_qp *qp,
946 947
947 dev->n_piowait += !!(flag & RVT_S_WAIT_PIO); 948 dev->n_piowait += !!(flag & RVT_S_WAIT_PIO);
948 dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN); 949 dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN);
949 dev->n_piowait++;
950 qp->s_flags |= flag; 950 qp->s_flags |= flag;
951 was_empty = list_empty(&sc->piowait); 951 was_empty = list_empty(&sc->piowait);
952 list_add_tail(&priv->s_iowait.list, &sc->piowait); 952 list_add_tail(&priv->s_iowait.list, &sc->piowait);
diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 3ee223983b20..488356775627 100644
--- a/drivers/staging/rdma/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -152,6 +152,7 @@ union ib_ehdrs {
152 } at; 152 } at;
153 __be32 imm_data; 153 __be32 imm_data;
154 __be32 aeth; 154 __be32 aeth;
155 __be32 ieth;
155 struct ib_atomic_eth atomic_eth; 156 struct ib_atomic_eth atomic_eth;
156} __packed; 157} __packed;
157 158
diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index bc95c4112c61..bc95c4112c61 100644
--- a/drivers/staging/rdma/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 1cf69b2fe4a5..1cf69b2fe4a5 100644
--- a/drivers/staging/rdma/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 4a740f7a0519..02a735b64208 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -2361,58 +2361,130 @@ static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num,
2361 return 0; 2361 return 0;
2362} 2362}
2363 2363
2364static const char * const i40iw_hw_stat_names[] = {
2365 // 32bit names
2366 [I40IW_HW_STAT_INDEX_IP4RXDISCARD] = "ip4InDiscards",
2367 [I40IW_HW_STAT_INDEX_IP4RXTRUNC] = "ip4InTruncatedPkts",
2368 [I40IW_HW_STAT_INDEX_IP4TXNOROUTE] = "ip4OutNoRoutes",
2369 [I40IW_HW_STAT_INDEX_IP6RXDISCARD] = "ip6InDiscards",
2370 [I40IW_HW_STAT_INDEX_IP6RXTRUNC] = "ip6InTruncatedPkts",
2371 [I40IW_HW_STAT_INDEX_IP6TXNOROUTE] = "ip6OutNoRoutes",
2372 [I40IW_HW_STAT_INDEX_TCPRTXSEG] = "tcpRetransSegs",
2373 [I40IW_HW_STAT_INDEX_TCPRXOPTERR] = "tcpInOptErrors",
2374 [I40IW_HW_STAT_INDEX_TCPRXPROTOERR] = "tcpInProtoErrors",
2375 // 64bit names
2376 [I40IW_HW_STAT_INDEX_IP4RXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
2377 "ip4InOctets",
2378 [I40IW_HW_STAT_INDEX_IP4RXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
2379 "ip4InPkts",
2380 [I40IW_HW_STAT_INDEX_IP4RXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
2381 "ip4InReasmRqd",
2382 [I40IW_HW_STAT_INDEX_IP4RXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
2383 "ip4InMcastPkts",
2384 [I40IW_HW_STAT_INDEX_IP4TXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
2385 "ip4OutOctets",
2386 [I40IW_HW_STAT_INDEX_IP4TXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
2387 "ip4OutPkts",
2388 [I40IW_HW_STAT_INDEX_IP4TXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
2389 "ip4OutSegRqd",
2390 [I40IW_HW_STAT_INDEX_IP4TXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
2391 "ip4OutMcastPkts",
2392 [I40IW_HW_STAT_INDEX_IP6RXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
2393 "ip6InOctets",
2394 [I40IW_HW_STAT_INDEX_IP6RXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
2395 "ip6InPkts",
2396 [I40IW_HW_STAT_INDEX_IP6RXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
2397 "ip6InReasmRqd",
2398 [I40IW_HW_STAT_INDEX_IP6RXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
2399 "ip6InMcastPkts",
2400 [I40IW_HW_STAT_INDEX_IP6TXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
2401 "ip6OutOctets",
2402 [I40IW_HW_STAT_INDEX_IP6TXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
2403 "ip6OutPkts",
2404 [I40IW_HW_STAT_INDEX_IP6TXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
2405 "ip6OutSegRqd",
2406 [I40IW_HW_STAT_INDEX_IP6TXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
2407 "ip6OutMcastPkts",
2408 [I40IW_HW_STAT_INDEX_TCPRXSEGS + I40IW_HW_STAT_INDEX_MAX_32] =
2409 "tcpInSegs",
2410 [I40IW_HW_STAT_INDEX_TCPTXSEG + I40IW_HW_STAT_INDEX_MAX_32] =
2411 "tcpOutSegs",
2412 [I40IW_HW_STAT_INDEX_RDMARXRDS + I40IW_HW_STAT_INDEX_MAX_32] =
2413 "iwInRdmaReads",
2414 [I40IW_HW_STAT_INDEX_RDMARXSNDS + I40IW_HW_STAT_INDEX_MAX_32] =
2415 "iwInRdmaSends",
2416 [I40IW_HW_STAT_INDEX_RDMARXWRS + I40IW_HW_STAT_INDEX_MAX_32] =
2417 "iwInRdmaWrites",
2418 [I40IW_HW_STAT_INDEX_RDMATXRDS + I40IW_HW_STAT_INDEX_MAX_32] =
2419 "iwOutRdmaReads",
2420 [I40IW_HW_STAT_INDEX_RDMATXSNDS + I40IW_HW_STAT_INDEX_MAX_32] =
2421 "iwOutRdmaSends",
2422 [I40IW_HW_STAT_INDEX_RDMATXWRS + I40IW_HW_STAT_INDEX_MAX_32] =
2423 "iwOutRdmaWrites",
2424 [I40IW_HW_STAT_INDEX_RDMAVBND + I40IW_HW_STAT_INDEX_MAX_32] =
2425 "iwRdmaBnd",
2426 [I40IW_HW_STAT_INDEX_RDMAVINV + I40IW_HW_STAT_INDEX_MAX_32] =
2427 "iwRdmaInv"
2428};
2429
2364/** 2430/**
2365 * i40iw_get_protocol_stats - Populates the rdma_stats structure 2431 * i40iw_alloc_hw_stats - Allocate a hw stats structure
2366 * @ibdev: ib dev struct 2432 * @ibdev: device pointer from stack
2367 * @stats: iw protocol stats struct 2433 * @port_num: port number
2368 */ 2434 */
2369static int i40iw_get_protocol_stats(struct ib_device *ibdev, 2435static struct rdma_hw_stats *i40iw_alloc_hw_stats(struct ib_device *ibdev,
2370 union rdma_protocol_stats *stats) 2436 u8 port_num)
2437{
2438 struct i40iw_device *iwdev = to_iwdev(ibdev);
2439 struct i40iw_sc_dev *dev = &iwdev->sc_dev;
2440 int num_counters = I40IW_HW_STAT_INDEX_MAX_32 +
2441 I40IW_HW_STAT_INDEX_MAX_64;
2442 unsigned long lifespan = RDMA_HW_STATS_DEFAULT_LIFESPAN;
2443
2444 BUILD_BUG_ON(ARRAY_SIZE(i40iw_hw_stat_names) !=
2445 (I40IW_HW_STAT_INDEX_MAX_32 +
2446 I40IW_HW_STAT_INDEX_MAX_64));
2447
2448 /*
2449 * PFs get the default update lifespan, but VFs only update once
2450 * per second
2451 */
2452 if (!dev->is_pf)
2453 lifespan = 1000;
2454 return rdma_alloc_hw_stats_struct(i40iw_hw_stat_names, num_counters,
2455 lifespan);
2456}
2457
2458/**
2459 * i40iw_get_hw_stats - Populates the rdma_hw_stats structure
2460 * @ibdev: device pointer from stack
2461 * @stats: stats pointer from stack
2462 * @port_num: port number
2463 * @index: which hw counter the stack is requesting we update
2464 */
2465static int i40iw_get_hw_stats(struct ib_device *ibdev,
2466 struct rdma_hw_stats *stats,
2467 u8 port_num, int index)
2371{ 2468{
2372 struct i40iw_device *iwdev = to_iwdev(ibdev); 2469 struct i40iw_device *iwdev = to_iwdev(ibdev);
2373 struct i40iw_sc_dev *dev = &iwdev->sc_dev; 2470 struct i40iw_sc_dev *dev = &iwdev->sc_dev;
2374 struct i40iw_dev_pestat *devstat = &dev->dev_pestat; 2471 struct i40iw_dev_pestat *devstat = &dev->dev_pestat;
2375 struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats; 2472 struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
2376 struct timespec curr_time;
2377 static struct timespec last_rd_time = {0, 0};
2378 unsigned long flags; 2473 unsigned long flags;
2379 2474
2380 curr_time = current_kernel_time();
2381 memset(stats, 0, sizeof(*stats));
2382
2383 if (dev->is_pf) { 2475 if (dev->is_pf) {
2384 spin_lock_irqsave(&devstat->stats_lock, flags); 2476 spin_lock_irqsave(&devstat->stats_lock, flags);
2385 devstat->ops.iw_hw_stat_read_all(devstat, 2477 devstat->ops.iw_hw_stat_read_all(devstat,
2386 &devstat->hw_stats); 2478 &devstat->hw_stats);
2387 spin_unlock_irqrestore(&devstat->stats_lock, flags); 2479 spin_unlock_irqrestore(&devstat->stats_lock, flags);
2388 } else { 2480 } else {
2389 if (((u64)curr_time.tv_sec - (u64)last_rd_time.tv_sec) > 1) 2481 if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats))
2390 if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats)) 2482 return -ENOSYS;
2391 return -ENOSYS;
2392 } 2483 }
2393 2484
2394 stats->iw.ipInReceives = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] + 2485 memcpy(&stats->value[0], &hw_stats, sizeof(*hw_stats));
2395 hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXPKTS]; 2486
2396 stats->iw.ipInTruncatedPkts = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] + 2487 return stats->num_counters;
2397 hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC];
2398 stats->iw.ipInDiscards = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] +
2399 hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD];
2400 stats->iw.ipOutNoRoutes = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] +
2401 hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE];
2402 stats->iw.ipReasmReqds = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] +
2403 hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS];
2404 stats->iw.ipFragCreates = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] +
2405 hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS];
2406 stats->iw.ipInMcastPkts = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] +
2407 hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS];
2408 stats->iw.ipOutMcastPkts = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] +
2409 hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6TXMCPKTS];
2410 stats->iw.tcpOutSegs = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_TCPTXSEG];
2411 stats->iw.tcpInSegs = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_TCPRXSEGS];
2412 stats->iw.tcpRetransSegs = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_TCPRTXSEG];
2413
2414 last_rd_time = curr_time;
2415 return 0;
2416} 2488}
2417 2489
2418/** 2490/**
@@ -2551,7 +2623,8 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
2551 iwibdev->ibdev.get_dma_mr = i40iw_get_dma_mr; 2623 iwibdev->ibdev.get_dma_mr = i40iw_get_dma_mr;
2552 iwibdev->ibdev.reg_user_mr = i40iw_reg_user_mr; 2624 iwibdev->ibdev.reg_user_mr = i40iw_reg_user_mr;
2553 iwibdev->ibdev.dereg_mr = i40iw_dereg_mr; 2625 iwibdev->ibdev.dereg_mr = i40iw_dereg_mr;
2554 iwibdev->ibdev.get_protocol_stats = i40iw_get_protocol_stats; 2626 iwibdev->ibdev.alloc_hw_stats = i40iw_alloc_hw_stats;
2627 iwibdev->ibdev.get_hw_stats = i40iw_get_hw_stats;
2555 iwibdev->ibdev.query_device = i40iw_query_device; 2628 iwibdev->ibdev.query_device = i40iw_query_device;
2556 iwibdev->ibdev.create_ah = i40iw_create_ah; 2629 iwibdev->ibdev.create_ah = i40iw_create_ah;
2557 iwibdev->ibdev.destroy_ah = i40iw_destroy_ah; 2630 iwibdev->ibdev.destroy_ah = i40iw_destroy_ah;
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 82d7c4bf5970..ce4034071f9c 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -1308,21 +1308,6 @@ static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = {
1308 SYM_LSB(IntMask, fldname##17IntMask)), \ 1308 SYM_LSB(IntMask, fldname##17IntMask)), \
1309 .msg = #fldname "_C", .sz = sizeof(#fldname "_C") } 1309 .msg = #fldname "_C", .sz = sizeof(#fldname "_C") }
1310 1310
1311static const struct qib_hwerror_msgs qib_7322_intr_msgs[] = {
1312 INTR_AUTO_P(SDmaInt),
1313 INTR_AUTO_P(SDmaProgressInt),
1314 INTR_AUTO_P(SDmaIdleInt),
1315 INTR_AUTO_P(SDmaCleanupDone),
1316 INTR_AUTO_C(RcvUrg),
1317 INTR_AUTO_P(ErrInt),
1318 INTR_AUTO(ErrInt), /* non-port-specific errs */
1319 INTR_AUTO(AssertGPIOInt),
1320 INTR_AUTO_P(SendDoneInt),
1321 INTR_AUTO(SendBufAvailInt),
1322 INTR_AUTO_C(RcvAvail),
1323 { .mask = 0, .sz = 0 }
1324};
1325
1326#define TXSYMPTOM_AUTO_P(fldname) \ 1311#define TXSYMPTOM_AUTO_P(fldname) \
1327 { .mask = SYM_MASK(SendHdrErrSymptom_0, fldname), \ 1312 { .mask = SYM_MASK(SendHdrErrSymptom_0, fldname), \
1328 .msg = #fldname, .sz = sizeof(#fldname) } 1313 .msg = #fldname, .sz = sizeof(#fldname) }
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 0bd18375d7df..d2ac29861af5 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -1172,11 +1172,13 @@ static int pma_get_classportinfo(struct ib_pma_mad *pmp,
1172 * Set the most significant bit of CM2 to indicate support for 1172 * Set the most significant bit of CM2 to indicate support for
1173 * congestion statistics 1173 * congestion statistics
1174 */ 1174 */
1175 p->reserved[0] = dd->psxmitwait_supported << 7; 1175 ib_set_cpi_capmask2(p,
1176 dd->psxmitwait_supported <<
1177 (31 - IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE));
1176 /* 1178 /*
1177 * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec. 1179 * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
1178 */ 1180 */
1179 p->resp_time_value = 18; 1181 ib_set_cpi_resp_time(p, 18);
1180 1182
1181 return reply((struct ib_smp *) pmp); 1183 return reply((struct ib_smp *) pmp);
1182} 1184}
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 6888f03c6d61..4f878151f81f 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -159,6 +159,7 @@ struct qib_other_headers {
159 } at; 159 } at;
160 __be32 imm_data; 160 __be32 imm_data;
161 __be32 aeth; 161 __be32 aeth;
162 __be32 ieth;
162 struct ib_atomic_eth atomic_eth; 163 struct ib_atomic_eth atomic_eth;
163 } u; 164 } u;
164} __packed; 165} __packed;
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index b1ffc8b4a6c0..6ca6fa80dd6e 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -525,6 +525,7 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi)
525 return PTR_ERR(task); 525 return PTR_ERR(task);
526 } 526 }
527 527
528 set_user_nice(task, MIN_NICE);
528 cpu = cpumask_first(cpumask_of_node(rdi->dparms.node)); 529 cpu = cpumask_first(cpumask_of_node(rdi->dparms.node));
529 kthread_bind(task, cpu); 530 kthread_bind(task, cpu);
530 wake_up_process(task); 531 wake_up_process(task);
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 0ff765bfd619..0f4d4500f45e 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -124,11 +124,13 @@ static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
124 int count) 124 int count)
125{ 125{
126 int m, i = 0; 126 int m, i = 0;
127 struct rvt_dev_info *dev = ib_to_rvt(pd->device);
127 128
128 mr->mapsz = 0; 129 mr->mapsz = 0;
129 m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ; 130 m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
130 for (; i < m; i++) { 131 for (; i < m; i++) {
131 mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL); 132 mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL,
133 dev->dparms.node);
132 if (!mr->map[i]) { 134 if (!mr->map[i]) {
133 rvt_deinit_mregion(mr); 135 rvt_deinit_mregion(mr);
134 return -ENOMEM; 136 return -ENOMEM;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 0f12c211c385..5fa4d4d81ee0 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -397,6 +397,7 @@ static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
397static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) 397static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
398{ 398{
399 unsigned n; 399 unsigned n;
400 struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
400 401
401 if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) 402 if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
402 rvt_put_ss(&qp->s_rdma_read_sge); 403 rvt_put_ss(&qp->s_rdma_read_sge);
@@ -431,7 +432,7 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
431 if (qp->ibqp.qp_type != IB_QPT_RC) 432 if (qp->ibqp.qp_type != IB_QPT_RC)
432 return; 433 return;
433 434
434 for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { 435 for (n = 0; n < rvt_max_atomic(rdi); n++) {
435 struct rvt_ack_entry *e = &qp->s_ack_queue[n]; 436 struct rvt_ack_entry *e = &qp->s_ack_queue[n];
436 437
437 if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && 438 if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
@@ -569,7 +570,12 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
569 qp->s_ssn = 1; 570 qp->s_ssn = 1;
570 qp->s_lsn = 0; 571 qp->s_lsn = 0;
571 qp->s_mig_state = IB_MIG_MIGRATED; 572 qp->s_mig_state = IB_MIG_MIGRATED;
572 memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); 573 if (qp->s_ack_queue)
574 memset(
575 qp->s_ack_queue,
576 0,
577 rvt_max_atomic(rdi) *
578 sizeof(*qp->s_ack_queue));
573 qp->r_head_ack_queue = 0; 579 qp->r_head_ack_queue = 0;
574 qp->s_tail_ack_queue = 0; 580 qp->s_tail_ack_queue = 0;
575 qp->s_num_rd_atomic = 0; 581 qp->s_num_rd_atomic = 0;
@@ -653,9 +659,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
653 if (gfp == GFP_NOIO) 659 if (gfp == GFP_NOIO)
654 swq = __vmalloc( 660 swq = __vmalloc(
655 (init_attr->cap.max_send_wr + 1) * sz, 661 (init_attr->cap.max_send_wr + 1) * sz,
656 gfp, PAGE_KERNEL); 662 gfp | __GFP_ZERO, PAGE_KERNEL);
657 else 663 else
658 swq = vmalloc_node( 664 swq = vzalloc_node(
659 (init_attr->cap.max_send_wr + 1) * sz, 665 (init_attr->cap.max_send_wr + 1) * sz,
660 rdi->dparms.node); 666 rdi->dparms.node);
661 if (!swq) 667 if (!swq)
@@ -677,6 +683,16 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
677 goto bail_swq; 683 goto bail_swq;
678 684
679 RCU_INIT_POINTER(qp->next, NULL); 685 RCU_INIT_POINTER(qp->next, NULL);
686 if (init_attr->qp_type == IB_QPT_RC) {
687 qp->s_ack_queue =
688 kzalloc_node(
689 sizeof(*qp->s_ack_queue) *
690 rvt_max_atomic(rdi),
691 gfp,
692 rdi->dparms.node);
693 if (!qp->s_ack_queue)
694 goto bail_qp;
695 }
680 696
681 /* 697 /*
682 * Driver needs to set up it's private QP structure and do any 698 * Driver needs to set up it's private QP structure and do any
@@ -704,9 +720,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
704 qp->r_rq.wq = __vmalloc( 720 qp->r_rq.wq = __vmalloc(
705 sizeof(struct rvt_rwq) + 721 sizeof(struct rvt_rwq) +
706 qp->r_rq.size * sz, 722 qp->r_rq.size * sz,
707 gfp, PAGE_KERNEL); 723 gfp | __GFP_ZERO, PAGE_KERNEL);
708 else 724 else
709 qp->r_rq.wq = vmalloc_node( 725 qp->r_rq.wq = vzalloc_node(
710 sizeof(struct rvt_rwq) + 726 sizeof(struct rvt_rwq) +
711 qp->r_rq.size * sz, 727 qp->r_rq.size * sz,
712 rdi->dparms.node); 728 rdi->dparms.node);
@@ -857,6 +873,7 @@ bail_driver_priv:
857 rdi->driver_f.qp_priv_free(rdi, qp); 873 rdi->driver_f.qp_priv_free(rdi, qp);
858 874
859bail_qp: 875bail_qp:
876 kfree(qp->s_ack_queue);
860 kfree(qp); 877 kfree(qp);
861 878
862bail_swq: 879bail_swq:
@@ -1284,6 +1301,7 @@ int rvt_destroy_qp(struct ib_qp *ibqp)
1284 vfree(qp->r_rq.wq); 1301 vfree(qp->r_rq.wq);
1285 vfree(qp->s_wq); 1302 vfree(qp->s_wq);
1286 rdi->driver_f.qp_priv_free(rdi, qp); 1303 rdi->driver_f.qp_priv_free(rdi, qp);
1304 kfree(qp->s_ack_queue);
1287 kfree(qp); 1305 kfree(qp);
1288 return 0; 1306 return 0;
1289} 1307}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index caec8e9c4666..bab7db6fa9ab 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -92,6 +92,8 @@ enum {
92 IPOIB_FLAG_UMCAST = 10, 92 IPOIB_FLAG_UMCAST = 10,
93 IPOIB_STOP_NEIGH_GC = 11, 93 IPOIB_STOP_NEIGH_GC = 11,
94 IPOIB_NEIGH_TBL_FLUSH = 12, 94 IPOIB_NEIGH_TBL_FLUSH = 12,
95 IPOIB_FLAG_DEV_ADDR_SET = 13,
96 IPOIB_FLAG_DEV_ADDR_CTRL = 14,
95 97
96 IPOIB_MAX_BACKOFF_SECONDS = 16, 98 IPOIB_MAX_BACKOFF_SECONDS = 16,
97 99
@@ -392,6 +394,7 @@ struct ipoib_dev_priv {
392 struct ipoib_ethtool_st ethtool; 394 struct ipoib_ethtool_st ethtool;
393 struct timer_list poll_timer; 395 struct timer_list poll_timer;
394 unsigned max_send_sge; 396 unsigned max_send_sge;
397 bool sm_fullmember_sendonly_support;
395}; 398};
396 399
397struct ipoib_ah { 400struct ipoib_ah {
@@ -476,6 +479,7 @@ void ipoib_reap_ah(struct work_struct *work);
476 479
477void ipoib_mark_paths_invalid(struct net_device *dev); 480void ipoib_mark_paths_invalid(struct net_device *dev);
478void ipoib_flush_paths(struct net_device *dev); 481void ipoib_flush_paths(struct net_device *dev);
482int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv);
479struct ipoib_dev_priv *ipoib_intf_alloc(const char *format); 483struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
480 484
481int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port); 485int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 418e5a1c8744..45c40a17d6a6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -997,6 +997,106 @@ static inline int update_child_pkey(struct ipoib_dev_priv *priv)
997 return 0; 997 return 0;
998} 998}
999 999
1000/*
1001 * returns true if the device address of the ipoib interface has changed and the
1002 * new address is a valid one (i.e in the gid table), return false otherwise.
1003 */
1004static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
1005{
1006 union ib_gid search_gid;
1007 union ib_gid gid0;
1008 union ib_gid *netdev_gid;
1009 int err;
1010 u16 index;
1011 u8 port;
1012 bool ret = false;
1013
1014 netdev_gid = (union ib_gid *)(priv->dev->dev_addr + 4);
1015 if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL))
1016 return false;
1017
1018 netif_addr_lock(priv->dev);
1019
1020 /* The subnet prefix may have changed, update it now so we won't have
1021 * to do it later
1022 */
1023 priv->local_gid.global.subnet_prefix = gid0.global.subnet_prefix;
1024 netdev_gid->global.subnet_prefix = gid0.global.subnet_prefix;
1025 search_gid.global.subnet_prefix = gid0.global.subnet_prefix;
1026
1027 search_gid.global.interface_id = priv->local_gid.global.interface_id;
1028
1029 netif_addr_unlock(priv->dev);
1030
1031 err = ib_find_gid(priv->ca, &search_gid, IB_GID_TYPE_IB,
1032 priv->dev, &port, &index);
1033
1034 netif_addr_lock(priv->dev);
1035
1036 if (search_gid.global.interface_id !=
1037 priv->local_gid.global.interface_id)
1038 /* There was a change while we were looking up the gid, bail
1039 * here and let the next work sort this out
1040 */
1041 goto out;
1042
1043 /* The next section of code needs some background:
1044 * Per IB spec the port GUID can't change if the HCA is powered on.
1045 * port GUID is the basis for GID at index 0 which is the basis for
1046 * the default device address of a ipoib interface.
1047 *
1048 * so it seems the flow should be:
1049 * if user_changed_dev_addr && gid in gid tbl
1050 * set bit dev_addr_set
1051 * return true
1052 * else
1053 * return false
1054 *
1055 * The issue is that there are devices that don't follow the spec,
1056 * they change the port GUID when the HCA is powered, so in order
1057 * not to break userspace applications, We need to check if the
1058 * user wanted to control the device address and we assume that
1059 * if he sets the device address back to be based on GID index 0,
1060 * he no longer wishs to control it.
1061 *
1062 * If the user doesn't control the the device address,
1063 * IPOIB_FLAG_DEV_ADDR_SET is set and ib_find_gid failed it means
1064 * the port GUID has changed and GID at index 0 has changed
1065 * so we need to change priv->local_gid and priv->dev->dev_addr
1066 * to reflect the new GID.
1067 */
1068 if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
1069 if (!err && port == priv->port) {
1070 set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
1071 if (index == 0)
1072 clear_bit(IPOIB_FLAG_DEV_ADDR_CTRL,
1073 &priv->flags);
1074 else
1075 set_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags);
1076 ret = true;
1077 } else {
1078 ret = false;
1079 }
1080 } else {
1081 if (!err && port == priv->port) {
1082 ret = true;
1083 } else {
1084 if (!test_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags)) {
1085 memcpy(&priv->local_gid, &gid0,
1086 sizeof(priv->local_gid));
1087 memcpy(priv->dev->dev_addr + 4, &gid0,
1088 sizeof(priv->local_gid));
1089 ret = true;
1090 }
1091 }
1092 }
1093
1094out:
1095 netif_addr_unlock(priv->dev);
1096
1097 return ret;
1098}
1099
1000static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, 1100static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
1001 enum ipoib_flush_level level, 1101 enum ipoib_flush_level level,
1002 int nesting) 1102 int nesting)
@@ -1018,6 +1118,9 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
1018 1118
1019 if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) && 1119 if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) &&
1020 level != IPOIB_FLUSH_HEAVY) { 1120 level != IPOIB_FLUSH_HEAVY) {
1121 /* Make sure the dev_addr is set even if not flushing */
1122 if (level == IPOIB_FLUSH_LIGHT)
1123 ipoib_dev_addr_changed_valid(priv);
1021 ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n"); 1124 ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
1022 return; 1125 return;
1023 } 1126 }
@@ -1029,7 +1132,8 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
1029 update_parent_pkey(priv); 1132 update_parent_pkey(priv);
1030 else 1133 else
1031 update_child_pkey(priv); 1134 update_child_pkey(priv);
1032 } 1135 } else if (level == IPOIB_FLUSH_LIGHT)
1136 ipoib_dev_addr_changed_valid(priv);
1033 ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n"); 1137 ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n");
1034 return; 1138 return;
1035 } 1139 }
@@ -1081,7 +1185,8 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
1081 if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) { 1185 if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
1082 if (level >= IPOIB_FLUSH_NORMAL) 1186 if (level >= IPOIB_FLUSH_NORMAL)
1083 ipoib_ib_dev_up(dev); 1187 ipoib_ib_dev_up(dev);
1084 ipoib_mcast_restart_task(&priv->restart_task); 1188 if (ipoib_dev_addr_changed_valid(priv))
1189 ipoib_mcast_restart_task(&priv->restart_task);
1085 } 1190 }
1086} 1191}
1087 1192
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index b940ef1c19c7..2d7c16346648 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -99,6 +99,7 @@ static struct net_device *ipoib_get_net_dev_by_params(
99 struct ib_device *dev, u8 port, u16 pkey, 99 struct ib_device *dev, u8 port, u16 pkey,
100 const union ib_gid *gid, const struct sockaddr *addr, 100 const union ib_gid *gid, const struct sockaddr *addr,
101 void *client_data); 101 void *client_data);
102static int ipoib_set_mac(struct net_device *dev, void *addr);
102 103
103static struct ib_client ipoib_client = { 104static struct ib_client ipoib_client = {
104 .name = "ipoib", 105 .name = "ipoib",
@@ -117,6 +118,8 @@ int ipoib_open(struct net_device *dev)
117 118
118 set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); 119 set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
119 120
121 priv->sm_fullmember_sendonly_support = false;
122
120 if (ipoib_ib_dev_open(dev)) { 123 if (ipoib_ib_dev_open(dev)) {
121 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) 124 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
122 return 0; 125 return 0;
@@ -629,6 +632,77 @@ void ipoib_mark_paths_invalid(struct net_device *dev)
629 spin_unlock_irq(&priv->lock); 632 spin_unlock_irq(&priv->lock);
630} 633}
631 634
635struct classport_info_context {
636 struct ipoib_dev_priv *priv;
637 struct completion done;
638 struct ib_sa_query *sa_query;
639};
640
641static void classport_info_query_cb(int status, struct ib_class_port_info *rec,
642 void *context)
643{
644 struct classport_info_context *cb_ctx = context;
645 struct ipoib_dev_priv *priv;
646
647 WARN_ON(!context);
648
649 priv = cb_ctx->priv;
650
651 if (status || !rec) {
652 pr_debug("device: %s failed query classport_info status: %d\n",
653 priv->dev->name, status);
654 /* keeps the default, will try next mcast_restart */
655 priv->sm_fullmember_sendonly_support = false;
656 goto out;
657 }
658
659 if (ib_get_cpi_capmask2(rec) &
660 IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT) {
661 pr_debug("device: %s enabled fullmember-sendonly for sendonly MCG\n",
662 priv->dev->name);
663 priv->sm_fullmember_sendonly_support = true;
664 } else {
665 pr_debug("device: %s disabled fullmember-sendonly for sendonly MCG\n",
666 priv->dev->name);
667 priv->sm_fullmember_sendonly_support = false;
668 }
669
670out:
671 complete(&cb_ctx->done);
672}
673
674int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv)
675{
676 struct classport_info_context *callback_context;
677 int ret;
678
679 callback_context = kmalloc(sizeof(*callback_context), GFP_KERNEL);
680 if (!callback_context)
681 return -ENOMEM;
682
683 callback_context->priv = priv;
684 init_completion(&callback_context->done);
685
686 ret = ib_sa_classport_info_rec_query(&ipoib_sa_client,
687 priv->ca, priv->port, 3000,
688 GFP_KERNEL,
689 classport_info_query_cb,
690 callback_context,
691 &callback_context->sa_query);
692 if (ret < 0) {
693 pr_info("%s failed to send ib_sa_classport_info query, ret: %d\n",
694 priv->dev->name, ret);
695 kfree(callback_context);
696 return ret;
697 }
698
699 /* waiting for the callback to finish before returnning */
700 wait_for_completion(&callback_context->done);
701 kfree(callback_context);
702
703 return ret;
704}
705
632void ipoib_flush_paths(struct net_device *dev) 706void ipoib_flush_paths(struct net_device *dev)
633{ 707{
634 struct ipoib_dev_priv *priv = netdev_priv(dev); 708 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -1649,6 +1723,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = {
1649 .ndo_get_vf_config = ipoib_get_vf_config, 1723 .ndo_get_vf_config = ipoib_get_vf_config,
1650 .ndo_get_vf_stats = ipoib_get_vf_stats, 1724 .ndo_get_vf_stats = ipoib_get_vf_stats,
1651 .ndo_set_vf_guid = ipoib_set_vf_guid, 1725 .ndo_set_vf_guid = ipoib_set_vf_guid,
1726 .ndo_set_mac_address = ipoib_set_mac,
1652}; 1727};
1653 1728
1654static const struct net_device_ops ipoib_netdev_ops_vf = { 1729static const struct net_device_ops ipoib_netdev_ops_vf = {
@@ -1771,6 +1846,70 @@ int ipoib_add_umcast_attr(struct net_device *dev)
1771 return device_create_file(&dev->dev, &dev_attr_umcast); 1846 return device_create_file(&dev->dev, &dev_attr_umcast);
1772} 1847}
1773 1848
1849static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid)
1850{
1851 struct ipoib_dev_priv *child_priv;
1852 struct net_device *netdev = priv->dev;
1853
1854 netif_addr_lock(netdev);
1855
1856 memcpy(&priv->local_gid.global.interface_id,
1857 &gid->global.interface_id,
1858 sizeof(gid->global.interface_id));
1859 memcpy(netdev->dev_addr + 4, &priv->local_gid, sizeof(priv->local_gid));
1860 clear_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
1861
1862 netif_addr_unlock(netdev);
1863
1864 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
1865 down_read(&priv->vlan_rwsem);
1866 list_for_each_entry(child_priv, &priv->child_intfs, list)
1867 set_base_guid(child_priv, gid);
1868 up_read(&priv->vlan_rwsem);
1869 }
1870}
1871
1872static int ipoib_check_lladdr(struct net_device *dev,
1873 struct sockaddr_storage *ss)
1874{
1875 union ib_gid *gid = (union ib_gid *)(ss->__data + 4);
1876 int ret = 0;
1877
1878 netif_addr_lock(dev);
1879
1880 /* Make sure the QPN, reserved and subnet prefix match the current
1881 * lladdr, it also makes sure the lladdr is unicast.
1882 */
1883 if (memcmp(dev->dev_addr, ss->__data,
1884 4 + sizeof(gid->global.subnet_prefix)) ||
1885 gid->global.interface_id == 0)
1886 ret = -EINVAL;
1887
1888 netif_addr_unlock(dev);
1889
1890 return ret;
1891}
1892
1893static int ipoib_set_mac(struct net_device *dev, void *addr)
1894{
1895 struct ipoib_dev_priv *priv = netdev_priv(dev);
1896 struct sockaddr_storage *ss = addr;
1897 int ret;
1898
1899 if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev))
1900 return -EBUSY;
1901
1902 ret = ipoib_check_lladdr(dev, ss);
1903 if (ret)
1904 return ret;
1905
1906 set_base_guid(priv, (union ib_gid *)(ss->__data + 4));
1907
1908 queue_work(ipoib_workqueue, &priv->flush_light);
1909
1910 return 0;
1911}
1912
1774static ssize_t create_child(struct device *dev, 1913static ssize_t create_child(struct device *dev,
1775 struct device_attribute *attr, 1914 struct device_attribute *attr,
1776 const char *buf, size_t count) 1915 const char *buf, size_t count)
@@ -1894,6 +2033,7 @@ static struct net_device *ipoib_add_port(const char *format,
1894 goto device_init_failed; 2033 goto device_init_failed;
1895 } else 2034 } else
1896 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); 2035 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
2036 set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
1897 2037
1898 result = ipoib_dev_init(priv->dev, hca, port); 2038 result = ipoib_dev_init(priv->dev, hca, port);
1899 if (result < 0) { 2039 if (result < 0) {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 25889311b1e9..82fbc9442608 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -64,6 +64,9 @@ struct ipoib_mcast_iter {
64 unsigned int send_only; 64 unsigned int send_only;
65}; 65};
66 66
67/* join state that allows creating mcg with sendonly member request */
68#define SENDONLY_FULLMEMBER_JOIN 8
69
67/* 70/*
68 * This should be called with the priv->lock held 71 * This should be called with the priv->lock held
69 */ 72 */
@@ -326,12 +329,23 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
326 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 329 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
327 carrier_on_task); 330 carrier_on_task);
328 struct ib_port_attr attr; 331 struct ib_port_attr attr;
332 int ret;
329 333
330 if (ib_query_port(priv->ca, priv->port, &attr) || 334 if (ib_query_port(priv->ca, priv->port, &attr) ||
331 attr.state != IB_PORT_ACTIVE) { 335 attr.state != IB_PORT_ACTIVE) {
332 ipoib_dbg(priv, "Keeping carrier off until IB port is active\n"); 336 ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
333 return; 337 return;
334 } 338 }
339 /*
340 * Check if can send sendonly MCG's with sendonly-fullmember join state.
341 * It done here after the successfully join to the broadcast group,
342 * because the broadcast group must always be joined first and is always
343 * re-joined if the SM changes substantially.
344 */
345 ret = ipoib_check_sm_sendonly_fullmember_support(priv);
346 if (ret < 0)
347 pr_debug("%s failed query sm support for sendonly-fullmember (ret: %d)\n",
348 priv->dev->name, ret);
335 349
336 /* 350 /*
337 * Take rtnl_lock to avoid racing with ipoib_stop() and 351 * Take rtnl_lock to avoid racing with ipoib_stop() and
@@ -515,22 +529,20 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
515 rec.hop_limit = priv->broadcast->mcmember.hop_limit; 529 rec.hop_limit = priv->broadcast->mcmember.hop_limit;
516 530
517 /* 531 /*
518 * Send-only IB Multicast joins do not work at the core 532 * Send-only IB Multicast joins work at the core IB layer but
519 * IB layer yet, so we can't use them here. However, 533 * require specific SM support.
520 * we are emulating an Ethernet multicast send, which 534 * We can use such joins here only if the current SM supports that feature.
521 * does not require a multicast subscription and will 535 * However, if not, we emulate an Ethernet multicast send,
522 * still send properly. The most appropriate thing to 536 * which does not require a multicast subscription and will
537 * still send properly. The most appropriate thing to
523 * do is to create the group if it doesn't exist as that 538 * do is to create the group if it doesn't exist as that
524 * most closely emulates the behavior, from a user space 539 * most closely emulates the behavior, from a user space
525 * application perspecitive, of Ethernet multicast 540 * application perspective, of Ethernet multicast operation.
526 * operation. For now, we do a full join, maybe later
527 * when the core IB layers support send only joins we
528 * will use them.
529 */ 541 */
530#if 0 542 if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
531 if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) 543 priv->sm_fullmember_sendonly_support)
532 rec.join_state = 4; 544 /* SM supports sendonly-fullmember, otherwise fallback to full-member */
533#endif 545 rec.join_state = SENDONLY_FULLMEMBER_JOIN;
534 } 546 }
535 spin_unlock_irq(&priv->lock); 547 spin_unlock_irq(&priv->lock);
536 548
@@ -570,11 +582,13 @@ void ipoib_mcast_join_task(struct work_struct *work)
570 return; 582 return;
571 } 583 }
572 priv->local_lid = port_attr.lid; 584 priv->local_lid = port_attr.lid;
585 netif_addr_lock(dev);
573 586
574 if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid, NULL)) 587 if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
575 ipoib_warn(priv, "ib_query_gid() failed\n"); 588 netif_addr_unlock(dev);
576 else 589 return;
577 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); 590 }
591 netif_addr_unlock(dev);
578 592
579 spin_lock_irq(&priv->lock); 593 spin_lock_irq(&priv->lock);
580 if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) 594 if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index b809c373e40e..1e7cbbaa15bd 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -307,5 +307,8 @@ void ipoib_event(struct ib_event_handler *handler,
307 queue_work(ipoib_workqueue, &priv->flush_normal); 307 queue_work(ipoib_workqueue, &priv->flush_normal);
308 } else if (record->event == IB_EVENT_PKEY_CHANGE) { 308 } else if (record->event == IB_EVENT_PKEY_CHANGE) {
309 queue_work(ipoib_workqueue, &priv->flush_heavy); 309 queue_work(ipoib_workqueue, &priv->flush_heavy);
310 } else if (record->event == IB_EVENT_GID_CHANGE &&
311 !test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
312 queue_work(ipoib_workqueue, &priv->flush_light);
310 } 313 }
311} 314}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index fca1a882de27..64a35595eab8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -68,6 +68,8 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
68 priv->pkey = pkey; 68 priv->pkey = pkey;
69 69
70 memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN); 70 memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN);
71 memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid));
72 set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
71 priv->dev->broadcast[8] = pkey >> 8; 73 priv->dev->broadcast[8] = pkey >> 8;
72 priv->dev->broadcast[9] = pkey & 0xff; 74 priv->dev->broadcast[9] = pkey & 0xff;
73 75
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 2843f1ae75bd..887ebadd4774 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -254,8 +254,8 @@ static void srpt_get_class_port_info(struct ib_dm_mad *mad)
254 memset(cif, 0, sizeof(*cif)); 254 memset(cif, 0, sizeof(*cif));
255 cif->base_version = 1; 255 cif->base_version = 1;
256 cif->class_version = 1; 256 cif->class_version = 1;
257 cif->resp_time_value = 20;
258 257
258 ib_set_cpi_resp_time(cif, 20);
259 mad->mad_hdr.status = 0; 259 mad->mad_hdr.status = 0;
260} 260}
261 261
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 5bac28a3944e..7c197d1a1231 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -66,8 +66,6 @@ source "drivers/staging/nvec/Kconfig"
66 66
67source "drivers/staging/media/Kconfig" 67source "drivers/staging/media/Kconfig"
68 68
69source "drivers/staging/rdma/Kconfig"
70
71source "drivers/staging/android/Kconfig" 69source "drivers/staging/android/Kconfig"
72 70
73source "drivers/staging/board/Kconfig" 71source "drivers/staging/board/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index a954242b0f2c..a470c7276142 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -23,7 +23,6 @@ obj-$(CONFIG_FB_XGI) += xgifb/
23obj-$(CONFIG_USB_EMXX) += emxx_udc/ 23obj-$(CONFIG_USB_EMXX) += emxx_udc/
24obj-$(CONFIG_SPEAKUP) += speakup/ 24obj-$(CONFIG_SPEAKUP) += speakup/
25obj-$(CONFIG_MFD_NVEC) += nvec/ 25obj-$(CONFIG_MFD_NVEC) += nvec/
26obj-$(CONFIG_STAGING_RDMA) += rdma/
27obj-$(CONFIG_ANDROID) += android/ 26obj-$(CONFIG_ANDROID) += android/
28obj-$(CONFIG_STAGING_BOARD) += board/ 27obj-$(CONFIG_STAGING_BOARD) += board/
29obj-$(CONFIG_LTE_GDM724X) += gdm724x/ 28obj-$(CONFIG_LTE_GDM724X) += gdm724x/
diff --git a/drivers/staging/rdma/Kconfig b/drivers/staging/rdma/Kconfig
deleted file mode 100644
index f1f3ecadf0fb..000000000000
--- a/drivers/staging/rdma/Kconfig
+++ /dev/null
@@ -1,27 +0,0 @@
1menuconfig STAGING_RDMA
2 tristate "RDMA staging drivers"
3 depends on INFINIBAND
4 depends on PCI || BROKEN
5 depends on HAS_IOMEM
6 depends on NET
7 depends on INET
8 default n
9 ---help---
10 This option allows you to select a number of RDMA drivers that
11 fall into one of two categories: deprecated drivers being held
12 here before finally being removed or new drivers that still need
13 some work before being moved to the normal RDMA driver area.
14
15 If you wish to work on these drivers, to help improve them, or
16 to report problems you have with them, please use the
17 linux-rdma@vger.kernel.org mailing list.
18
19 If in doubt, say N here.
20
21
22# Please keep entries in alphabetic order
23if STAGING_RDMA
24
25source "drivers/staging/rdma/hfi1/Kconfig"
26
27endif
diff --git a/drivers/staging/rdma/Makefile b/drivers/staging/rdma/Makefile
deleted file mode 100644
index 8c7fc1de48a7..000000000000
--- a/drivers/staging/rdma/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
1# Entries for RDMA_STAGING tree
2obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/
diff --git a/drivers/staging/rdma/hfi1/TODO b/drivers/staging/rdma/hfi1/TODO
deleted file mode 100644
index 4c6f1d7d2eaf..000000000000
--- a/drivers/staging/rdma/hfi1/TODO
+++ /dev/null
@@ -1,6 +0,0 @@
1July, 2015
2
3- Remove unneeded file entries in sysfs
4- Remove software processing of IB protocol and place in library for use
5 by qib, ipath (if still present), hfi1, and eventually soft-roce
6- Replace incorrect uAPI
diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c
deleted file mode 100644
index bb2409ad891a..000000000000
--- a/drivers/staging/rdma/hfi1/diag.c
+++ /dev/null
@@ -1,1925 +0,0 @@
1/*
2 * Copyright(c) 2015, 2016 Intel Corporation.
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of version 2 of the GNU General Public License as
11 * published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * BSD LICENSE
19 *
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions
22 * are met:
23 *
24 * - Redistributions of source code must retain the above copyright
25 * notice, this list of conditions and the following disclaimer.
26 * - Redistributions in binary form must reproduce the above copyright
27 * notice, this list of conditions and the following disclaimer in
28 * the documentation and/or other materials provided with the
29 * distribution.
30 * - Neither the name of Intel Corporation nor the names of its
31 * contributors may be used to endorse or promote products derived
32 * from this software without specific prior written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 *
46 */
47
48/*
49 * This file contains support for diagnostic functions. It is accessed by
50 * opening the hfi1_diag device, normally minor number 129. Diagnostic use
51 * of the chip may render the chip or board unusable until the driver
52 * is unloaded, or in some cases, until the system is rebooted.
53 *
54 * Accesses to the chip through this interface are not similar to going
55 * through the /sys/bus/pci resource mmap interface.
56 */
57
58#include <linux/io.h>
59#include <linux/pci.h>
60#include <linux/poll.h>
61#include <linux/vmalloc.h>
62#include <linux/export.h>
63#include <linux/fs.h>
64#include <linux/uaccess.h>
65#include <linux/module.h>
66#include <rdma/ib_smi.h>
67#include "hfi.h"
68#include "device.h"
69#include "common.h"
70#include "verbs_txreq.h"
71#include "trace.h"
72
73#undef pr_fmt
74#define pr_fmt(fmt) DRIVER_NAME ": " fmt
75#define snoop_dbg(fmt, ...) \
76 hfi1_cdbg(SNOOP, fmt, ##__VA_ARGS__)
77
78/* Snoop option mask */
79#define SNOOP_DROP_SEND BIT(0)
80#define SNOOP_USE_METADATA BIT(1)
81#define SNOOP_SET_VL0TOVL15 BIT(2)
82
83static u8 snoop_flags;
84
85/*
86 * Extract packet length from LRH header.
87 * This is in Dwords so multiply by 4 to get size in bytes
88 */
89#define HFI1_GET_PKT_LEN(x) (((be16_to_cpu((x)->lrh[2]) & 0xFFF)) << 2)
90
91enum hfi1_filter_status {
92 HFI1_FILTER_HIT,
93 HFI1_FILTER_ERR,
94 HFI1_FILTER_MISS
95};
96
97/* snoop processing functions */
98rhf_rcv_function_ptr snoop_rhf_rcv_functions[8] = {
99 [RHF_RCV_TYPE_EXPECTED] = snoop_recv_handler,
100 [RHF_RCV_TYPE_EAGER] = snoop_recv_handler,
101 [RHF_RCV_TYPE_IB] = snoop_recv_handler,
102 [RHF_RCV_TYPE_ERROR] = snoop_recv_handler,
103 [RHF_RCV_TYPE_BYPASS] = snoop_recv_handler,
104 [RHF_RCV_TYPE_INVALID5] = process_receive_invalid,
105 [RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
106 [RHF_RCV_TYPE_INVALID7] = process_receive_invalid
107};
108
109/* Snoop packet structure */
110struct snoop_packet {
111 struct list_head list;
112 u32 total_len;
113 u8 data[];
114};
115
116/* Do not make these an enum or it will blow up the capture_md */
117#define PKT_DIR_EGRESS 0x0
118#define PKT_DIR_INGRESS 0x1
119
120/* Packet capture metadata returned to the user with the packet. */
121struct capture_md {
122 u8 port;
123 u8 dir;
124 u8 reserved[6];
125 union {
126 u64 pbc;
127 u64 rhf;
128 } u;
129};
130
131static atomic_t diagpkt_count = ATOMIC_INIT(0);
132static struct cdev diagpkt_cdev;
133static struct device *diagpkt_device;
134
135static ssize_t diagpkt_write(struct file *fp, const char __user *data,
136 size_t count, loff_t *off);
137
138static const struct file_operations diagpkt_file_ops = {
139 .owner = THIS_MODULE,
140 .write = diagpkt_write,
141 .llseek = noop_llseek,
142};
143
144/*
145 * This is used for communication with user space for snoop extended IOCTLs
146 */
147struct hfi1_link_info {
148 __be64 node_guid;
149 u8 port_mode;
150 u8 port_state;
151 u16 link_speed_active;
152 u16 link_width_active;
153 u16 vl15_init;
154 u8 port_number;
155 /*
156 * Add padding to make this a full IB SMP payload. Note: changing the
157 * size of this structure will make the IOCTLs created with _IOWR
158 * change.
159 * Be sure to run tests on all IOCTLs when making changes to this
160 * structure.
161 */
162 u8 res[47];
163};
164
165/*
166 * This starts our ioctl sequence numbers *way* off from the ones
167 * defined in ib_core.
168 */
169#define SNOOP_CAPTURE_VERSION 0x1
170
171#define IB_IOCTL_MAGIC 0x1b /* See Documentation/ioctl-number.txt */
172#define HFI1_SNOOP_IOC_MAGIC IB_IOCTL_MAGIC
173#define HFI1_SNOOP_IOC_BASE_SEQ 0x80
174
175#define HFI1_SNOOP_IOCGETLINKSTATE \
176 _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ)
177#define HFI1_SNOOP_IOCSETLINKSTATE \
178 _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 1)
179#define HFI1_SNOOP_IOCCLEARQUEUE \
180 _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 2)
181#define HFI1_SNOOP_IOCCLEARFILTER \
182 _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 3)
183#define HFI1_SNOOP_IOCSETFILTER \
184 _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 4)
185#define HFI1_SNOOP_IOCGETVERSION \
186 _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 5)
187#define HFI1_SNOOP_IOCSET_OPTS \
188 _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 6)
189
190/*
191 * These offsets +6/+7 could change, but these are already known and used
192 * IOCTL numbers so don't change them without a good reason.
193 */
194#define HFI1_SNOOP_IOCGETLINKSTATE_EXTRA \
195 _IOWR(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 6, \
196 struct hfi1_link_info)
197#define HFI1_SNOOP_IOCSETLINKSTATE_EXTRA \
198 _IOWR(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 7, \
199 struct hfi1_link_info)
200
201static int hfi1_snoop_open(struct inode *in, struct file *fp);
202static ssize_t hfi1_snoop_read(struct file *fp, char __user *data,
203 size_t pkt_len, loff_t *off);
204static ssize_t hfi1_snoop_write(struct file *fp, const char __user *data,
205 size_t count, loff_t *off);
206static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg);
207static unsigned int hfi1_snoop_poll(struct file *fp,
208 struct poll_table_struct *wait);
209static int hfi1_snoop_release(struct inode *in, struct file *fp);
210
211struct hfi1_packet_filter_command {
212 int opcode;
213 int length;
214 void *value_ptr;
215};
216
217/* Can't re-use PKT_DIR_*GRESS here because 0 means no packets for this */
218#define HFI1_SNOOP_INGRESS 0x1
219#define HFI1_SNOOP_EGRESS 0x2
220
221enum hfi1_packet_filter_opcodes {
222 FILTER_BY_LID,
223 FILTER_BY_DLID,
224 FILTER_BY_MAD_MGMT_CLASS,
225 FILTER_BY_QP_NUMBER,
226 FILTER_BY_PKT_TYPE,
227 FILTER_BY_SERVICE_LEVEL,
228 FILTER_BY_PKEY,
229 FILTER_BY_DIRECTION,
230};
231
232static const struct file_operations snoop_file_ops = {
233 .owner = THIS_MODULE,
234 .open = hfi1_snoop_open,
235 .read = hfi1_snoop_read,
236 .unlocked_ioctl = hfi1_ioctl,
237 .poll = hfi1_snoop_poll,
238 .write = hfi1_snoop_write,
239 .release = hfi1_snoop_release
240};
241
242struct hfi1_filter_array {
243 int (*filter)(void *, void *, void *);
244};
245
246static int hfi1_filter_lid(void *ibhdr, void *packet_data, void *value);
247static int hfi1_filter_dlid(void *ibhdr, void *packet_data, void *value);
248static int hfi1_filter_mad_mgmt_class(void *ibhdr, void *packet_data,
249 void *value);
250static int hfi1_filter_qp_number(void *ibhdr, void *packet_data, void *value);
251static int hfi1_filter_ibpacket_type(void *ibhdr, void *packet_data,
252 void *value);
253static int hfi1_filter_ib_service_level(void *ibhdr, void *packet_data,
254 void *value);
255static int hfi1_filter_ib_pkey(void *ibhdr, void *packet_data, void *value);
256static int hfi1_filter_direction(void *ibhdr, void *packet_data, void *value);
257
258static const struct hfi1_filter_array hfi1_filters[] = {
259 { hfi1_filter_lid },
260 { hfi1_filter_dlid },
261 { hfi1_filter_mad_mgmt_class },
262 { hfi1_filter_qp_number },
263 { hfi1_filter_ibpacket_type },
264 { hfi1_filter_ib_service_level },
265 { hfi1_filter_ib_pkey },
266 { hfi1_filter_direction },
267};
268
269#define HFI1_MAX_FILTERS ARRAY_SIZE(hfi1_filters)
270#define HFI1_DIAG_MINOR_BASE 129
271
272static int hfi1_snoop_add(struct hfi1_devdata *dd, const char *name);
273
/*
 * hfi1_diag_add - create the diagnostic character devices for @dd.
 *
 * Creates the per-device snoop/capture cdev, then the host-wide diagpkt
 * cdev; the latter is created only by the first caller (diagpkt_count
 * transitioning 0 -> 1).  Returns 0 or a negative errno from cdev setup.
 */
 274int hfi1_diag_add(struct hfi1_devdata *dd)
 275{
 276 char name[16];
 277 int ret = 0;
 278
 279 snprintf(name, sizeof(name), "%s_diagpkt%d", class_name(),
 280 dd->unit);
 281 /*
 282 * Do this for each device as opposed to the normal diagpkt
 283 * interface which is one per host
 284 */
 285 ret = hfi1_snoop_add(dd, name);
 286 if (ret)
 287 dd_dev_err(dd, "Unable to init snoop/capture device");
 288
 289 snprintf(name, sizeof(name), "%s_diagpkt", class_name());
 290 if (atomic_inc_return(&diagpkt_count) == 1) {
 291 ret = hfi1_cdev_init(HFI1_DIAGPKT_MINOR, name,
 292 &diagpkt_file_ops, &diagpkt_cdev,
 293 &diagpkt_device, false);
 294 }
 295
 296 return ret;
 297}
298
 299/* this must be called w/ dd->snoop_in_lock held */
/*
 * drain_snoop_list - free every queued snoop_packet on @queue.
 * Uses the _safe iterator because entries are unlinked while walking.
 */
 300static void drain_snoop_list(struct list_head *queue)
 301{
 302 struct list_head *pos, *q;
 303 struct snoop_packet *packet;
 304
 305 list_for_each_safe(pos, q, queue) {
 306 packet = list_entry(pos, struct snoop_packet, list);
 307 list_del(pos);
 308 kfree(packet);
 309 }
 310}
311
/*
 * hfi1_snoop_remove - tear down the snoop/capture device for @dd:
 * drop any queued packets and destroy the cdev, all under snoop_lock.
 */
 312static void hfi1_snoop_remove(struct hfi1_devdata *dd)
 313{
 314 unsigned long flags = 0;
 315
 316 spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
 317 drain_snoop_list(&dd->hfi1_snoop.queue);
 318 hfi1_cdev_cleanup(&dd->hfi1_snoop.cdev, &dd->hfi1_snoop.class_dev);
 319 spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
 320}
321
/*
 * hfi1_diag_remove - inverse of hfi1_diag_add().  Removes the per-device
 * snoop cdev, and the shared diagpkt cdev once the last device is gone
 * (diagpkt_count reaching zero).
 */
 322void hfi1_diag_remove(struct hfi1_devdata *dd)
 323{
 324 hfi1_snoop_remove(dd);
 325 if (atomic_dec_and_test(&diagpkt_count))
 326 hfi1_cdev_cleanup(&diagpkt_cdev, &diagpkt_device);
 327 hfi1_cdev_cleanup(&dd->diag_cdev, &dd->diag_device);
 328}
329
330/*
331 * Allocated structure shared between the credit return mechanism and
332 * diagpkt_send().
333 */
 334struct diagpkt_wait {
 335 struct completion credits_returned; /* signalled by diagpkt_complete() */
 336 int code; /* PRC_* credit-return status code */
 337 atomic_t count; /* refcount; last put_diagpkt_wait() frees */
 338};
339
340/*
341 * When each side is finished with the structure, they call this.
342 * The last user frees the structure.
343 */
/* Drop one reference on @wait; the final reference frees the structure. */
 344static void put_diagpkt_wait(struct diagpkt_wait *wait)
 345{
 346 if (atomic_dec_and_test(&wait->count))
 347 kfree(wait);
 348}
349
350/*
351 * Callback from the credit return code. Set the complete, which
352 * will let diapkt_send() continue.
353 */
/*
 * diagpkt_complete - credit-return callback: record the PRC_* status
 * code, wake the waiter in diagpkt_send(), and drop this side's ref.
 */
 354static void diagpkt_complete(void *arg, int code)
 355{
 356 struct diagpkt_wait *wait = (struct diagpkt_wait *)arg;
 357
 358 wait->code = code;
 359 complete(&wait->credits_returned);
 360 put_diagpkt_wait(wait); /* finished with the structure */
 361}
362
 363/**
 364 * diagpkt_send - send a packet
 365 * @dp: diag packet descriptor
 *
 * Validates @dp (version, dword-aligned length, port 1, kernel send
 * context, allocated and enabled), copies the payload from user space,
 * and pushes it out via PIO.  With F_DIAGPKT_WAIT set, blocks until the
 * buffer credits return and maps error codes to -EIO.
 *
 * Return: sizeof(*dp) on success, 0 after a successful wait, or a
 * negative errno on failure.
 366 */
 367static ssize_t diagpkt_send(struct diag_pkt *dp)
 368{
 369 struct hfi1_devdata *dd;
 370 struct send_context *sc;
 371 struct pio_buf *pbuf;
 372 u32 *tmpbuf = NULL;
 373 ssize_t ret = 0;
 374 u32 pkt_len, total_len;
 375 pio_release_cb credit_cb = NULL;
 376 void *credit_arg = NULL;
 377 struct diagpkt_wait *wait = NULL;
 378 int trycount = 0;
 379
 /* device must exist, be present and have mapped registers */
 380 dd = hfi1_lookup(dp->unit);
 381 if (!dd || !(dd->flags & HFI1_PRESENT) || !dd->kregbase) {
 382 ret = -ENODEV;
 383 goto bail;
 384 }
 385 if (!(dd->flags & HFI1_INITTED)) {
 386 /* no hardware, freeze, etc. */
 387 ret = -ENODEV;
 388 goto bail;
 389 }
 390
 391 if (dp->version != _DIAG_PKT_VERS) {
 392 dd_dev_err(dd, "Invalid version %u for diagpkt_write\n",
 393 dp->version);
 394 ret = -EINVAL;
 395 goto bail;
 396 }
 397
 398 /* send count must be an exact number of dwords */
 399 if (dp->len & 3) {
 400 ret = -EINVAL;
 401 goto bail;
 402 }
 403
 404 /* there is only port 1 */
 405 if (dp->port != 1) {
 406 ret = -EINVAL;
 407 goto bail;
 408 }
 409
 410 /* need a valid context */
 411 if (dp->sw_index >= dd->num_send_contexts) {
 412 ret = -EINVAL;
 413 goto bail;
 414 }
 415 /* can only use kernel contexts */
 416 if (dd->send_contexts[dp->sw_index].type != SC_KERNEL &&
 417 dd->send_contexts[dp->sw_index].type != SC_VL15) {
 418 ret = -EINVAL;
 419 goto bail;
 420 }
 421 /* must be allocated */
 422 sc = dd->send_contexts[dp->sw_index].sc;
 423 if (!sc) {
 424 ret = -EINVAL;
 425 goto bail;
 426 }
 427 /* must be enabled */
 428 if (!(sc->flags & SCF_ENABLED)) {
 429 ret = -EINVAL;
 430 goto bail;
 431 }
 432
 433 /* allocate a buffer and copy the data in */
 434 tmpbuf = vmalloc(dp->len);
 435 if (!tmpbuf) {
 436 ret = -ENOMEM;
 437 goto bail;
 438 }
 439
 /* dp->data is a user pointer carried in a u64 */
 440 if (copy_from_user(tmpbuf,
 441 (const void __user *)(unsigned long)dp->data,
 442 dp->len)) {
 443 ret = -EFAULT;
 444 goto bail;
 445 }
 446
 447 /*
 448 * pkt_len is how much data we have to write, includes header and data.
 449 * total_len is length of the packet in Dwords plus the PBC should not
 450 * include the CRC.
 451 */
 452 pkt_len = dp->len >> 2;
 453 total_len = pkt_len + 2; /* PBC + packet */
 454
 455 /* if 0, fill in a default */
 456 if (dp->pbc == 0) {
 457 struct hfi1_pportdata *ppd = dd->pport;
 458
 459 hfi1_cdbg(PKT, "Generating PBC");
 460 dp->pbc = create_pbc(ppd, 0, 0, 0, total_len);
 461 } else {
 462 hfi1_cdbg(PKT, "Using passed in PBC");
 463 }
 464
 465 hfi1_cdbg(PKT, "Egress PBC content is 0x%llx", dp->pbc);
 466
 467 /*
 468 * The caller wants to wait until the packet is sent and to
 469 * check for errors. The best we can do is wait until
 470 * the buffer credits are returned and check if any packet
 471 * error has occurred. If there are any late errors, this
 472 * could miss it. If there are other senders who generate
 473 * an error, this may find it. However, in general, it
 474 * should catch most.
 475 */
 476 if (dp->flags & F_DIAGPKT_WAIT) {
 477 /* always force a credit return */
 478 dp->pbc |= PBC_CREDIT_RETURN;
 479 /* turn on credit return interrupts */
 480 sc_add_credit_return_intr(sc);
 481 wait = kmalloc(sizeof(*wait), GFP_KERNEL);
 482 if (!wait) {
 483 ret = -ENOMEM;
 484 goto bail;
 485 }
 486 init_completion(&wait->credits_returned);
 /* two refs: one for us, one for the credit callback */
 487 atomic_set(&wait->count, 2);
 488 wait->code = PRC_OK;
 489
 490 credit_cb = diagpkt_complete;
 491 credit_arg = wait;
 492 }
 493
 494retry:
 495 pbuf = sc_buffer_alloc(sc, total_len, credit_cb, credit_arg);
 496 if (!pbuf) {
 497 if (trycount == 0) {
 498 /* force a credit return and try again */
 499 sc_return_credits(sc);
 500 trycount = 1;
 501 goto retry;
 502 }
 503 /*
 504 * No send buffer means no credit callback. Undo
 505 * the wait set-up that was done above. We free wait
 506 * because the callback will never be called.
 507 */
 508 if (dp->flags & F_DIAGPKT_WAIT) {
 509 sc_del_credit_return_intr(sc);
 510 kfree(wait);
 511 wait = NULL;
 512 }
 513 ret = -ENOSPC;
 514 goto bail;
 515 }
 516
 517 pio_copy(dd, pbuf, dp->pbc, tmpbuf, pkt_len);
 518 /* no flush needed as the HW knows the packet size */
 519
 520 ret = sizeof(*dp);
 521
 522 if (dp->flags & F_DIAGPKT_WAIT) {
 523 /* wait for credit return */
 524 ret = wait_for_completion_interruptible(
 525 &wait->credits_returned);
 526 /*
 527 * If the wait returns an error, the wait was interrupted,
 528 * e.g. with a ^C in the user program. The callback is
 529 * still pending. This is OK as the wait structure is
 530 * kmalloc'ed and the structure will free itself when
 531 * all users are done with it.
 532 *
 533 * A context disable occurs on a send context restart, so
 534 * include that in the list of errors below to check for.
 535 * NOTE: PRC_FILL_ERR is at best informational and cannot
 536 * be depended on.
 537 */
 538 if (!ret && (((wait->code & PRC_STATUS_ERR) ||
 539 (wait->code & PRC_FILL_ERR) ||
 540 (wait->code & PRC_SC_DISABLE))))
 541 ret = -EIO;
 542
 543 put_diagpkt_wait(wait); /* finished with the structure */
 544 sc_del_credit_return_intr(sc);
 545 }
 546
 547bail:
 /* vfree(NULL) is a no-op, so this is safe on every path */
 548 vfree(tmpbuf);
 549 return ret;
 550}
551
/*
 * diagpkt_write - write() handler for the diagpkt device.
 *
 * Expects exactly one struct diag_pkt from user space.  If a PBC is
 * supplied, derives the send context from its VL field, then hands the
 * descriptor to diagpkt_send().  Returns its result, -EINVAL on a
 * short/long write, -EFAULT on copy failure, or -ENODEV.
 */
 552static ssize_t diagpkt_write(struct file *fp, const char __user *data,
 553 size_t count, loff_t *off)
 554{
 555 struct hfi1_devdata *dd;
 556 struct send_context *sc;
 557 u8 vl;
 558
 559 struct diag_pkt dp;
 560
 561 if (count != sizeof(dp))
 562 return -EINVAL;
 563
 564 if (copy_from_user(&dp, data, sizeof(dp)))
 565 return -EFAULT;
 566
 567 /*
 568 * The Send Context is derived from the PbcVL value
 569 * if PBC is populated
 570 */
 571 if (dp.pbc) {
 572 dd = hfi1_lookup(dp.unit);
 573 if (!dd)
 574 return -ENODEV;
 575 vl = (dp.pbc >> PBC_VL_SHIFT) & PBC_VL_MASK;
 576 sc = dd->vld[vl].sc;
 577 if (sc) {
 578 dp.sw_index = sc->sw_index;
 579 hfi1_cdbg(
 580 PKT,
 581 "Packet sent over VL %d via Send Context %u(%u)",
 582 vl, sc->sw_index, sc->hw_context);
 583 }
 584 }
 585
 586 return diagpkt_send(&dp);
 587}
588
/*
 * hfi1_snoop_add - initialize the per-device snoop state (lock, queue,
 * waitqueue) and register the snoop/capture cdev named @name.
 * On cdev failure the partially created device is cleaned up and the
 * negative errno is returned.
 */
 589static int hfi1_snoop_add(struct hfi1_devdata *dd, const char *name)
 590{
 591 int ret = 0;
 592
 593 dd->hfi1_snoop.mode_flag = 0;
 594 spin_lock_init(&dd->hfi1_snoop.snoop_lock);
 595 INIT_LIST_HEAD(&dd->hfi1_snoop.queue);
 596 init_waitqueue_head(&dd->hfi1_snoop.waitq);
 597
 598 ret = hfi1_cdev_init(HFI1_SNOOP_CAPTURE_BASE + dd->unit, name,
 599 &snoop_file_ops,
 600 &dd->hfi1_snoop.cdev, &dd->hfi1_snoop.class_dev,
 601 false);
 602
 603 if (ret) {
 604 dd_dev_err(dd, "Couldn't create %s device: %d", name, ret);
 605 hfi1_cdev_cleanup(&dd->hfi1_snoop.cdev,
 606 &dd->hfi1_snoop.class_dev);
 607 }
 608
 609 return ret;
 610}
611
/*
 * hfi1_dd_from_sc_inode - map a snoop/capture device inode back to its
 * hfi1_devdata via the minor number offset.  May return NULL if no
 * device with that unit exists.
 */
 612static struct hfi1_devdata *hfi1_dd_from_sc_inode(struct inode *in)
 613{
 614 int unit = iminor(in) - HFI1_SNOOP_CAPTURE_BASE;
 615 struct hfi1_devdata *dd;
 616
 617 dd = hfi1_lookup(unit);
 618 return dd;
 619}
620
 621/* clear or restore send context integrity checks */
/*
 * Walks every allocated send context under sc_lock.  set_pio_integrity()
 * is applied unconditionally; hfi1_init_ctxt() re-applies the HFI_CAP_*
 * flags only when integrity checking should be enabled (not in snoop
 * mode and NO_INTEGRITY not set).
 */
 622static void adjust_integrity_checks(struct hfi1_devdata *dd)
 623{
 624 struct send_context *sc;
 625 unsigned long sc_flags;
 626 int i;
 627
 628 spin_lock_irqsave(&dd->sc_lock, sc_flags);
 629 for (i = 0; i < dd->num_send_contexts; i++) {
 630 int enable;
 631
 632 sc = dd->send_contexts[i].sc;
 633
 634 if (!sc)
 635 continue; /* not allocated */
 636
 637 enable = likely(!HFI1_CAP_IS_KSET(NO_INTEGRITY)) &&
 638 dd->hfi1_snoop.mode_flag != HFI1_PORT_SNOOP_MODE;
 639
 640 set_pio_integrity(sc);
 641
 642 if (enable) /* take HFI_CAP_* flags into account */
 643 hfi1_init_ctxt(sc);
 644 }
 645 spin_unlock_irqrestore(&dd->sc_lock, sc_flags);
 646}
647
/*
 * hfi1_snoop_open - open() handler for the snoop/capture device.
 *
 * O_RDONLY selects capture mode, O_RDWR selects snoop mode; any other
 * access mode is rejected with -EINVAL.  Only one opener is allowed at
 * a time (-EBUSY otherwise).  In snoop mode, send-side integrity checks
 * and the DLID/LMC ingress check are disabled; both are restored by
 * hfi1_snoop_release().  Finally the device's recv/send handlers are
 * redirected to the snoop variants.
 */
 648static int hfi1_snoop_open(struct inode *in, struct file *fp)
 649{
 650 int ret;
 651 int mode_flag = 0;
 652 unsigned long flags = 0;
 653 struct hfi1_devdata *dd;
 654 struct list_head *queue;
 655
 656 mutex_lock(&hfi1_mutex);
 657
 658 dd = hfi1_dd_from_sc_inode(in);
 659 if (!dd) {
 660 ret = -ENODEV;
 661 goto bail;
 662 }
 663
 664 /*
 665 * File mode determines snoop or capture. Some existing user
 666 * applications expect the capture device to be able to be opened RDWR
 667 * because they expect a dedicated capture device. For this reason we
 668 * support a module param to force capture mode even if the file open
 669 * mode matches snoop.
 670 */
 671 if ((fp->f_flags & O_ACCMODE) == O_RDONLY) {
 672 snoop_dbg("Capture Enabled");
 673 mode_flag = HFI1_PORT_CAPTURE_MODE;
 674 } else if ((fp->f_flags & O_ACCMODE) == O_RDWR) {
 675 snoop_dbg("Snoop Enabled");
 676 mode_flag = HFI1_PORT_SNOOP_MODE;
 677 } else {
 678 snoop_dbg("Invalid");
 679 ret = -EINVAL;
 680 goto bail;
 681 }
 682 queue = &dd->hfi1_snoop.queue;
 683
 684 /*
 685 * We are not supporting snoop and capture at the same time.
 686 */
 687 spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
 688 if (dd->hfi1_snoop.mode_flag) {
 689 ret = -EBUSY;
 690 spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
 691 goto bail;
 692 }
 693
 694 dd->hfi1_snoop.mode_flag = mode_flag;
 695 drain_snoop_list(queue);
 696
 697 dd->hfi1_snoop.filter_callback = NULL;
 698 dd->hfi1_snoop.filter_value = NULL;
 699
 700 /*
 701 * Send side packet integrity checks are not helpful when snooping so
 702 * disable and re-enable when we stop snooping.
 703 */
 704 if (mode_flag == HFI1_PORT_SNOOP_MODE) {
 705 /* clear after snoop mode is on */
 706 adjust_integrity_checks(dd); /* clear */
 707
 708 /*
 709 * We also do not want to be doing the DLID LMC check for
 710 * ingressed packets.
 711 */
 712 dd->hfi1_snoop.dcc_cfg = read_csr(dd, DCC_CFG_PORT_CONFIG1);
 713 write_csr(dd, DCC_CFG_PORT_CONFIG1,
 714 (dd->hfi1_snoop.dcc_cfg >> 32) << 32);
 715 }
 716
 717 /*
 718 * As soon as we set these function pointers the recv and send handlers
 719 * are active. This is a race condition so we must make sure to drain
 720 * the queue and init filter values above. Technically we should add
 721 * locking here but all that will happen is on recv a packet will get
 722 * allocated and get stuck on the snoop_lock before getting added to the
 723 * queue. Same goes for send.
 724 */
 725 dd->rhf_rcv_function_map = snoop_rhf_rcv_functions;
 726 dd->process_pio_send = snoop_send_pio_handler;
 727 dd->process_dma_send = snoop_send_pio_handler;
 728 dd->pio_inline_send = snoop_inline_pio_send;
 729
 730 spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
 731 ret = 0;
 732
 733bail:
 734 mutex_unlock(&hfi1_mutex);
 735
 736 return ret;
 737}
738
/*
 * hfi1_snoop_release - release() handler; undoes hfi1_snoop_open().
 *
 * Clears the mode flag first (so CSR re-adjustment sees "not snooping"),
 * drains queued packets, restores the integrity checks and the saved
 * DCC_CFG_PORT_CONFIG1 value when leaving snoop mode, frees any filter
 * state, and re-points the recv/send handlers back at the normal paths.
 */
 739static int hfi1_snoop_release(struct inode *in, struct file *fp)
 740{
 741 unsigned long flags = 0;
 742 struct hfi1_devdata *dd;
 743 int mode_flag;
 744
 745 dd = hfi1_dd_from_sc_inode(in);
 746 if (!dd)
 747 return -ENODEV;
 748
 749 spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
 750
 751 /* clear the snoop mode before re-adjusting send context CSRs */
 752 mode_flag = dd->hfi1_snoop.mode_flag;
 753 dd->hfi1_snoop.mode_flag = 0;
 754
 755 /*
 756 * Drain the queue and clear the filters we are done with it. Don't
 757 * forget to restore the packet integrity checks
 758 */
 759 drain_snoop_list(&dd->hfi1_snoop.queue);
 760 if (mode_flag == HFI1_PORT_SNOOP_MODE) {
 761 /* restore after snoop mode is clear */
 762 adjust_integrity_checks(dd); /* restore */
 763
 764 /*
 765 * Also should probably reset the DCC_CONFIG1 register for DLID
 766 * checking on incoming packets again. Use the value saved when
 767 * opening the snoop device.
 768 */
 769 write_csr(dd, DCC_CFG_PORT_CONFIG1, dd->hfi1_snoop.dcc_cfg);
 770 }
 771
 772 dd->hfi1_snoop.filter_callback = NULL;
 773 kfree(dd->hfi1_snoop.filter_value);
 774 dd->hfi1_snoop.filter_value = NULL;
 775
 776 /*
 777 * User is done snooping and capturing, return control to the normal
 778 * handler. Re-enable SDMA handling.
 779 */
 780 dd->rhf_rcv_function_map = dd->normal_rhf_rcv_functions;
 781 dd->process_pio_send = hfi1_verbs_send_pio;
 782 dd->process_dma_send = hfi1_verbs_send_dma;
 783 dd->pio_inline_send = pio_copy;
 784
 785 spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
 786
 787 snoop_dbg("snoop/capture device released");
 788
 789 return 0;
 790}
791
792static unsigned int hfi1_snoop_poll(struct file *fp,
793 struct poll_table_struct *wait)
794{
795 int ret = 0;
796 unsigned long flags = 0;
797
798 struct hfi1_devdata *dd;
799
800 dd = hfi1_dd_from_sc_inode(fp->f_inode);
801 if (!dd)
802 return -ENODEV;
803
804 spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
805
806 poll_wait(fp, &dd->hfi1_snoop.waitq, wait);
807 if (!list_empty(&dd->hfi1_snoop.queue))
808 ret |= POLLIN | POLLRDNORM;
809
810 spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
811 return ret;
812}
813
814static ssize_t hfi1_snoop_write(struct file *fp, const char __user *data,
815 size_t count, loff_t *off)
816{
817 struct diag_pkt dpkt;
818 struct hfi1_devdata *dd;
819 size_t ret;
820 u8 byte_two, sl, sc5, sc4, vl, byte_one;
821 struct send_context *sc;
822 u32 len;
823 u64 pbc;
824 struct hfi1_ibport *ibp;
825 struct hfi1_pportdata *ppd;
826
827 dd = hfi1_dd_from_sc_inode(fp->f_inode);
828 if (!dd)
829 return -ENODEV;
830
831 ppd = dd->pport;
832 snoop_dbg("received %lu bytes from user", count);
833
834 memset(&dpkt, 0, sizeof(struct diag_pkt));
835 dpkt.version = _DIAG_PKT_VERS;
836 dpkt.unit = dd->unit;
837 dpkt.port = 1;
838
839 if (likely(!(snoop_flags & SNOOP_USE_METADATA))) {
840 /*
841 * We need to generate the PBC and not let diagpkt_send do it,
842 * to do this we need the VL and the length in dwords.
843 * The VL can be determined by using the SL and looking up the
844 * SC. Then the SC can be converted into VL. The exception to
845 * this is those packets which are from an SMI queue pair.
846 * Since we can't detect anything about the QP here we have to
847 * rely on the SC. If its 0xF then we assume its SMI and
848 * do not look at the SL.
849 */
850 if (copy_from_user(&byte_one, data, 1))
851 return -EINVAL;
852
853 if (copy_from_user(&byte_two, data + 1, 1))
854 return -EINVAL;
855
856 sc4 = (byte_one >> 4) & 0xf;
857 if (sc4 == 0xF) {
858 snoop_dbg("Detected VL15 packet ignoring SL in packet");
859 vl = sc4;
860 } else {
861 sl = (byte_two >> 4) & 0xf;
862 ibp = to_iport(&dd->verbs_dev.rdi.ibdev, 1);
863 sc5 = ibp->sl_to_sc[sl];
864 vl = sc_to_vlt(dd, sc5);
865 if (vl != sc4) {
866 snoop_dbg("VL %d does not match SC %d of packet",
867 vl, sc4);
868 return -EINVAL;
869 }
870 }
871
872 sc = dd->vld[vl].sc; /* Look up the context based on VL */
873 if (sc) {
874 dpkt.sw_index = sc->sw_index;
875 snoop_dbg("Sending on context %u(%u)", sc->sw_index,
876 sc->hw_context);
877 } else {
878 snoop_dbg("Could not find context for vl %d", vl);
879 return -EINVAL;
880 }
881
882 len = (count >> 2) + 2; /* Add in PBC */
883 pbc = create_pbc(ppd, 0, 0, vl, len);
884 } else {
885 if (copy_from_user(&pbc, data, sizeof(pbc)))
886 return -EINVAL;
887 vl = (pbc >> PBC_VL_SHIFT) & PBC_VL_MASK;
888 sc = dd->vld[vl].sc; /* Look up the context based on VL */
889 if (sc) {
890 dpkt.sw_index = sc->sw_index;
891 } else {
892 snoop_dbg("Could not find context for vl %d", vl);
893 return -EINVAL;
894 }
895 data += sizeof(pbc);
896 count -= sizeof(pbc);
897 }
898 dpkt.len = count;
899 dpkt.data = (unsigned long)data;
900
901 snoop_dbg("PBC: vl=0x%llx Length=0x%llx",
902 (pbc >> 12) & 0xf,
903 (pbc & 0xfff));
904
905 dpkt.pbc = pbc;
906 ret = diagpkt_send(&dpkt);
907 /*
908 * diagpkt_send only returns number of bytes in the diagpkt so patch
909 * that up here before returning.
910 */
911 if (ret == sizeof(dpkt))
912 return count;
913
914 return ret;
915}
916
/*
 * hfi1_snoop_read - blocking read of the next captured packet.
 * @fp: snoop/capture device file
 * @data: user buffer to copy the packet into
 * @pkt_len: size of the user buffer
 * @off: unused file offset
 *
 * Sleeps (unless O_NONBLOCK) until a packet is queued, then dequeues the
 * oldest packet and copies its full contents to user space.
 *
 * Return: packet length on success; -EAGAIN when non-blocking and empty;
 * -EINTR if interrupted; -EINVAL if the user buffer is too small (the
 * packet is still consumed and freed); -EFAULT on copy failure; -ENODEV
 * if no device maps to the inode.
 */
static ssize_t hfi1_snoop_read(struct file *fp, char __user *data,
			       size_t pkt_len, loff_t *off)
{
	ssize_t ret = 0;
	unsigned long flags = 0;
	struct snoop_packet *packet = NULL;
	struct hfi1_devdata *dd;

	dd = hfi1_dd_from_sc_inode(fp->f_inode);
	if (!dd)
		return -ENODEV;

	spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);

	while (list_empty(&dd->hfi1_snoop.queue)) {
		spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);

		if (fp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(
				dd->hfi1_snoop.waitq,
				!list_empty(&dd->hfi1_snoop.queue)))
			return -EINTR;

		/* re-check emptiness under the lock before dequeueing */
		spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
	}

	/* loop exit guarantees non-empty here, but re-verify defensively */
	if (!list_empty(&dd->hfi1_snoop.queue)) {
		packet = list_entry(dd->hfi1_snoop.queue.next,
				    struct snoop_packet, list);
		list_del(&packet->list);
		spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
		if (pkt_len >= packet->total_len) {
			if (copy_to_user(data, packet->data,
					 packet->total_len))
				ret = -EFAULT;
			else
				ret = packet->total_len;
		} else {
			/* buffer too small: the packet is dropped below */
			ret = -EINVAL;
		}

		kfree(packet);
	} else {
		spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
	}

	return ret;
}
967
968/**
969 * hfi1_assign_snoop_link_credits -- Set up credits for VL15 and others
970 * @ppd : ptr to hfi1 port data
971 * @value : options from user space
972 *
973 * Assumes the rest of the CM credit registers are zero from a
974 * previous global or credit reset.
975 * Leave shared count at zero for both global and all vls.
976 * In snoop mode ideally we don't use shared credits
977 * Reserve 8.5k for VL15
978 * If total credits less than 8.5kbytes return error.
979 * Divide the rest of the credits across VL0 to VL7 and if
980 * each of these levels has less than 34 credits (at least 2048 + 128 bytes)
981 * return with an error.
982 * The credit registers will be reset to zero on link negotiation or link up
983 * so this function should be activated from user space only if the port has
984 * gone past link negotiation and link up.
985 *
986 * Return -- 0 if successful else error condition
987 *
988 */
989static long hfi1_assign_snoop_link_credits(struct hfi1_pportdata *ppd,
990 int value)
991{
992#define OPA_MIN_PER_VL_CREDITS 34 /* 2048 + 128 bytes */
993 struct buffer_control t;
994 int i;
995 struct hfi1_devdata *dd = ppd->dd;
996 u16 total_credits = (value >> 16) & 0xffff;
997 u16 vl15_credits = dd->vl15_init / 2;
998 u16 per_vl_credits;
999 __be16 be_per_vl_credits;
1000
1001 if (!(ppd->host_link_state & HLS_UP))
1002 goto err_exit;
1003 if (total_credits < vl15_credits)
1004 goto err_exit;
1005
1006 per_vl_credits = (total_credits - vl15_credits) / TXE_NUM_DATA_VL;
1007
1008 if (per_vl_credits < OPA_MIN_PER_VL_CREDITS)
1009 goto err_exit;
1010
1011 memset(&t, 0, sizeof(t));
1012 be_per_vl_credits = cpu_to_be16(per_vl_credits);
1013
1014 for (i = 0; i < TXE_NUM_DATA_VL; i++)
1015 t.vl[i].dedicated = be_per_vl_credits;
1016
1017 t.vl[15].dedicated = cpu_to_be16(vl15_credits);
1018 return set_buffer_control(ppd, &t);
1019
1020err_exit:
1021 snoop_dbg("port_state = 0x%x, total_credits = %d, vl15_credits = %d",
1022 ppd->host_link_state, total_credits, vl15_credits);
1023
1024 return -EINVAL;
1025}
1026
/*
 * hfi1_ioctl - ioctl handler for the snoop/capture device.
 * @fp: device file
 * @cmd: HFI1_SNOOP_IOC* command
 * @arg: user pointer or value, per command
 *
 * Requires CAP_SYS_ADMIN.  A device opened in capture mode may only
 * clear the queue, and clear or set the filter; snoop mode additionally
 * allows link-state manipulation, version query, and option setting.
 *
 * Return: 0 or a command-specific value via *arg on success; negative
 * errno on failure (-ENOTTY for unknown commands).
 */
static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
{
	struct hfi1_devdata *dd;
	void *filter_value = NULL;
	long ret = 0;
	int value = 0;
	u8 phys_state = 0;
	u8 link_state = 0;
	u16 dev_state = 0;
	unsigned long flags = 0;
	unsigned long *argp = NULL;
	struct hfi1_packet_filter_command filter_cmd = {0};
	int mode_flag = 0;
	struct hfi1_pportdata *ppd = NULL;
	unsigned int index;
	struct hfi1_link_info link_info;
	int read_cmd, write_cmd, read_ok, write_ok;

	dd = hfi1_dd_from_sc_inode(fp->f_inode);
	if (!dd)
		return -ENODEV;

	mode_flag = dd->hfi1_snoop.mode_flag;
	read_cmd = _IOC_DIR(cmd) & _IOC_READ;
	write_cmd = _IOC_DIR(cmd) & _IOC_WRITE;
	/*
	 * _IOC_READ means the kernel WRITES to the user buffer (and vice
	 * versa), hence read_cmd pairs with write_ok below.
	 */
	write_ok = access_ok(VERIFY_WRITE, (void __user *)arg, _IOC_SIZE(cmd));
	read_ok = access_ok(VERIFY_READ, (void __user *)arg, _IOC_SIZE(cmd));

	if ((read_cmd && !write_ok) || (write_cmd && !read_ok))
		return -EFAULT;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if ((mode_flag & HFI1_PORT_CAPTURE_MODE) &&
	    (cmd != HFI1_SNOOP_IOCCLEARQUEUE) &&
	    (cmd != HFI1_SNOOP_IOCCLEARFILTER) &&
	    (cmd != HFI1_SNOOP_IOCSETFILTER))
		/* Capture devices are allowed only 3 operations
		 * 1.Clear capture queue
		 * 2.Clear capture filter
		 * 3.Set capture filter
		 * Other are invalid.
		 */
		return -EINVAL;

	switch (cmd) {
	case HFI1_SNOOP_IOCSETLINKSTATE_EXTRA:
		memset(&link_info, 0, sizeof(link_info));

		if (copy_from_user(&link_info,
				   (struct hfi1_link_info __user *)arg,
				   sizeof(link_info)))
			return -EFAULT;

		value = link_info.port_state;
		index = link_info.port_number;
		if (index > dd->num_pports - 1)
			return -EINVAL;

		/* address of an array element; NULL check is vestigial */
		ppd = &dd->pport[index];
		if (!ppd)
			return -EINVAL;

		/* What we want to transition to */
		phys_state = (value >> 4) & 0xF;
		link_state = value & 0xF;
		snoop_dbg("Setting link state 0x%x", value);

		switch (link_state) {
		case IB_PORT_NOP:
			if (phys_state == 0)
				break;
			/* fall through */
		case IB_PORT_DOWN:
			switch (phys_state) {
			case 0:
				dev_state = HLS_DN_DOWNDEF;
				break;
			case 2:
				dev_state = HLS_DN_POLL;
				break;
			case 3:
				dev_state = HLS_DN_DISABLE;
				break;
			default:
				return -EINVAL;
			}
			ret = set_link_state(ppd, dev_state);
			break;
		case IB_PORT_ARMED:
			ret = set_link_state(ppd, HLS_UP_ARMED);
			if (!ret)
				send_idle_sma(dd, SMA_IDLE_ARM);
			break;
		case IB_PORT_ACTIVE:
			ret = set_link_state(ppd, HLS_UP_ACTIVE);
			if (!ret)
				send_idle_sma(dd, SMA_IDLE_ACTIVE);
			break;
		default:
			return -EINVAL;
		}

		if (ret)
			break;
		/* fall through: report the resulting link state to the user */
	case HFI1_SNOOP_IOCGETLINKSTATE:
	case HFI1_SNOOP_IOCGETLINKSTATE_EXTRA:
		if (cmd == HFI1_SNOOP_IOCGETLINKSTATE_EXTRA) {
			memset(&link_info, 0, sizeof(link_info));
			if (copy_from_user(&link_info,
					   (struct hfi1_link_info __user *)arg,
					   sizeof(link_info)))
				return -EFAULT;
			index = link_info.port_number;
		} else {
			/*
			 * NOTE(review): on the SETLINKSTATE_EXTRA fallthrough
			 * this re-reads the first int of the user struct as
			 * the port index -- presumably port_number is the
			 * first field; confirm against the uapi layout.
			 */
			ret = __get_user(index, (int __user *)arg);
			if (ret != 0)
				break;
		}

		if (index > dd->num_pports - 1)
			return -EINVAL;

		ppd = &dd->pport[index];
		if (!ppd)
			return -EINVAL;

		/* pack physical state in the high nibble, logical in the low */
		value = hfi1_ibphys_portstate(ppd);
		value <<= 4;
		value |= driver_lstate(ppd);

		snoop_dbg("Link port | Link State: %d", value);

		if ((cmd == HFI1_SNOOP_IOCGETLINKSTATE_EXTRA) ||
		    (cmd == HFI1_SNOOP_IOCSETLINKSTATE_EXTRA)) {
			link_info.port_state = value;
			link_info.node_guid = cpu_to_be64(ppd->guid);
			link_info.link_speed_active =
						ppd->link_speed_active;
			link_info.link_width_active =
						ppd->link_width_active;
			if (copy_to_user((struct hfi1_link_info __user *)arg,
					 &link_info, sizeof(link_info)))
				return -EFAULT;
		} else {
			ret = __put_user(value, (int __user *)arg);
		}
		break;

	case HFI1_SNOOP_IOCCLEARQUEUE:
		snoop_dbg("Clearing snoop queue");
		spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
		drain_snoop_list(&dd->hfi1_snoop.queue);
		spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
		break;

	case HFI1_SNOOP_IOCCLEARFILTER:
		snoop_dbg("Clearing filter");
		spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
		if (dd->hfi1_snoop.filter_callback) {
			/* Drain packets first */
			drain_snoop_list(&dd->hfi1_snoop.queue);
			dd->hfi1_snoop.filter_callback = NULL;
		}
		kfree(dd->hfi1_snoop.filter_value);
		dd->hfi1_snoop.filter_value = NULL;
		spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
		break;

	case HFI1_SNOOP_IOCSETFILTER:
		snoop_dbg("Setting filter");
		/* just copy command structure */
		argp = (unsigned long *)arg;
		if (copy_from_user(&filter_cmd, (void __user *)argp,
				   sizeof(filter_cmd)))
			return -EFAULT;

		if (filter_cmd.opcode >= HFI1_MAX_FILTERS) {
			pr_alert("Invalid opcode in request\n");
			return -EINVAL;
		}

		snoop_dbg("Opcode %d Len %d Ptr %p",
			  filter_cmd.opcode, filter_cmd.length,
			  filter_cmd.value_ptr);

		/* allocated outside the lock; freed on any failure below */
		filter_value = kcalloc(filter_cmd.length, sizeof(u8),
				       GFP_KERNEL);
		if (!filter_value)
			return -ENOMEM;

		/* copy remaining data from userspace */
		if (copy_from_user((u8 *)filter_value,
				   (void __user *)filter_cmd.value_ptr,
				   filter_cmd.length)) {
			kfree(filter_value);
			return -EFAULT;
		}
		/* Drain packets first */
		spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
		drain_snoop_list(&dd->hfi1_snoop.queue);
		dd->hfi1_snoop.filter_callback =
			hfi1_filters[filter_cmd.opcode].filter;
		/* just in case we see back to back sets */
		kfree(dd->hfi1_snoop.filter_value);
		dd->hfi1_snoop.filter_value = filter_value;
		spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
		break;
	case HFI1_SNOOP_IOCGETVERSION:
		value = SNOOP_CAPTURE_VERSION;
		snoop_dbg("Getting version: %d", value);
		ret = __put_user(value, (int __user *)arg);
		break;
	case HFI1_SNOOP_IOCSET_OPTS:
		snoop_flags = 0;
		ret = __get_user(value, (int __user *)arg);
		if (ret != 0)
			break;

		snoop_dbg("Setting snoop option %d", value);
		if (value & SNOOP_DROP_SEND)
			snoop_flags |= SNOOP_DROP_SEND;
		if (value & SNOOP_USE_METADATA)
			snoop_flags |= SNOOP_USE_METADATA;
		if (value & (SNOOP_SET_VL0TOVL15)) {
			ppd = &dd->pport[0]; /* first port will do */
			ret = hfi1_assign_snoop_link_credits(ppd, value);
		}
		break;
	default:
		return -ENOTTY;
	}

	return ret;
}
1264
1265static void snoop_list_add_tail(struct snoop_packet *packet,
1266 struct hfi1_devdata *dd)
1267{
1268 unsigned long flags = 0;
1269
1270 spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
1271 if (likely((dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) ||
1272 (dd->hfi1_snoop.mode_flag & HFI1_PORT_CAPTURE_MODE))) {
1273 list_add_tail(&packet->list, &dd->hfi1_snoop.queue);
1274 snoop_dbg("Added packet to list");
1275 }
1276
1277 /*
1278 * Technically we can could have closed the snoop device while waiting
1279 * on the above lock and it is gone now. The snoop mode_flag will
1280 * prevent us from adding the packet to the queue though.
1281 */
1282
1283 spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
1284 wake_up_interruptible(&dd->hfi1_snoop.waitq);
1285}
1286
1287static inline int hfi1_filter_check(void *val, const char *msg)
1288{
1289 if (!val) {
1290 snoop_dbg("Error invalid %s value for filter", msg);
1291 return HFI1_FILTER_ERR;
1292 }
1293 return 0;
1294}
1295
1296static int hfi1_filter_lid(void *ibhdr, void *packet_data, void *value)
1297{
1298 struct hfi1_ib_header *hdr;
1299 int ret;
1300
1301 ret = hfi1_filter_check(ibhdr, "header");
1302 if (ret)
1303 return ret;
1304 ret = hfi1_filter_check(value, "user");
1305 if (ret)
1306 return ret;
1307 hdr = (struct hfi1_ib_header *)ibhdr;
1308
1309 if (*((u16 *)value) == be16_to_cpu(hdr->lrh[3])) /* matches slid */
1310 return HFI1_FILTER_HIT; /* matched */
1311
1312 return HFI1_FILTER_MISS; /* Not matched */
1313}
1314
1315static int hfi1_filter_dlid(void *ibhdr, void *packet_data, void *value)
1316{
1317 struct hfi1_ib_header *hdr;
1318 int ret;
1319
1320 ret = hfi1_filter_check(ibhdr, "header");
1321 if (ret)
1322 return ret;
1323 ret = hfi1_filter_check(value, "user");
1324 if (ret)
1325 return ret;
1326
1327 hdr = (struct hfi1_ib_header *)ibhdr;
1328
1329 if (*((u16 *)value) == be16_to_cpu(hdr->lrh[1]))
1330 return HFI1_FILTER_HIT;
1331
1332 return HFI1_FILTER_MISS;
1333}
1334
1335/* Not valid for outgoing packets, send handler passes null for data*/
1336static int hfi1_filter_mad_mgmt_class(void *ibhdr, void *packet_data,
1337 void *value)
1338{
1339 struct hfi1_ib_header *hdr;
1340 struct hfi1_other_headers *ohdr = NULL;
1341 struct ib_smp *smp = NULL;
1342 u32 qpn = 0;
1343 int ret;
1344
1345 ret = hfi1_filter_check(ibhdr, "header");
1346 if (ret)
1347 return ret;
1348 ret = hfi1_filter_check(packet_data, "packet_data");
1349 if (ret)
1350 return ret;
1351 ret = hfi1_filter_check(value, "user");
1352 if (ret)
1353 return ret;
1354
1355 hdr = (struct hfi1_ib_header *)ibhdr;
1356
1357 /* Check for GRH */
1358 if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH)
1359 ohdr = &hdr->u.oth; /* LRH + BTH + DETH */
1360 else
1361 ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */
1362
1363 qpn = be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF;
1364 if (qpn <= 1) {
1365 smp = (struct ib_smp *)packet_data;
1366 if (*((u8 *)value) == smp->mgmt_class)
1367 return HFI1_FILTER_HIT;
1368 else
1369 return HFI1_FILTER_MISS;
1370 }
1371 return HFI1_FILTER_ERR;
1372}
1373
1374static int hfi1_filter_qp_number(void *ibhdr, void *packet_data, void *value)
1375{
1376 struct hfi1_ib_header *hdr;
1377 struct hfi1_other_headers *ohdr = NULL;
1378 int ret;
1379
1380 ret = hfi1_filter_check(ibhdr, "header");
1381 if (ret)
1382 return ret;
1383 ret = hfi1_filter_check(value, "user");
1384 if (ret)
1385 return ret;
1386
1387 hdr = (struct hfi1_ib_header *)ibhdr;
1388
1389 /* Check for GRH */
1390 if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH)
1391 ohdr = &hdr->u.oth; /* LRH + BTH + DETH */
1392 else
1393 ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */
1394 if (*((u32 *)value) == (be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF))
1395 return HFI1_FILTER_HIT;
1396
1397 return HFI1_FILTER_MISS;
1398}
1399
1400static int hfi1_filter_ibpacket_type(void *ibhdr, void *packet_data,
1401 void *value)
1402{
1403 u32 lnh = 0;
1404 u8 opcode = 0;
1405 struct hfi1_ib_header *hdr;
1406 struct hfi1_other_headers *ohdr = NULL;
1407 int ret;
1408
1409 ret = hfi1_filter_check(ibhdr, "header");
1410 if (ret)
1411 return ret;
1412 ret = hfi1_filter_check(value, "user");
1413 if (ret)
1414 return ret;
1415
1416 hdr = (struct hfi1_ib_header *)ibhdr;
1417
1418 lnh = (be16_to_cpu(hdr->lrh[0]) & 3);
1419
1420 if (lnh == HFI1_LRH_BTH)
1421 ohdr = &hdr->u.oth;
1422 else if (lnh == HFI1_LRH_GRH)
1423 ohdr = &hdr->u.l.oth;
1424 else
1425 return HFI1_FILTER_ERR;
1426
1427 opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
1428
1429 if (*((u8 *)value) == ((opcode >> 5) & 0x7))
1430 return HFI1_FILTER_HIT;
1431
1432 return HFI1_FILTER_MISS;
1433}
1434
1435static int hfi1_filter_ib_service_level(void *ibhdr, void *packet_data,
1436 void *value)
1437{
1438 struct hfi1_ib_header *hdr;
1439 int ret;
1440
1441 ret = hfi1_filter_check(ibhdr, "header");
1442 if (ret)
1443 return ret;
1444 ret = hfi1_filter_check(value, "user");
1445 if (ret)
1446 return ret;
1447
1448 hdr = (struct hfi1_ib_header *)ibhdr;
1449
1450 if ((*((u8 *)value)) == ((be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF))
1451 return HFI1_FILTER_HIT;
1452
1453 return HFI1_FILTER_MISS;
1454}
1455
1456static int hfi1_filter_ib_pkey(void *ibhdr, void *packet_data, void *value)
1457{
1458 u32 lnh = 0;
1459 struct hfi1_ib_header *hdr;
1460 struct hfi1_other_headers *ohdr = NULL;
1461 int ret;
1462
1463 ret = hfi1_filter_check(ibhdr, "header");
1464 if (ret)
1465 return ret;
1466 ret = hfi1_filter_check(value, "user");
1467 if (ret)
1468 return ret;
1469
1470 hdr = (struct hfi1_ib_header *)ibhdr;
1471
1472 lnh = (be16_to_cpu(hdr->lrh[0]) & 3);
1473 if (lnh == HFI1_LRH_BTH)
1474 ohdr = &hdr->u.oth;
1475 else if (lnh == HFI1_LRH_GRH)
1476 ohdr = &hdr->u.l.oth;
1477 else
1478 return HFI1_FILTER_ERR;
1479
1480 /* P_key is 16-bit entity, however top most bit indicates
1481 * type of membership. 0 for limited and 1 for Full.
1482 * Limited members cannot accept information from other
1483 * Limited members, but communication is allowed between
1484 * every other combination of membership.
1485 * Hence we'll omit comparing top-most bit while filtering
1486 */
1487
1488 if ((*(u16 *)value & 0x7FFF) ==
1489 ((be32_to_cpu(ohdr->bth[0])) & 0x7FFF))
1490 return HFI1_FILTER_HIT;
1491
1492 return HFI1_FILTER_MISS;
1493}
1494
1495/*
1496 * If packet_data is NULL then this is coming from one of the send functions.
1497 * Thus we know if its an ingressed or egressed packet.
1498 */
1499static int hfi1_filter_direction(void *ibhdr, void *packet_data, void *value)
1500{
1501 u8 user_dir = *(u8 *)value;
1502 int ret;
1503
1504 ret = hfi1_filter_check(value, "user");
1505 if (ret)
1506 return ret;
1507
1508 if (packet_data) {
1509 /* Incoming packet */
1510 if (user_dir & HFI1_SNOOP_INGRESS)
1511 return HFI1_FILTER_HIT;
1512 } else {
1513 /* Outgoing packet */
1514 if (user_dir & HFI1_SNOOP_EGRESS)
1515 return HFI1_FILTER_HIT;
1516 }
1517
1518 return HFI1_FILTER_MISS;
1519}
1520
1521/*
1522 * Allocate a snoop packet. The structure that is stored in the ring buffer, not
1523 * to be confused with an hfi packet type.
1524 */
1525static struct snoop_packet *allocate_snoop_packet(u32 hdr_len,
1526 u32 data_len,
1527 u32 md_len)
1528{
1529 struct snoop_packet *packet;
1530
1531 packet = kzalloc(sizeof(*packet) + hdr_len + data_len
1532 + md_len,
1533 GFP_ATOMIC | __GFP_NOWARN);
1534 if (likely(packet))
1535 INIT_LIST_HEAD(&packet->list);
1536
1537 return packet;
1538}
1539
/*
 * Instead of having snoop and capture code intermixed with the recv
 * functions, both the interrupt handler and hfi1_ib_rcv() hijack the call
 * and land in here for snoop/capture; if not enabled the call goes
 * through as before. This gives us a single point to constrain all of the
 * snoop recv logic. There is nothing special that needs to happen for
 * bypass packets. This routine should not try to look into the packet; it
 * just copies it. There is no guarantee for filters when it comes to
 * bypass packets as there is no specific support. Bottom line is this
 * routine does not even know what a bypass packet is.
 */
int snoop_recv_handler(struct hfi1_packet *packet)
{
	struct hfi1_pportdata *ppd = packet->rcd->ppd;
	struct hfi1_ib_header *hdr = packet->hdr;
	int header_size = packet->hlen;
	void *data = packet->ebuf;
	u32 tlen = packet->tlen;
	struct snoop_packet *s_packet = NULL;
	int ret;
	int snoop_mode = 0;
	u32 md_len = 0;
	struct capture_md md;

	snoop_dbg("PACKET IN: hdr size %d tlen %d data %p", header_size, tlen,
		  data);

	trace_snoop_capture(ppd->dd, header_size, hdr, tlen - header_size,
			    data);

	/* With no filter installed, every packet is treated as a hit. */
	if (!ppd->dd->hfi1_snoop.filter_callback) {
		snoop_dbg("filter not set");
		ret = HFI1_FILTER_HIT;
	} else {
		ret = ppd->dd->hfi1_snoop.filter_callback(hdr, data,
					ppd->dd->hfi1_snoop.filter_value);
	}

	switch (ret) {
	case HFI1_FILTER_ERR:
		snoop_dbg("Error in filter call");
		break;
	case HFI1_FILTER_MISS:
		snoop_dbg("Filter Miss");
		break;
	case HFI1_FILTER_HIT:
		/* capture mode (or the metadata option) prepends capture_md */
		if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE)
			snoop_mode = 1;
		if ((snoop_mode == 0) ||
		    unlikely(snoop_flags & SNOOP_USE_METADATA))
			md_len = sizeof(struct capture_md);

		s_packet = allocate_snoop_packet(header_size,
						 tlen - header_size,
						 md_len);

		if (unlikely(!s_packet)) {
			dd_dev_warn_ratelimited(ppd->dd, "Unable to allocate snoop/capture packet\n");
			break;
		}

		if (md_len > 0) {
			memset(&md, 0, sizeof(struct capture_md));
			md.port = 1;
			md.dir = PKT_DIR_INGRESS;
			md.u.rhf = packet->rhf;
			memcpy(s_packet->data, &md, md_len);
		}

		/* We should always have a header */
		if (hdr) {
			memcpy(s_packet->data + md_len, hdr, header_size);
		} else {
			dd_dev_err(ppd->dd, "Unable to copy header to snoop/capture packet\n");
			kfree(s_packet);
			break;
		}

		/*
		 * Packets with no data are possible. If there is no data needed
		 * to take care of the last 4 bytes which are normally included
		 * with data buffers and are included in tlen. Since we kzalloc
		 * the buffer we do not need to set any values but if we decide
		 * not to use kzalloc we should zero them.
		 */
		if (data)
			memcpy(s_packet->data + header_size + md_len, data,
			       tlen - header_size);

		s_packet->total_len = tlen + md_len;
		snoop_list_add_tail(s_packet, ppd->dd);

		/*
		 * If we are snooping the packet not capturing then throw away
		 * after adding to the list.
		 */
		snoop_dbg("Capturing packet");
		if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) {
			snoop_dbg("Throwing packet away");
			/*
			 * If we are dropping the packet we still may need to
			 * handle the case where error flags are set, this is
			 * normally done by the type specific handler but that
			 * won't be called in this case.
			 */
			if (unlikely(rhf_err_flags(packet->rhf)))
				handle_eflags(packet);

			/* throw the packet on the floor */
			return RHF_RCV_CONTINUE;
		}
		break;
	default:
		break;
	}

	/*
	 * We do not care what type of packet came in here - just pass it off
	 * to the normal handler.
	 */
	return ppd->dd->normal_rhf_rcv_functions[rhf_rcv_type(packet->rhf)]
			(packet);
}
1664
/*
 * Handle snooping and capturing packets when sdma is being used.
 *
 * SDMA snooping is not implemented: log that fact and hand the send off
 * to the normal DMA path.  NOTE(review): the caller's @pbc is replaced
 * with 0 here, forcing the send path to regenerate the PBC -- presumably
 * intentional since nothing is captured; confirm before reuse.
 */
int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			   u64 pbc)
{
	pr_alert("Snooping/Capture of Send DMA Packets Is Not Supported!\n");
	snoop_dbg("Unsupported Operation");
	return hfi1_verbs_send_dma(qp, ps, 0);
}
1675
/*
 * Handle snooping and capturing packets when pio is being used. Does not
 * handle bypass packets. The only way to send a bypass packet currently
 * is to use the diagpkt interface; when that interface is enabled,
 * snoop/capture is not.
 */
int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			   u64 pbc)
{
	u32 hdrwords = qp->s_hdrwords;
	struct rvt_sge_state *ss = qp->s_cur_sge;
	u32 len = qp->s_cur_size;
	u32 dwords = (len + 3) >> 2;
	u32 plen = hdrwords + dwords + 2; /* includes pbc */
	struct hfi1_pportdata *ppd = ps->ppd;
	struct snoop_packet *s_packet = NULL;
	u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
	u32 length = 0;
	struct rvt_sge_state temp_ss;
	void *data = NULL;
	void *data_start = NULL;
	int ret;
	int snoop_mode = 0;
	int md_len = 0;
	struct capture_md md;
	u32 vl;
	u32 hdr_len = hdrwords << 2;
	u32 tlen = HFI1_GET_PKT_LEN(&ps->s_txreq->phdr.hdr);

	/*
	 * md.u.pbc doubles as the PBC handed to hfi1_verbs_send_pio() at
	 * the end of this function; 0 means "let the send path build it".
	 */
	md.u.pbc = 0;

	snoop_dbg("PACKET OUT: hdrword %u len %u plen %u dwords %u tlen %u",
		  hdrwords, len, plen, dwords, tlen);
	if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE)
		snoop_mode = 1;
	if ((snoop_mode == 0) ||
	    unlikely(snoop_flags & SNOOP_USE_METADATA))
		md_len = sizeof(struct capture_md);

	/* not using ss->total_len as arg 2 b/c that does not count CRC */
	s_packet = allocate_snoop_packet(hdr_len, tlen - hdr_len, md_len);

	if (unlikely(!s_packet)) {
		dd_dev_warn_ratelimited(ppd->dd, "Unable to allocate snoop/capture packet\n");
		goto out;
	}

	s_packet->total_len = tlen + md_len;

	if (md_len > 0) {
		memset(&md, 0, sizeof(struct capture_md));
		md.port = 1;
		md.dir = PKT_DIR_EGRESS;
		if (likely(pbc == 0)) {
			/* record the PBC the send path is going to build */
			vl = be16_to_cpu(ps->s_txreq->phdr.hdr.lrh[0]) >> 12;
			md.u.pbc = create_pbc(ppd, 0, qp->s_srate, vl, plen);
		} else {
			md.u.pbc = 0;
		}
		memcpy(s_packet->data, &md, md_len);
	} else {
		md.u.pbc = pbc;
	}

	/* Copy header */
	if (likely(hdr)) {
		memcpy(s_packet->data + md_len, hdr, hdr_len);
	} else {
		dd_dev_err(ppd->dd,
			   "Unable to copy header to snoop/capture packet\n");
		kfree(s_packet);
		goto out;
	}

	if (ss) {
		data = s_packet->data + hdr_len + md_len;
		data_start = data;

		/*
		 * Copy SGE State
		 * The update_sge() function below will not modify the
		 * individual SGEs in the array. It will make a copy each time
		 * and operate on that. So we only need to copy this instance
		 * and it won't impact PIO.
		 */
		temp_ss = *ss;
		length = len;

		snoop_dbg("Need to copy %d bytes", length);
		while (length) {
			void *addr = temp_ss.sge.vaddr;
			u32 slen = temp_ss.sge.length;

			if (slen > length) {
				slen = length;
				snoop_dbg("slen %d > len %d", slen, length);
			}
			snoop_dbg("copy %d to %p", slen, addr);
			memcpy(data, addr, slen);
			update_sge(&temp_ss, slen);
			length -= slen;
			data += slen;
			snoop_dbg("data is now %p bytes left %d", data, length);
		}
		snoop_dbg("Completed SGE copy");
	}

	/*
	 * Why do the filter check down here? Because the event tracing has its
	 * own filtering and we need to have walked the SGE list.
	 */
	if (!ppd->dd->hfi1_snoop.filter_callback) {
		snoop_dbg("filter not set\n");
		ret = HFI1_FILTER_HIT;
	} else {
		/* NULL packet_data tells the filter this is egress */
		ret = ppd->dd->hfi1_snoop.filter_callback(
					&ps->s_txreq->phdr.hdr,
					NULL,
					ppd->dd->hfi1_snoop.filter_value);
	}

	switch (ret) {
	case HFI1_FILTER_ERR:
		snoop_dbg("Error in filter call");
		/* fall through */
	case HFI1_FILTER_MISS:
		snoop_dbg("Filter Miss");
		kfree(s_packet);
		break;
	case HFI1_FILTER_HIT:
		snoop_dbg("Capturing packet");
		snoop_list_add_tail(s_packet, ppd->dd);

		if (unlikely((snoop_flags & SNOOP_DROP_SEND) &&
			     (ppd->dd->hfi1_snoop.mode_flag &
			      HFI1_PORT_SNOOP_MODE))) {
			unsigned long flags;

			snoop_dbg("Dropping packet");
			/* complete the WQE as though it had been sent */
			if (qp->s_wqe) {
				spin_lock_irqsave(&qp->s_lock, flags);
				hfi1_send_complete(
						qp,
						qp->s_wqe,
						IB_WC_SUCCESS);
				spin_unlock_irqrestore(&qp->s_lock, flags);
			} else if (qp->ibqp.qp_type == IB_QPT_RC) {
				spin_lock_irqsave(&qp->s_lock, flags);
				hfi1_rc_send_complete(qp,
						      &ps->s_txreq->phdr.hdr);
				spin_unlock_irqrestore(&qp->s_lock, flags);
			}

			/*
			 * If snoop is dropping the packet we need to put the
			 * txreq back because no one else will.
			 */
			hfi1_put_txreq(ps->s_txreq);
			return 0;
		}
		break;
	default:
		kfree(s_packet);
		break;
	}
out:
	return hfi1_verbs_send_pio(qp, ps, md.u.pbc);
}
1843
/*
 * Callers of this must pass a hfi1_ib_header type for the from ptr.
 * Currently this can be used anywhere, but the intention is for inline
 * ACKs for RC and CCA packets. We don't restrict this usage though.
 */
void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf,
			   u64 pbc, const void *from, size_t count)
{
	int snoop_mode = 0;
	int md_len = 0;
	struct capture_md md;
	struct snoop_packet *s_packet = NULL;

	/*
	 * count is in dwords so we need to convert to bytes.
	 * We also need to account for CRC which would be tacked on by hardware.
	 */
	int packet_len = (count << 2) + 4;
	int ret;

	snoop_dbg("ACK OUT: len %d", packet_len);

	/* With no filter installed, every packet is a hit. */
	if (!dd->hfi1_snoop.filter_callback) {
		snoop_dbg("filter not set");
		ret = HFI1_FILTER_HIT;
	} else {
		/* NULL packet_data marks this as an egress packet */
		ret = dd->hfi1_snoop.filter_callback(
				(struct hfi1_ib_header *)from,
				NULL,
				dd->hfi1_snoop.filter_value);
	}

	switch (ret) {
	case HFI1_FILTER_ERR:
		snoop_dbg("Error in filter call");
		/* fall through */
	case HFI1_FILTER_MISS:
		snoop_dbg("Filter Miss");
		break;
	case HFI1_FILTER_HIT:
		snoop_dbg("Capturing packet");
		if (dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE)
			snoop_mode = 1;
		if ((snoop_mode == 0) ||
		    unlikely(snoop_flags & SNOOP_USE_METADATA))
			md_len = sizeof(struct capture_md);

		s_packet = allocate_snoop_packet(packet_len, 0, md_len);

		if (unlikely(!s_packet)) {
			dd_dev_warn_ratelimited(dd, "Unable to allocate snoop/capture packet\n");
			goto inline_pio_out;
		}

		s_packet->total_len = packet_len + md_len;

		/* Fill in the metadata for the packet */
		if (md_len > 0) {
			memset(&md, 0, sizeof(struct capture_md));
			md.port = 1;
			md.dir = PKT_DIR_EGRESS;
			md.u.pbc = pbc;
			memcpy(s_packet->data, &md, md_len);
		}

		/*
		 * Add the packet data which is a single buffer.
		 * NOTE(review): packet_len includes 4 CRC bytes that are not
		 * part of @from (which holds count dwords), so this memcpy
		 * appears to read 4 bytes past the source buffer -- confirm.
		 */
		memcpy(s_packet->data + md_len, from, packet_len);

		snoop_list_add_tail(s_packet, dd);

		/* snoop mode with SNOOP_DROP_SEND swallows the packet */
		if (unlikely((snoop_flags & SNOOP_DROP_SEND) && snoop_mode)) {
			snoop_dbg("Dropping packet");
			return;
		}
		break;
	default:
		break;
	}

inline_pio_out:
	pio_copy(dd, pbuf, pbc, from, count);
}
diff --git a/drivers/staging/rdma/hfi1/eprom.c b/drivers/staging/rdma/hfi1/eprom.c
deleted file mode 100644
index bd8771570f81..000000000000
--- a/drivers/staging/rdma/hfi1/eprom.c
+++ /dev/null
@@ -1,471 +0,0 @@
1/*
2 * Copyright(c) 2015, 2016 Intel Corporation.
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of version 2 of the GNU General Public License as
11 * published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * BSD LICENSE
19 *
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions
22 * are met:
23 *
24 * - Redistributions of source code must retain the above copyright
25 * notice, this list of conditions and the following disclaimer.
26 * - Redistributions in binary form must reproduce the above copyright
27 * notice, this list of conditions and the following disclaimer in
28 * the documentation and/or other materials provided with the
29 * distribution.
30 * - Neither the name of Intel Corporation nor the names of its
31 * contributors may be used to endorse or promote products derived
32 * from this software without specific prior written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 *
46 */
47#include <linux/delay.h>
48#include "hfi.h"
49#include "common.h"
50#include "eprom.h"
51
52/*
53 * The EPROM is logically divided into three partitions:
54 * partition 0: the first 128K, visible from PCI ROM BAR
55 * partition 1: 4K config file (sector size)
56 * partition 2: the rest
57 */
58#define P0_SIZE (128 * 1024)
59#define P1_SIZE (4 * 1024)
60#define P1_START P0_SIZE
61#define P2_START (P0_SIZE + P1_SIZE)
62
63/* erase sizes supported by the controller */
64#define SIZE_4KB (4 * 1024)
65#define MASK_4KB (SIZE_4KB - 1)
66
67#define SIZE_32KB (32 * 1024)
68#define MASK_32KB (SIZE_32KB - 1)
69
70#define SIZE_64KB (64 * 1024)
71#define MASK_64KB (SIZE_64KB - 1)
72
73/* controller page size, in bytes */
74#define EP_PAGE_SIZE 256
75#define EEP_PAGE_MASK (EP_PAGE_SIZE - 1)
76
77/* controller commands */
78#define CMD_SHIFT 24
79#define CMD_NOP (0)
80#define CMD_PAGE_PROGRAM(addr) ((0x02 << CMD_SHIFT) | addr)
81#define CMD_READ_DATA(addr) ((0x03 << CMD_SHIFT) | addr)
82#define CMD_READ_SR1 ((0x05 << CMD_SHIFT))
83#define CMD_WRITE_ENABLE ((0x06 << CMD_SHIFT))
84#define CMD_SECTOR_ERASE_4KB(addr) ((0x20 << CMD_SHIFT) | addr)
85#define CMD_SECTOR_ERASE_32KB(addr) ((0x52 << CMD_SHIFT) | addr)
86#define CMD_CHIP_ERASE ((0x60 << CMD_SHIFT))
87#define CMD_READ_MANUF_DEV_ID ((0x90 << CMD_SHIFT))
88#define CMD_RELEASE_POWERDOWN_NOID ((0xab << CMD_SHIFT))
89#define CMD_SECTOR_ERASE_64KB(addr) ((0xd8 << CMD_SHIFT) | addr)
90
91/* controller interface speeds */
92#define EP_SPEED_FULL 0x2 /* full speed */
93
94/* controller status register 1 bits */
95#define SR1_BUSY 0x1ull /* the BUSY bit in SR1 */
96
97/* sleep length while waiting for controller */
98#define WAIT_SLEEP_US 100 /* must be larger than 5 (see usage) */
99#define COUNT_DELAY_SEC(n) ((n) * (1000000 / WAIT_SLEEP_US))
100
101/* GPIO pins */
102#define EPROM_WP_N BIT_ULL(14) /* EPROM write line */
103
104/*
105 * How long to wait for the EPROM to become available, in ms.
106 * The spec 32 Mb EPROM takes around 40s to erase then write.
107 * Double it for safety.
108 */
109#define EPROM_TIMEOUT 80000 /* ms */
110
111/*
112 * Turn on external enable line that allows writing on the flash.
113 */
114static void write_enable(struct hfi1_devdata *dd)
115{
116 /* raise signal */
117 write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) | EPROM_WP_N);
118 /* raise enable */
119 write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) | EPROM_WP_N);
120}
121
122/*
123 * Turn off external enable line that allows writing on the flash.
124 */
125static void write_disable(struct hfi1_devdata *dd)
126{
127 /* lower signal */
128 write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) & ~EPROM_WP_N);
129 /* lower enable */
130 write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) & ~EPROM_WP_N);
131}
132
133/*
134 * Wait for the device to become not busy. Must be called after all
135 * write or erase operations.
136 */
137static int wait_for_not_busy(struct hfi1_devdata *dd)
138{
139 unsigned long count = 0;
140 u64 reg;
141 int ret = 0;
142
143 /* starts page mode */
144 write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_SR1);
145 while (1) {
146 udelay(WAIT_SLEEP_US);
147 usleep_range(WAIT_SLEEP_US - 5, WAIT_SLEEP_US + 5);
148 count++;
149 reg = read_csr(dd, ASIC_EEP_DATA);
150 if ((reg & SR1_BUSY) == 0)
151 break;
152 /* 200s is the largest time for a 128Mb device */
153 if (count > COUNT_DELAY_SEC(200)) {
154 dd_dev_err(dd, "waited too long for SPI FLASH busy to clear - failing\n");
155 ret = -ETIMEDOUT;
156 break; /* break, not goto - must stop page mode */
157 }
158 }
159
160 /* stop page mode with a NOP */
161 write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP);
162
163 return ret;
164}
165
166/*
167 * Read the device ID from the SPI controller.
168 */
169static u32 read_device_id(struct hfi1_devdata *dd)
170{
171 /* read the Manufacture Device ID */
172 write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_MANUF_DEV_ID);
173 return (u32)read_csr(dd, ASIC_EEP_DATA);
174}
175
/*
 * Erase the whole flash.
 *
 * Sequence is order-critical: the external write protect is lifted,
 * the controller-level WRITE_ENABLE command is issued (required before
 * any erase), then the chip erase itself.  wait_for_not_busy() blocks
 * until the flash finishes, and write protection is restored on every
 * path.
 *
 * Return: 0 on success, -ETIMEDOUT if the erase never completed.
 */
static int erase_chip(struct hfi1_devdata *dd)
{
	int ret;

	write_enable(dd);

	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE);
	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_CHIP_ERASE);
	ret = wait_for_not_busy(dd);

	write_disable(dd);

	return ret;
}
193
/*
 * Erase the byte range [start, start + len) of the flash.
 *
 * Both endpoints must be 4KB aligned (the smallest erase the
 * controller supports).  The loop greedily picks the largest erase
 * command (64KB, then 32KB, then 4KB) that is aligned at the current
 * position and fits in the remaining range, so aligned interiors use
 * fewer, larger erases.  WRITE_ENABLE must be re-issued before every
 * individual erase command.
 *
 * Return: 0 on success, -EINVAL on bad alignment or wrapped range,
 * or the error from wait_for_not_busy().
 */
static int erase_range(struct hfi1_devdata *dd, u32 start, u32 len)
{
	u32 end = start + len;
	int ret = 0;

	/* reject a range whose end wrapped past 2^32 */
	if (end < start)
		return -EINVAL;

	/* check the end points for the minimum erase */
	if ((start & MASK_4KB) || (end & MASK_4KB)) {
		dd_dev_err(dd,
			   "%s: non-aligned range (0x%x,0x%x) for a 4KB erase\n",
			   __func__, start, end);
		return -EINVAL;
	}

	write_enable(dd);

	while (start < end) {
		write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE);
		/* check in order of largest to smallest */
		if (((start & MASK_64KB) == 0) && (start + SIZE_64KB <= end)) {
			write_csr(dd, ASIC_EEP_ADDR_CMD,
				  CMD_SECTOR_ERASE_64KB(start));
			start += SIZE_64KB;
		} else if (((start & MASK_32KB) == 0) &&
			   (start + SIZE_32KB <= end)) {
			write_csr(dd, ASIC_EEP_ADDR_CMD,
				  CMD_SECTOR_ERASE_32KB(start));
			start += SIZE_32KB;
		} else { /* 4KB will work */
			write_csr(dd, ASIC_EEP_ADDR_CMD,
				  CMD_SECTOR_ERASE_4KB(start));
			start += SIZE_4KB;
		}
		ret = wait_for_not_busy(dd);
		if (ret)
			goto done;
	}

done:
	/* always restore write protection, even on error */
	write_disable(dd);

	return ret;
}
242
243/*
244 * Read a 256 byte (64 dword) EPROM page.
245 * All callers have verified the offset is at a page boundary.
246 */
247static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result)
248{
249 int i;
250
251 write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset));
252 for (i = 0; i < EP_PAGE_SIZE / sizeof(u32); i++)
253 result[i] = (u32)read_csr(dd, ASIC_EEP_DATA);
254 write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); /* close open page */
255}
256
257/*
258 * Read length bytes starting at offset. Copy to user address addr.
259 */
260static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
261{
262 u32 offset;
263 u32 buffer[EP_PAGE_SIZE / sizeof(u32)];
264 int ret = 0;
265
266 /* reject anything not on an EPROM page boundary */
267 if ((start & EEP_PAGE_MASK) || (len & EEP_PAGE_MASK))
268 return -EINVAL;
269
270 for (offset = 0; offset < len; offset += EP_PAGE_SIZE) {
271 read_page(dd, start + offset, buffer);
272 if (copy_to_user((void __user *)(addr + offset),
273 buffer, EP_PAGE_SIZE)) {
274 ret = -EFAULT;
275 goto done;
276 }
277 }
278
279done:
280 return ret;
281}
282
/*
 * Write a 256 byte (64 dword) EPROM page.
 * All callers have verified the offset is at a page boundary.
 *
 * Note the ordering: the first data dword is placed in the data
 * register *before* the PAGE_PROGRAM command is issued, and the
 * remaining 63 dwords follow it - presumably the controller latches
 * the primed dword when the command starts (hardware protocol; do
 * not reorder).
 *
 * Return: result of wait_for_not_busy() - 0 or -ETIMEDOUT.
 */
static int write_page(struct hfi1_devdata *dd, u32 offset, u32 *data)
{
	int i;

	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE);
	write_csr(dd, ASIC_EEP_DATA, data[0]);
	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_PAGE_PROGRAM(offset));
	for (i = 1; i < EP_PAGE_SIZE / sizeof(u32); i++)
		write_csr(dd, ASIC_EEP_DATA, data[i]);
	/* will close the open page */
	return wait_for_not_busy(dd);
}
299
300/*
301 * Write length bytes starting at offset. Read from user address addr.
302 */
303static int write_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
304{
305 u32 offset;
306 u32 buffer[EP_PAGE_SIZE / sizeof(u32)];
307 int ret = 0;
308
309 /* reject anything not on an EPROM page boundary */
310 if ((start & EEP_PAGE_MASK) || (len & EEP_PAGE_MASK))
311 return -EINVAL;
312
313 write_enable(dd);
314
315 for (offset = 0; offset < len; offset += EP_PAGE_SIZE) {
316 if (copy_from_user(buffer, (void __user *)(addr + offset),
317 EP_PAGE_SIZE)) {
318 ret = -EFAULT;
319 goto done;
320 }
321 ret = write_page(dd, start + offset, buffer);
322 if (ret)
323 goto done;
324 }
325
326done:
327 write_disable(dd);
328 return ret;
329}
330
331/* convert an range composite to a length, in bytes */
332static inline u32 extract_rlen(u32 composite)
333{
334 return (composite & 0xffff) * EP_PAGE_SIZE;
335}
336
337/* convert an range composite to a start, in bytes */
338static inline u32 extract_rstart(u32 composite)
339{
340 return (composite >> 16) * EP_PAGE_SIZE;
341}
342
/*
 * Perform the given operation on the EPROM.  Called from user space;
 * the user credentials have already been checked.
 *
 * Resolves the device from the file's minor number, takes the shared
 * CR_EPROM chip resource for the duration of the operation (both HFIs
 * share the one flash part), dispatches on cmd->type, and releases
 * the resource before returning.
 *
 * Return: 0 on success, -ERRNO on error.
 */
int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd)
{
	struct hfi1_devdata *dd;
	u32 dev_id;
	u32 rlen;	/* range length */
	u32 rstart;	/* range start */
	int i_minor;
	int ret = 0;

	/*
	 * Map the device file to device data using the relative minor.
	 * The device file minor number is the unit number + 1.  0 is
	 * the generic device file - reject it.
	 */
	i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
	if (i_minor <= 0)
		return -EINVAL;
	dd = hfi1_lookup(i_minor - 1);
	if (!dd) {
		pr_err("%s: cannot find unit %d!\n", __func__, i_minor);
		return -EINVAL;
	}

	/* some devices do not have an EPROM */
	if (!dd->eprom_available)
		return -EOPNOTSUPP;

	/* serialize against the other HFI touching the same flash */
	ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
	if (ret) {
		dd_dev_err(dd, "%s: unable to acquire EPROM resource\n",
			   __func__);
		goto done_asic;
	}

	dd_dev_info(dd, "%s: cmd: type %d, len 0x%x, addr 0x%016llx\n",
		    __func__, cmd->type, cmd->len, cmd->addr);

	switch (cmd->type) {
	case HFI1_CMD_EP_INFO:
		/* caller must supply exactly a u32 for the device ID */
		if (cmd->len != sizeof(u32)) {
			ret = -ERANGE;
			break;
		}
		dev_id = read_device_id(dd);
		/* addr points to a u32 user buffer */
		if (copy_to_user((void __user *)cmd->addr, &dev_id,
				 sizeof(u32)))
			ret = -EFAULT;
		break;

	case HFI1_CMD_EP_ERASE_CHIP:
		ret = erase_chip(dd);
		break;

	/*
	 * For the range commands cmd->len is a composite:
	 * high 16 bits = start page, low 16 bits = length in pages.
	 */
	case HFI1_CMD_EP_ERASE_RANGE:
		rlen = extract_rlen(cmd->len);
		rstart = extract_rstart(cmd->len);
		ret = erase_range(dd, rstart, rlen);
		break;

	case HFI1_CMD_EP_READ_RANGE:
		rlen = extract_rlen(cmd->len);
		rstart = extract_rstart(cmd->len);
		ret = read_length(dd, rstart, rlen, cmd->addr);
		break;

	case HFI1_CMD_EP_WRITE_RANGE:
		rlen = extract_rlen(cmd->len);
		rstart = extract_rstart(cmd->len);
		ret = write_length(dd, rstart, rlen, cmd->addr);
		break;

	default:
		dd_dev_err(dd, "%s: unexpected command %d\n",
			   __func__, cmd->type);
		ret = -EINVAL;
		break;
	}

	release_chip_resource(dd, CR_EPROM);
	/* done_asic is below release: the failed-acquire path must skip it */
done_asic:
	return ret;
}
432
/*
 * Initialize the EPROM handler.
 *
 * Only the discrete chip variant has an EPROM; other devices return 0
 * immediately with eprom_available left false.  The reset sequence
 * (assert reset, clear reset while setting the interface speed, wake
 * with "release powerdown NoID") is performed under the CR_EPROM
 * resource so the two HFIs sharing the part never reset it
 * concurrently.
 *
 * Return: 0 on success (including "no EPROM"), -ERRNO if the resource
 * could not be acquired.
 */
int eprom_init(struct hfi1_devdata *dd)
{
	int ret = 0;

	/* only the discrete chip has an EPROM */
	if (dd->pcidev->device != PCI_DEVICE_ID_INTEL0)
		return 0;

	/*
	 * It is OK if both HFIs reset the EPROM as long as they don't
	 * do it at the same time.
	 */
	ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
	if (ret) {
		dd_dev_err(dd,
			   "%s: unable to acquire EPROM resource, no EPROM support\n",
			   __func__);
		goto done_asic;
	}

	/* reset EPROM to be sure it is in a good state */

	/* set reset */
	write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_EP_RESET_SMASK);
	/* clear reset, set speed */
	write_csr(dd, ASIC_EEP_CTL_STAT,
		  EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT);

	/* wake the device with command "release powerdown NoID" */
	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_RELEASE_POWERDOWN_NOID);

	/* only now do the command paths consider the EPROM usable */
	dd->eprom_available = true;
	release_chip_resource(dd, CR_EPROM);
done_asic:
	return ret;
}
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 37dd534cbeab..c8a773ffe23b 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -239,12 +239,15 @@ struct ib_vendor_mad {
239 239
240#define IB_MGMT_CLASSPORTINFO_ATTR_ID cpu_to_be16(0x0001) 240#define IB_MGMT_CLASSPORTINFO_ATTR_ID cpu_to_be16(0x0001)
241 241
242#define IB_CLASS_PORT_INFO_RESP_TIME_MASK 0x1F
243#define IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE 5
244
242struct ib_class_port_info { 245struct ib_class_port_info {
243 u8 base_version; 246 u8 base_version;
244 u8 class_version; 247 u8 class_version;
245 __be16 capability_mask; 248 __be16 capability_mask;
246 u8 reserved[3]; 249 /* 27 bits for cap_mask2, 5 bits for resp_time */
247 u8 resp_time_value; 250 __be32 cap_mask2_resp_time;
248 u8 redirect_gid[16]; 251 u8 redirect_gid[16];
249 __be32 redirect_tcslfl; 252 __be32 redirect_tcslfl;
250 __be16 redirect_lid; 253 __be16 redirect_lid;
@@ -259,6 +262,59 @@ struct ib_class_port_info {
259 __be32 trap_qkey; 262 __be32 trap_qkey;
260}; 263};
261 264
/**
 * ib_get_cpi_resp_time - Returns the resp_time value from
 * cap_mask2_resp_time in ib_class_port_info.
 * @cpi: A struct ib_class_port_info mad.
 *
 * resp_time occupies the low 5 bits of the combined big-endian
 * cap_mask2_resp_time field.
 */
static inline u8 ib_get_cpi_resp_time(struct ib_class_port_info *cpi)
{
	return (u8)(be32_to_cpu(cpi->cap_mask2_resp_time) &
		    IB_CLASS_PORT_INFO_RESP_TIME_MASK);
}
275
/**
 * ib_set_cpi_resp_time - Sets the response time in an
 * ib_class_port_info mad.
 * @cpi: A struct ib_class_port_info.
 * @rtime: The response time to set.
 *
 * Overwrites only the low 5 resp_time bits of the big-endian
 * cap_mask2_resp_time field; the 27 cap_mask2 bits are preserved.
 */
static inline void ib_set_cpi_resp_time(struct ib_class_port_info *cpi,
					u8 rtime)
{
	cpi->cap_mask2_resp_time =
		(cpi->cap_mask2_resp_time &
		 cpu_to_be32(~IB_CLASS_PORT_INFO_RESP_TIME_MASK)) |
		cpu_to_be32(rtime & IB_CLASS_PORT_INFO_RESP_TIME_MASK);
}
290
/**
 * ib_get_cpi_capmask2 - Returns the capmask2 value from
 * cap_mask2_resp_time in ib_class_port_info.
 * @cpi: A struct ib_class_port_info mad.
 *
 * cap_mask2 occupies the high 27 bits of the combined big-endian
 * field, above the 5-bit resp_time.
 */
static inline u32 ib_get_cpi_capmask2(struct ib_class_port_info *cpi)
{
	return (be32_to_cpu(cpi->cap_mask2_resp_time) >>
		IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE);
}
301
/**
 * ib_set_cpi_capmask2 - Sets the capmask2 in an
 * ib_class_port_info mad.
 * @cpi: A struct ib_class_port_info.
 * @capmask2: The capmask2 to set.
 *
 * Overwrites the high 27 cap_mask2 bits of the big-endian field while
 * preserving the low 5 resp_time bits.
 */
static inline void ib_set_cpi_capmask2(struct ib_class_port_info *cpi,
				       u32 capmask2)
{
	cpi->cap_mask2_resp_time =
		(cpi->cap_mask2_resp_time &
		 cpu_to_be32(IB_CLASS_PORT_INFO_RESP_TIME_MASK)) |
		cpu_to_be32(capmask2 <<
			    IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE);
}
317
262struct ib_mad_notice_attr { 318struct ib_mad_notice_attr {
263 u8 generic_type; 319 u8 generic_type;
264 u8 prod_type_msb; 320 u8 prod_type_msb;
diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index 0f3daae44bf9..b13419ce99ff 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -103,6 +103,9 @@ enum {
103 IB_OPCODE_ATOMIC_ACKNOWLEDGE = 0x12, 103 IB_OPCODE_ATOMIC_ACKNOWLEDGE = 0x12,
104 IB_OPCODE_COMPARE_SWAP = 0x13, 104 IB_OPCODE_COMPARE_SWAP = 0x13,
105 IB_OPCODE_FETCH_ADD = 0x14, 105 IB_OPCODE_FETCH_ADD = 0x14,
106 /* opcode 0x15 is reserved */
107 IB_OPCODE_SEND_LAST_WITH_INVALIDATE = 0x16,
108 IB_OPCODE_SEND_ONLY_WITH_INVALIDATE = 0x17,
106 109
107 /* real constants follow -- see comment about above IB_OPCODE() 110 /* real constants follow -- see comment about above IB_OPCODE()
108 macro for more details */ 111 macro for more details */
@@ -129,6 +132,8 @@ enum {
129 IB_OPCODE(RC, ATOMIC_ACKNOWLEDGE), 132 IB_OPCODE(RC, ATOMIC_ACKNOWLEDGE),
130 IB_OPCODE(RC, COMPARE_SWAP), 133 IB_OPCODE(RC, COMPARE_SWAP),
131 IB_OPCODE(RC, FETCH_ADD), 134 IB_OPCODE(RC, FETCH_ADD),
135 IB_OPCODE(RC, SEND_LAST_WITH_INVALIDATE),
136 IB_OPCODE(RC, SEND_ONLY_WITH_INVALIDATE),
132 137
133 /* UC */ 138 /* UC */
134 IB_OPCODE(UC, SEND_FIRST), 139 IB_OPCODE(UC, SEND_FIRST),
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index cdc1c81aa275..384041669489 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -94,6 +94,8 @@ enum ib_sa_selector {
94 IB_SA_BEST = 3 94 IB_SA_BEST = 3
95}; 95};
96 96
97#define IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT BIT(12)
98
97/* 99/*
98 * Structures for SA records are named "struct ib_sa_xxx_rec." No 100 * Structures for SA records are named "struct ib_sa_xxx_rec." No
99 * attempt is made to pack structures to match the physical layout of 101 * attempt is made to pack structures to match the physical layout of
@@ -439,4 +441,14 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
439 void *context, 441 void *context,
440 struct ib_sa_query **sa_query); 442 struct ib_sa_query **sa_query);
441 443
444/* Support get SA ClassPortInfo */
445int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
446 struct ib_device *device, u8 port_num,
447 int timeout_ms, gfp_t gfp_mask,
448 void (*callback)(int status,
449 struct ib_class_port_info *resp,
450 void *context),
451 void *context,
452 struct ib_sa_query **sa_query);
453
442#endif /* IB_SA_H */ 454#endif /* IB_SA_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index fc0320c004a3..432bed510369 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -403,56 +403,55 @@ enum ib_port_speed {
403 IB_SPEED_EDR = 32 403 IB_SPEED_EDR = 32
404}; 404};
405 405
406struct ib_protocol_stats { 406/**
407 /* TBD... */ 407 * struct rdma_hw_stats
408}; 408 * @timestamp - Used by the core code to track when the last update was
409 409 * @lifespan - Used by the core code to determine how old the counters
410struct iw_protocol_stats { 410 * should be before being updated again. Stored in jiffies, defaults
411 u64 ipInReceives; 411 * to 10 milliseconds, drivers can override the default be specifying
412 u64 ipInHdrErrors; 412 * their own value during their allocation routine.
413 u64 ipInTooBigErrors; 413 * @name - Array of pointers to static names used for the counters in
414 u64 ipInNoRoutes; 414 * directory.
415 u64 ipInAddrErrors; 415 * @num_counters - How many hardware counters there are. If name is
416 u64 ipInUnknownProtos; 416 * shorter than this number, a kernel oops will result. Driver authors
417 u64 ipInTruncatedPkts; 417 * are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@name) < num_counters)
418 u64 ipInDiscards; 418 * in their code to prevent this.
419 u64 ipInDelivers; 419 * @value - Array of u64 counters that are accessed by the sysfs code and
420 u64 ipOutForwDatagrams; 420 * filled in by the drivers get_stats routine
421 u64 ipOutRequests; 421 */
422 u64 ipOutDiscards; 422struct rdma_hw_stats {
423 u64 ipOutNoRoutes; 423 unsigned long timestamp;
424 u64 ipReasmTimeout; 424 unsigned long lifespan;
425 u64 ipReasmReqds; 425 const char * const *names;
426 u64 ipReasmOKs; 426 int num_counters;
427 u64 ipReasmFails; 427 u64 value[];
428 u64 ipFragOKs; 428};
429 u64 ipFragFails; 429
430 u64 ipFragCreates; 430#define RDMA_HW_STATS_DEFAULT_LIFESPAN 10
431 u64 ipInMcastPkts; 431/**
432 u64 ipOutMcastPkts; 432 * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct
433 u64 ipInBcastPkts; 433 * for drivers.
434 u64 ipOutBcastPkts; 434 * @names - Array of static const char *
435 435 * @num_counters - How many elements in array
436 u64 tcpRtoAlgorithm; 436 * @lifespan - How many milliseconds between updates
437 u64 tcpRtoMin; 437 */
438 u64 tcpRtoMax; 438static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
439 u64 tcpMaxConn; 439 const char * const *names, int num_counters,
440 u64 tcpActiveOpens; 440 unsigned long lifespan)
441 u64 tcpPassiveOpens; 441{
442 u64 tcpAttemptFails; 442 struct rdma_hw_stats *stats;
443 u64 tcpEstabResets; 443
444 u64 tcpCurrEstab; 444 stats = kzalloc(sizeof(*stats) + num_counters * sizeof(u64),
445 u64 tcpInSegs; 445 GFP_KERNEL);
446 u64 tcpOutSegs; 446 if (!stats)
447 u64 tcpRetransSegs; 447 return NULL;
448 u64 tcpInErrs; 448 stats->names = names;
449 u64 tcpOutRsts; 449 stats->num_counters = num_counters;
450}; 450 stats->lifespan = msecs_to_jiffies(lifespan);
451 451
452union rdma_protocol_stats { 452 return stats;
453 struct ib_protocol_stats ib; 453}
454 struct iw_protocol_stats iw; 454
455};
456 455
457/* Define bits for the various functionality this port needs to be supported by 456/* Define bits for the various functionality this port needs to be supported by
458 * the core. 457 * the core.
@@ -1707,8 +1706,29 @@ struct ib_device {
1707 1706
1708 struct iw_cm_verbs *iwcm; 1707 struct iw_cm_verbs *iwcm;
1709 1708
1710 int (*get_protocol_stats)(struct ib_device *device, 1709 /**
1711 union rdma_protocol_stats *stats); 1710 * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the
1711 * driver initialized data. The struct is kfree()'ed by the sysfs
1712 * core when the device is removed. A lifespan of -1 in the return
1713 * struct tells the core to set a default lifespan.
1714 */
1715 struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device,
1716 u8 port_num);
1717 /**
1718 * get_hw_stats - Fill in the counter value(s) in the stats struct.
1719 * @index - The index in the value array we wish to have updated, or
1720 * num_counters if we want all stats updated
1721 * Return codes -
1722 * < 0 - Error, no counters updated
1723 * index - Updated the single counter pointed to by index
1724 * num_counters - Updated all counters (will reset the timestamp
1725 * and prevent further calls for lifespan milliseconds)
1726 * Drivers are allowed to update all counters in leiu of just the
1727 * one given in index at their option
1728 */
1729 int (*get_hw_stats)(struct ib_device *device,
1730 struct rdma_hw_stats *stats,
1731 u8 port, int index);
1712 int (*query_device)(struct ib_device *device, 1732 int (*query_device)(struct ib_device *device,
1713 struct ib_device_attr *device_attr, 1733 struct ib_device_attr *device_attr,
1714 struct ib_udata *udata); 1734 struct ib_udata *udata);
@@ -1926,6 +1946,8 @@ struct ib_device {
1926 u8 node_type; 1946 u8 node_type;
1927 u8 phys_port_cnt; 1947 u8 phys_port_cnt;
1928 struct ib_device_attr attrs; 1948 struct ib_device_attr attrs;
1949 struct attribute_group *hw_stats_ag;
1950 struct rdma_hw_stats *hw_stats;
1929 1951
1930 /** 1952 /**
1931 * The following mandatory functions are used only at device 1953 * The following mandatory functions are used only at device
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index d57ceee90d26..16274e2133cd 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -149,15 +149,15 @@ struct rvt_driver_params {
149 int qpn_res_end; 149 int qpn_res_end;
150 int nports; 150 int nports;
151 int npkeys; 151 int npkeys;
152 u8 qos_shift;
153 char cq_name[RVT_CQN_MAX]; 152 char cq_name[RVT_CQN_MAX];
154 int node; 153 int node;
155 int max_rdma_atomic;
156 int psn_mask; 154 int psn_mask;
157 int psn_shift; 155 int psn_shift;
158 int psn_modify_mask; 156 int psn_modify_mask;
159 u32 core_cap_flags; 157 u32 core_cap_flags;
160 u32 max_mad_size; 158 u32 max_mad_size;
159 u8 qos_shift;
160 u8 max_rdma_atomic;
161}; 161};
162 162
163/* Protection domain */ 163/* Protection domain */
@@ -426,6 +426,15 @@ static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi)
426} 426}
427 427
/*
 * Return the max atomic suitable for determining
 * the size of the ack ring buffer in a QP.
 *
 * One more than the driver-declared max_rdma_atomic limit
 * (rvt_driver_params), i.e. one slot per possible outstanding
 * RDMA atomic plus one extra entry for the ring.
 */
static inline unsigned int rvt_max_atomic(struct rvt_dev_info *rdi)
{
	return rdi->dparms.max_rdma_atomic + 1;
}
436
437/*
429 * Return the indexed PKEY from the port PKEY table. 438 * Return the indexed PKEY from the port PKEY table.
430 */ 439 */
431static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi, 440static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi,
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 0e1ff2abfe92..6d23b879416a 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -211,8 +211,6 @@ struct rvt_mmap_info {
211 unsigned size; 211 unsigned size;
212}; 212};
213 213
214#define RVT_MAX_RDMA_ATOMIC 16
215
216/* 214/*
217 * This structure holds the information that the send tasklet needs 215 * This structure holds the information that the send tasklet needs
218 * to send a RDMA read response or atomic operation. 216 * to send a RDMA read response or atomic operation.
@@ -282,8 +280,7 @@ struct rvt_qp {
282 atomic_t refcount ____cacheline_aligned_in_smp; 280 atomic_t refcount ____cacheline_aligned_in_smp;
283 wait_queue_head_t wait; 281 wait_queue_head_t wait;
284 282
285 struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1] 283 struct rvt_ack_entry *s_ack_queue;
286 ____cacheline_aligned_in_smp;
287 struct rvt_sge_state s_rdma_read_sge; 284 struct rvt_sge_state s_rdma_read_sge;
288 285
289 spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ 286 spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */
diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h
index a533cecab14f..98bebf8bef55 100644
--- a/include/uapi/rdma/hfi/hfi1_user.h
+++ b/include/uapi/rdma/hfi/hfi1_user.h
@@ -66,7 +66,7 @@
66 * The major version changes when data structures change in an incompatible 66 * The major version changes when data structures change in an incompatible
67 * way. The driver must be the same for initialization to succeed. 67 * way. The driver must be the same for initialization to succeed.
68 */ 68 */
69#define HFI1_USER_SWMAJOR 5 69#define HFI1_USER_SWMAJOR 6
70 70
71/* 71/*
72 * Minor version differences are always compatible 72 * Minor version differences are always compatible
@@ -75,7 +75,12 @@
75 * may not be implemented; the user code must deal with this if it 75 * may not be implemented; the user code must deal with this if it
76 * cares, or it must abort after initialization reports the difference. 76 * cares, or it must abort after initialization reports the difference.
77 */ 77 */
78#define HFI1_USER_SWMINOR 0 78#define HFI1_USER_SWMINOR 1
79
80/*
81 * We will encode the major/minor inside a single 32bit version number.
82 */
83#define HFI1_SWMAJOR_SHIFT 16
79 84
80/* 85/*
81 * Set of HW and driver capability/feature bits. 86 * Set of HW and driver capability/feature bits.
@@ -107,19 +112,6 @@
107#define HFI1_RCVHDR_ENTSIZE_16 (1UL << 1) 112#define HFI1_RCVHDR_ENTSIZE_16 (1UL << 1)
108#define HFI1_RCVDHR_ENTSIZE_32 (1UL << 2) 113#define HFI1_RCVDHR_ENTSIZE_32 (1UL << 2)
109 114
110/*
111 * If the unit is specified via open, HFI choice is fixed. If port is
112 * specified, it's also fixed. Otherwise we try to spread contexts
113 * across ports and HFIs, using different algorithms. WITHIN is
114 * the old default, prior to this mechanism.
115 */
116#define HFI1_ALG_ACROSS 0 /* round robin contexts across HFIs, then
117 * ports; this is the default */
118#define HFI1_ALG_WITHIN 1 /* use all contexts on an HFI (round robin
119 * active ports within), then next HFI */
120#define HFI1_ALG_COUNT 2 /* number of algorithm choices */
121
122
123/* User commands. */ 115/* User commands. */
124#define HFI1_CMD_ASSIGN_CTXT 1 /* allocate HFI and context */ 116#define HFI1_CMD_ASSIGN_CTXT 1 /* allocate HFI and context */
125#define HFI1_CMD_CTXT_INFO 2 /* find out what resources we got */ 117#define HFI1_CMD_CTXT_INFO 2 /* find out what resources we got */
@@ -127,7 +119,6 @@
127#define HFI1_CMD_TID_UPDATE 4 /* update expected TID entries */ 119#define HFI1_CMD_TID_UPDATE 4 /* update expected TID entries */
128#define HFI1_CMD_TID_FREE 5 /* free expected TID entries */ 120#define HFI1_CMD_TID_FREE 5 /* free expected TID entries */
129#define HFI1_CMD_CREDIT_UPD 6 /* force an update of PIO credit */ 121#define HFI1_CMD_CREDIT_UPD 6 /* force an update of PIO credit */
130#define HFI1_CMD_SDMA_STATUS_UPD 7 /* force update of SDMA status ring */
131 122
132#define HFI1_CMD_RECV_CTRL 8 /* control receipt of packets */ 123#define HFI1_CMD_RECV_CTRL 8 /* control receipt of packets */
133#define HFI1_CMD_POLL_TYPE 9 /* set the kind of polling we want */ 124#define HFI1_CMD_POLL_TYPE 9 /* set the kind of polling we want */
@@ -135,13 +126,46 @@
135#define HFI1_CMD_SET_PKEY 11 /* set context's pkey */ 126#define HFI1_CMD_SET_PKEY 11 /* set context's pkey */
136#define HFI1_CMD_CTXT_RESET 12 /* reset context's HW send context */ 127#define HFI1_CMD_CTXT_RESET 12 /* reset context's HW send context */
137#define HFI1_CMD_TID_INVAL_READ 13 /* read TID cache invalidations */ 128#define HFI1_CMD_TID_INVAL_READ 13 /* read TID cache invalidations */
138/* separate EPROM commands from normal PSM commands */ 129#define HFI1_CMD_GET_VERS 14 /* get the version of the user cdev */
139#define HFI1_CMD_EP_INFO 64 /* read EPROM device ID */ 130
140#define HFI1_CMD_EP_ERASE_CHIP 65 /* erase whole EPROM */ 131/*
141/* range 66-74 no longer used */ 132 * User IOCTLs can not go above 128 if they do then see common.h and change the
142#define HFI1_CMD_EP_ERASE_RANGE 75 /* erase EPROM range */ 133 * base for the snoop ioctl
143#define HFI1_CMD_EP_READ_RANGE 76 /* read EPROM range */ 134 */
144#define HFI1_CMD_EP_WRITE_RANGE 77 /* write EPROM range */ 135#define IB_IOCTL_MAGIC 0x1b /* See Documentation/ioctl/ioctl-number.txt */
136
137/*
138 * Make the ioctls occupy the last 0xf0-0xff portion of the IB range
139 */
140#define __NUM(cmd) (HFI1_CMD_##cmd + 0xe0)
141
142struct hfi1_cmd;
143#define HFI1_IOCTL_ASSIGN_CTXT \
144 _IOWR(IB_IOCTL_MAGIC, __NUM(ASSIGN_CTXT), struct hfi1_user_info)
145#define HFI1_IOCTL_CTXT_INFO \
146 _IOW(IB_IOCTL_MAGIC, __NUM(CTXT_INFO), struct hfi1_ctxt_info)
147#define HFI1_IOCTL_USER_INFO \
148 _IOW(IB_IOCTL_MAGIC, __NUM(USER_INFO), struct hfi1_base_info)
149#define HFI1_IOCTL_TID_UPDATE \
150 _IOWR(IB_IOCTL_MAGIC, __NUM(TID_UPDATE), struct hfi1_tid_info)
151#define HFI1_IOCTL_TID_FREE \
152 _IOWR(IB_IOCTL_MAGIC, __NUM(TID_FREE), struct hfi1_tid_info)
153#define HFI1_IOCTL_CREDIT_UPD \
154 _IO(IB_IOCTL_MAGIC, __NUM(CREDIT_UPD))
155#define HFI1_IOCTL_RECV_CTRL \
156 _IOW(IB_IOCTL_MAGIC, __NUM(RECV_CTRL), int)
157#define HFI1_IOCTL_POLL_TYPE \
158 _IOW(IB_IOCTL_MAGIC, __NUM(POLL_TYPE), int)
159#define HFI1_IOCTL_ACK_EVENT \
160 _IOW(IB_IOCTL_MAGIC, __NUM(ACK_EVENT), unsigned long)
161#define HFI1_IOCTL_SET_PKEY \
162 _IOW(IB_IOCTL_MAGIC, __NUM(SET_PKEY), __u16)
163#define HFI1_IOCTL_CTXT_RESET \
164 _IO(IB_IOCTL_MAGIC, __NUM(CTXT_RESET))
165#define HFI1_IOCTL_TID_INVAL_READ \
166 _IOWR(IB_IOCTL_MAGIC, __NUM(TID_INVAL_READ), struct hfi1_tid_info)
167#define HFI1_IOCTL_GET_VERS \
168 _IOR(IB_IOCTL_MAGIC, __NUM(GET_VERS), int)
145 169
146#define _HFI1_EVENT_FROZEN_BIT 0 170#define _HFI1_EVENT_FROZEN_BIT 0
147#define _HFI1_EVENT_LINKDOWN_BIT 1 171#define _HFI1_EVENT_LINKDOWN_BIT 1
@@ -199,9 +223,7 @@ struct hfi1_user_info {
199 * Should be set to HFI1_USER_SWVERSION. 223 * Should be set to HFI1_USER_SWVERSION.
200 */ 224 */
201 __u32 userversion; 225 __u32 userversion;
202 __u16 pad; 226 __u32 pad;
203 /* HFI selection algorithm, if unit has not selected */
204 __u16 hfi1_alg;
205 /* 227 /*
206 * If two or more processes wish to share a context, each process 228 * If two or more processes wish to share a context, each process
207 * must set the subcontext_cnt and subcontext_id to the same 229 * must set the subcontext_cnt and subcontext_id to the same
@@ -243,12 +265,6 @@ struct hfi1_tid_info {
243 __u32 length; 265 __u32 length;
244}; 266};
245 267
246struct hfi1_cmd {
247 __u32 type; /* command type */
248 __u32 len; /* length of struct pointed to by add */
249 __u64 addr; /* pointer to user structure */
250};
251
252enum hfi1_sdma_comp_state { 268enum hfi1_sdma_comp_state {
253 FREE = 0, 269 FREE = 0,
254 QUEUED, 270 QUEUED,
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 6e373d151cad..02fe8390c18f 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -135,10 +135,12 @@ enum {
135 * Local service operations: 135 * Local service operations:
136 * RESOLVE - The client requests the local service to resolve a path. 136 * RESOLVE - The client requests the local service to resolve a path.
137 * SET_TIMEOUT - The local service requests the client to set the timeout. 137 * SET_TIMEOUT - The local service requests the client to set the timeout.
138 * IP_RESOLVE - The client requests the local service to resolve an IP to GID.
138 */ 139 */
139enum { 140enum {
140 RDMA_NL_LS_OP_RESOLVE = 0, 141 RDMA_NL_LS_OP_RESOLVE = 0,
141 RDMA_NL_LS_OP_SET_TIMEOUT, 142 RDMA_NL_LS_OP_SET_TIMEOUT,
143 RDMA_NL_LS_OP_IP_RESOLVE,
142 RDMA_NL_LS_NUM_OPS 144 RDMA_NL_LS_NUM_OPS
143}; 145};
144 146
@@ -176,6 +178,10 @@ struct rdma_ls_resolve_header {
176 __u8 path_use; 178 __u8 path_use;
177}; 179};
178 180
181struct rdma_ls_ip_resolve_header {
182 __u32 ifindex;
183};
184
179/* Local service attribute type */ 185/* Local service attribute type */
180#define RDMA_NLA_F_MANDATORY (1 << 13) 186#define RDMA_NLA_F_MANDATORY (1 << 13)
181#define RDMA_NLA_TYPE_MASK (~(NLA_F_NESTED | NLA_F_NET_BYTEORDER | \ 187#define RDMA_NLA_TYPE_MASK (~(NLA_F_NESTED | NLA_F_NET_BYTEORDER | \
@@ -193,6 +199,8 @@ struct rdma_ls_resolve_header {
193 * TCLASS u8 199 * TCLASS u8
194 * PKEY u16 cpu 200 * PKEY u16 cpu
195 * QOS_CLASS u16 cpu 201 * QOS_CLASS u16 cpu
202 * IPV4 u32 BE
203 * IPV6 u8[16] BE
196 */ 204 */
197enum { 205enum {
198 LS_NLA_TYPE_UNSPEC = 0, 206 LS_NLA_TYPE_UNSPEC = 0,
@@ -204,6 +212,8 @@ enum {
204 LS_NLA_TYPE_TCLASS, 212 LS_NLA_TYPE_TCLASS,
205 LS_NLA_TYPE_PKEY, 213 LS_NLA_TYPE_PKEY,
206 LS_NLA_TYPE_QOS_CLASS, 214 LS_NLA_TYPE_QOS_CLASS,
215 LS_NLA_TYPE_IPV4,
216 LS_NLA_TYPE_IPV6,
207 LS_NLA_TYPE_MAX 217 LS_NLA_TYPE_MAX
208}; 218};
209 219