From 733d65fe33b3002a6f2694c0fd8bd760dc13141f Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 8 Aug 2007 15:41:28 -0700 Subject: IB/sa: Add new QoS fields to path record The QoS annex defines new fields for path records. Add them to the ib_sa for consumers that want to use them. Signed-off-by: Sean Hefty Reviewed-by: Or Gerlitz Signed-off-by: Roland Dreier --- include/rdma/ib_sa.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 5e26b2f53f86..942692b0b92e 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -109,8 +109,8 @@ enum ib_sa_selector { * Reserved rows are indicated with comments to help maintainability. */ -/* reserved: 0 */ -/* reserved: 1 */ +#define IB_SA_PATH_REC_SERVICE_ID (IB_SA_COMP_MASK( 0) |\ + IB_SA_COMP_MASK( 1)) #define IB_SA_PATH_REC_DGID IB_SA_COMP_MASK( 2) #define IB_SA_PATH_REC_SGID IB_SA_COMP_MASK( 3) #define IB_SA_PATH_REC_DLID IB_SA_COMP_MASK( 4) @@ -123,7 +123,7 @@ enum ib_sa_selector { #define IB_SA_PATH_REC_REVERSIBLE IB_SA_COMP_MASK(11) #define IB_SA_PATH_REC_NUMB_PATH IB_SA_COMP_MASK(12) #define IB_SA_PATH_REC_PKEY IB_SA_COMP_MASK(13) -/* reserved: 14 */ +#define IB_SA_PATH_REC_QOS_CLASS IB_SA_COMP_MASK(14) #define IB_SA_PATH_REC_SL IB_SA_COMP_MASK(15) #define IB_SA_PATH_REC_MTU_SELECTOR IB_SA_COMP_MASK(16) #define IB_SA_PATH_REC_MTU IB_SA_COMP_MASK(17) @@ -134,8 +134,7 @@ enum ib_sa_selector { #define IB_SA_PATH_REC_PREFERENCE IB_SA_COMP_MASK(22) struct ib_sa_path_rec { - /* reserved */ - /* reserved */ + __be64 service_id; union ib_gid dgid; union ib_gid sgid; __be16 dlid; @@ -148,7 +147,7 @@ struct ib_sa_path_rec { int reversible; u8 numb_path; __be16 pkey; - /* reserved */ + __be16 qos_class; u8 sl; u8 mtu_selector; u8 mtu; -- cgit v1.2.2 From a81c994d5eef87ed77cb30d8343d6be296528b3f Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 8 Aug 2007 15:51:06 -0700 Subject: RDMA/cma: Add ability to specify type of service Provide support to specify a type of service for a communication identifier. A new function call is used when dealing with IPv4 addresses. For IPv6 addresses, the ToS is specified through the traffic class field in the sockaddr_in6 structure. Signed-off-by: Sean Hefty [ The comments Eitan Zahavi and myself have made over the v1 post at were fully addressed. ] Reviewed-by: Or Gerlitz Signed-off-by: Roland Dreier --- include/rdma/rdma_cm.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 2d6a7705eae7..010f876f41d8 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -314,4 +314,18 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, */ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr); +/** + * rdma_set_service_type - Set the type of service associated with a + * connection identifier. + * @id: Communication identifier to associated with service type. + * @tos: Type of service. + * + * The type of service is interpretted as a differentiated service + * field (RFC 2474). The service type should be specified before + * performing route resolution, as existing communication on the + * connection identifier may be unaffected. The type of service + * requested may not be supported by the network to all destinations. + */ +void rdma_set_service_type(struct rdma_cm_id *id, int tos); + #endif /* RDMA_CM_H */ -- cgit v1.2.2 From 7ce86409adcd2fda652b628173a66e905950ece1 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 8 Aug 2007 15:51:13 -0700 Subject: RDMA/ucma: Allow user space to set service type Export the ability to set the type of service to user space. Model the interface after setsockopt. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- include/rdma/rdma_user_cm.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h index f632b0c007c9..9749c1b34d00 100644 --- a/include/rdma/rdma_user_cm.h +++ b/include/rdma/rdma_user_cm.h @@ -212,4 +212,22 @@ struct rdma_ucm_event_resp { } param; }; +/* Option levels */ +enum { + RDMA_OPTION_ID = 0 +}; + +/* Option details */ +enum { + RDMA_OPTION_ID_TOS = 0 +}; + +struct rdma_ucm_set_option { + __u64 optval; + __u32 id; + __u32 level; + __u32 optname; + __u32 optlen; +}; + #endif /* RDMA_USER_CM_H */ -- cgit v1.2.2 From c8d8beea0383e47c9d65d45f0ca95626ec435fcd Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Thu, 13 Sep 2007 18:15:28 +0200 Subject: IB/umem: Add hugetlb flag to struct ib_umem During ib_umem_get(), determine whether all pages from the memory region are hugetlb pages and report this in the "hugetlb" member. Low-level drivers can use this information if they need it. Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- include/rdma/ib_umem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index c533d6c7903f..22298423cf0b 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -45,6 +45,7 @@ struct ib_umem { int offset; int page_size; int writable; + int hugetlb; struct list_head chunk_list; struct work_struct work; struct mm_struct *mm; -- cgit v1.2.2 From cd9281d873c91a01af0cb96ff0f75e9905e54403 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 18 Sep 2007 09:14:18 +0200 Subject: IB/mlx4: Display misc device information under /sys/class/infiniband/ display the following device information under /sys/class/infiniband/mlx4_X: board_id, fw_ver, hw_rev, hca_type. This patch makes this information available to userspace utilities such as ibstat and ibv_devinfo. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index cfb78fb2c046..a93520c76fde 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -48,6 +48,10 @@ enum { MLX4_MAX_PORTS = 2 }; +enum { + MLX4_BOARD_ID_LEN = 64 +}; + enum { MLX4_DEV_CAP_FLAG_RC = 1 << 0, MLX4_DEV_CAP_FLAG_UC = 1 << 1, @@ -272,6 +276,8 @@ struct mlx4_dev { unsigned long flags; struct mlx4_caps caps; struct radix_tree_root qp_table_tree; + u32 rev_id; + char board_id[MLX4_BOARD_ID_LEN]; }; struct mlx4_init_port_param { -- cgit v1.2.2 From 2be8e3ee8efd6f99ce454115c29d09750915021a Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 9 Oct 2007 19:59:15 -0700 Subject: IB/umad: Add P_Key index support Add support for setting the P_Key index of sent MADs and getting the P_Key index of received MADs. This requires a change to the layout of the ABI structure struct ib_user_mad_hdr, so to avoid breaking compatibility, we default to the old (unchanged) ABI and add a new ioctl IB_USER_MAD_ENABLE_PKEY that allows applications that are aware of the new ABI to opt into using it. We plan on switching to the new ABI by default in a year or so, and this patch adds a warning that is printed when an application uses the old ABI, to push people towards converting to the new ABI. Signed-off-by: Roland Dreier Reviewed-by: Sean Hefty Reviewed-by: Hal Rosenstock --- include/rdma/ib_user_mad.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_user_mad.h b/include/rdma/ib_user_mad.h index d66b15ea82c4..2a32043d1abd 100644 --- a/include/rdma/ib_user_mad.h +++ b/include/rdma/ib_user_mad.h @@ -51,8 +51,51 @@ * avoid incompatibility between 32-bit userspace and 64-bit kernels). */ +/** + * ib_user_mad_hdr_old - Old version of MAD packet header without pkey_index + * @id - ID of agent MAD received with/to be sent with + * @status - 0 on successful receive, ETIMEDOUT if no response + * received (transaction ID in data[] will be set to TID of original + * request) (ignored on send) + * @timeout_ms - Milliseconds to wait for response (unset on receive) + * @retries - Number of automatic retries to attempt + * @qpn - Remote QP number received from/to be sent to + * @qkey - Remote Q_Key to be sent with (unset on receive) + * @lid - Remote lid received from/to be sent to + * @sl - Service level received with/to be sent with + * @path_bits - Local path bits received with/to be sent with + * @grh_present - If set, GRH was received/should be sent + * @gid_index - Local GID index to send with (unset on receive) + * @hop_limit - Hop limit in GRH + * @traffic_class - Traffic class in GRH + * @gid - Remote GID in GRH + * @flow_label - Flow label in GRH + */ +struct ib_user_mad_hdr_old { + __u32 id; + __u32 status; + __u32 timeout_ms; + __u32 retries; + __u32 length; + __be32 qpn; + __be32 qkey; + __be16 lid; + __u8 sl; + __u8 path_bits; + __u8 grh_present; + __u8 gid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 gid[16]; + __be32 flow_label; +}; + /** * ib_user_mad_hdr - MAD packet header + * This layout allows specifying/receiving the P_Key index. To use + * this capability, an application must call the + * IB_USER_MAD_ENABLE_PKEY ioctl on the user MAD file handle before + * any other actions with the file handle. * @id - ID of agent MAD received with/to be sent with * @status - 0 on successful receive, ETIMEDOUT if no response * received (transaction ID in data[] will be set to TID of original @@ -70,6 +113,7 @@ * @traffic_class - Traffic class in GRH * @gid - Remote GID in GRH * @flow_label - Flow label in GRH + * @pkey_index - P_Key index */ struct ib_user_mad_hdr { __u32 id; @@ -88,6 +132,8 @@ struct ib_user_mad_hdr { __u8 traffic_class; __u8 gid[16]; __be32 flow_label; + __u16 pkey_index; + __u8 reserved[6]; }; /** @@ -134,4 +180,6 @@ struct ib_user_mad_reg_req { #define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, __u32) +#define IB_USER_MAD_ENABLE_PKEY _IO(IB_IOCTL_MAGIC, 3) + #endif /* IB_USER_MAD_H */ -- cgit v1.2.2 From a394f83bdfec10b09d8cb111e622556b2e6fd0de Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 9 Oct 2007 19:59:15 -0700 Subject: IB/umad: Fix bit ordering and 32-on-64 problems on big endian systems The declaration of struct ib_user_mad_reg_req.method_mask[] exported to userspace was an array of __u32, but the kernel internally treated it as a bitmap made up of longs. This makes a difference for 64-bit big-endian kernels, where numbering the bits in an array of__u32 gives: |31.....0|63....31|95....64|127...96| while numbering the bits in an array of longs gives: |63..............0|127............64| 64-bit userspace can handle this by just treating method_mask[] as an array of longs, but 32-bit userspace is really stuck: the meaning of the bits in method_mask[] depends on whether the kernel is 32-bit or 64-bit, and there's no sane way for userspace to know that. Fix this by updating to make it clear that method_mask[] is an array of longs, and using a compat_ioctl method to convert to an array of 64-bit longs to handle the 32-on-64 problem. This fixes the interface description to match existing behavior (so working binaries continue to work) in almost all situations, and gives consistent semantics in the case of 32-bit userspace that can run on either a 32-bit or 64-bit kernel, so that the same binary can work for both 32-on-32 and 32-on-64 systems. Signed-off-by: Roland Dreier --- include/rdma/ib_user_mad.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_user_mad.h b/include/rdma/ib_user_mad.h index 2a32043d1abd..29d2c7205a90 100644 --- a/include/rdma/ib_user_mad.h +++ b/include/rdma/ib_user_mad.h @@ -147,6 +147,26 @@ struct ib_user_mad { __u64 data[0]; }; +/* + * Earlier versions of this interface definition declared the + * method_mask[] member as an array of __u32 but treated it as a + * bitmap made up of longs in the kernel. This ambiguity meant that + * 32-bit big-endian applications that can run on both 32-bit and + * 64-bit kernels had no consistent ABI to rely on, and 64-bit + * big-endian applications that treated method_mask as being made up + * of 32-bit words would have their bitmap misinterpreted. + * + * To clear up this confusion, we change the declaration of + * method_mask[] to use unsigned long and handle the conversion from + * 32-bit userspace to 64-bit kernel for big-endian systems in the + * compat_ioctl method. Unfortunately, to keep the structure layout + * the same, we need the method_mask[] array to be aligned only to 4 + * bytes even when long is 64 bits, which forces us into this ugly + * typedef. + */ +typedef unsigned long __attribute__((aligned(4))) packed_ulong; +#define IB_USER_MAD_LONGS_PER_METHOD_MASK (128 / (8 * sizeof (long))) + /** * ib_user_mad_reg_req - MAD registration request * @id - Set by the kernel; used to identify agent in future requests. @@ -165,7 +185,7 @@ struct ib_user_mad { */ struct ib_user_mad_reg_req { __u32 id; - __u32 method_mask[4]; + packed_ulong method_mask[IB_USER_MAD_LONGS_PER_METHOD_MASK]; __u8 qpn; __u8 mgmt_class; __u8 mgmt_class_version; -- cgit v1.2.2 From 8ad11fb6b0739e704953e2b0aed453bf7d75d4f6 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 1 Aug 2007 12:29:05 +0300 Subject: IB/mlx4: Implement FMRs Implement FMRs for mlx4. This is an adaptation of code from mthca. Signed-off-by: Jack Morgenstein Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index a93520c76fde..222815d91c40 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -214,6 +214,17 @@ struct mlx4_mr { int enabled; }; +struct mlx4_fmr { + struct mlx4_mr mr; + struct mlx4_mpt_entry *mpt; + __be64 *mtts; + dma_addr_t dma_handle; + int max_pages; + int max_maps; + int maps; + u8 page_shift; +}; + struct mlx4_uar { unsigned long pfn; int index; @@ -337,4 +348,14 @@ int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port); int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]); int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]); +int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list, + int npages, u64 iova, u32 *lkey, u32 *rkey); +int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages, + int max_maps, u8 page_shift, struct mlx4_fmr *fmr); +int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr); +void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, + u32 *lkey, u32 *rkey); +int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); +int mlx4_SYNC_TPT(struct mlx4_dev *dev); + #endif /* MLX4_DEVICE_H */ -- cgit v1.2.2 From de98b693e9857e183679cd2f49b3c30d2bc57629 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 1 Aug 2007 13:49:53 -0700 Subject: IB/cm: Modify interface to send MRAs in response to duplicate messages The IB CM provides a message received acknowledged (MRA) message that can be sent to indicate that a REQ or REP message has been received, but will require more time to process than the timeout specified by those messages. In many cases, the application may not know how long it will take to respond to a CM message, but the majority of the time, it will usually respond before a retry has been sent. Rather than sending an MRA in response to all messages just to handle the case where a longer timeout is needed, it is more efficient to queue the MRA for sending in case a duplicate message is received. This avoids sending an MRA when it is not needed, but limits the number of times that a REQ or REP will be resent. It also provides for a simpler implementation than generating the MRA based on a timer event. (That is, trying to send the MRA after receiving the first REQ or REP if a response has not been generated, so that it is received at the remote side before a duplicate REQ or REP has been received) Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- include/rdma/ib_cm.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 12243e80c706..a627c8682d2f 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -477,12 +477,15 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len); +#define IB_CM_MRA_FLAG_DELAY 0x80 /* Send MRA only after a duplicate msg */ + /** * ib_send_cm_mra - Sends a message receipt acknowledgement to a connection * message. * @cm_id: Connection identifier associated with the connection message. - * @service_timeout: The maximum time required for the sender to reply to - * to the connection message. + * @service_timeout: The lower 5-bits specify the maximum time required for + * the sender to reply to to the connection message. The upper 3-bits + * specify additional control flags. * @private_data: Optional user-defined private data sent with the * message receipt acknowledgement. * @private_data_len: Size of the private data buffer, in bytes. -- cgit v1.2.2