aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jack Morgenstein <jackm@mellanox.co.il> 2006-03-04 00:54:13 -0500
committer: Roland Dreier <rolandd@cisco.com> 2006-03-20 13:08:23 -0500
commit: f36e1793e25513380cae5958a9164d4cc4458ad0 (patch)
tree: aa31d34ee07971645af6f21068709166420caee2
parent: 6ecb0c849625e830ab96495d473bb704812c30e1 (diff)
IB/umad: Add support for large RMPP transfers
Add support for sending and receiving large RMPP transfers. The old code supports transfers only as large as a single contiguous kernel memory allocation. This patch uses a linked list of memory buffers when sending and receiving data, to avoid needing contiguous pages for larger transfers. Receive side: copy the arriving MADs in chunks instead of coalescing them into one large buffer in kernel space. Send side: split a multipacket MAD buffer into a list of segments (multipacket_list) and send these using a gather list of size 2. Also, save a pointer to the last sent segment, and retrieve requested segments by walking the list starting at the last sent segment. Finally, save a pointer to the last-acked segment. When retrying, retrieve segments for resending relative to this pointer. When updating the last ack, start at this pointer. Signed-off-by: Jack Morgenstein <jackm@mellanox.co.il> Signed-off-by: Sean Hefty <sean.hefty@intel.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r-- drivers/infiniband/core/mad.c 166
-rw-r--r-- drivers/infiniband/core/mad_priv.h 16
-rw-r--r-- drivers/infiniband/core/mad_rmpp.c 148
-rw-r--r-- drivers/infiniband/core/user_mad.c 225
-rw-r--r-- include/rdma/ib_mad.h 48
5 files changed, 376 insertions, 227 deletions
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 445ad0dda213..16549add8e8f 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -31,7 +31,7 @@
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 * 33 *
34 * $Id: mad.c 2817 2005-07-07 11:29:26Z halr $ 34 * $Id: mad.c 5596 2006-03-03 01:00:07Z sean.hefty $
35 */ 35 */
36#include <linux/dma-mapping.h> 36#include <linux/dma-mapping.h>
37 37
@@ -765,18 +765,67 @@ out:
765 return ret; 765 return ret;
766} 766}
767 767
768static int get_buf_length(int hdr_len, int data_len) 768static int get_pad_size(int hdr_len, int data_len)
769{ 769{
770 int seg_size, pad; 770 int seg_size, pad;
771 771
772 seg_size = sizeof(struct ib_mad) - hdr_len; 772 seg_size = sizeof(struct ib_mad) - hdr_len;
773 if (data_len && seg_size) { 773 if (data_len && seg_size) {
774 pad = seg_size - data_len % seg_size; 774 pad = seg_size - data_len % seg_size;
775 if (pad == seg_size) 775 return pad == seg_size ? 0 : pad;
776 pad = 0;
777 } else 776 } else
778 pad = seg_size; 777 return seg_size;
779 return hdr_len + data_len + pad; 778}
779
780static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr)
781{
782 struct ib_rmpp_segment *s, *t;
783
784 list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) {
785 list_del(&s->list);
786 kfree(s);
787 }
788}
789
790static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
791 gfp_t gfp_mask)
792{
793 struct ib_mad_send_buf *send_buf = &send_wr->send_buf;
794 struct ib_rmpp_mad *rmpp_mad = send_buf->mad;
795 struct ib_rmpp_segment *seg = NULL;
796 int left, seg_size, pad;
797
798 send_buf->seg_size = sizeof (struct ib_mad) - send_buf->hdr_len;
799 seg_size = send_buf->seg_size;
800 pad = send_wr->pad;
801
802 /* Allocate data segments. */
803 for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
804 seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
805 if (!seg) {
806 printk(KERN_ERR "alloc_send_rmpp_segs: RMPP mem "
807 "alloc failed for len %zd, gfp %#x\n",
808 sizeof (*seg) + seg_size, gfp_mask);
809 free_send_rmpp_list(send_wr);
810 return -ENOMEM;
811 }
812 seg->num = ++send_buf->seg_count;
813 list_add_tail(&seg->list, &send_wr->rmpp_list);
814 }
815
816 /* Zero any padding */
817 if (pad)
818 memset(seg->data + seg_size - pad, 0, pad);
819
820 rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv->
821 agent.rmpp_version;
822 rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
823 ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
824
825 send_wr->cur_seg = container_of(send_wr->rmpp_list.next,
826 struct ib_rmpp_segment, list);
827 send_wr->last_ack_seg = send_wr->cur_seg;
828 return 0;
780} 829}
781 830
782struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, 831struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
@@ -787,32 +836,40 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
787{ 836{
788 struct ib_mad_agent_private *mad_agent_priv; 837 struct ib_mad_agent_private *mad_agent_priv;
789 struct ib_mad_send_wr_private *mad_send_wr; 838 struct ib_mad_send_wr_private *mad_send_wr;
790 int buf_size; 839 int pad, message_size, ret, size;
791 void *buf; 840 void *buf;
792 841
793 mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, 842 mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
794 agent); 843 agent);
795 buf_size = get_buf_length(hdr_len, data_len); 844 pad = get_pad_size(hdr_len, data_len);
845 message_size = hdr_len + data_len + pad;
796 846
797 if ((!mad_agent->rmpp_version && 847 if ((!mad_agent->rmpp_version &&
798 (rmpp_active || buf_size > sizeof(struct ib_mad))) || 848 (rmpp_active || message_size > sizeof(struct ib_mad))) ||
799 (!rmpp_active && buf_size > sizeof(struct ib_mad))) 849 (!rmpp_active && message_size > sizeof(struct ib_mad)))
800 return ERR_PTR(-EINVAL); 850 return ERR_PTR(-EINVAL);
801 851
802 buf = kzalloc(sizeof *mad_send_wr + buf_size, gfp_mask); 852 size = rmpp_active ? hdr_len : sizeof(struct ib_mad);
853 buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
803 if (!buf) 854 if (!buf)
804 return ERR_PTR(-ENOMEM); 855 return ERR_PTR(-ENOMEM);
805 856
806 mad_send_wr = buf + buf_size; 857 mad_send_wr = buf + size;
858 INIT_LIST_HEAD(&mad_send_wr->rmpp_list);
807 mad_send_wr->send_buf.mad = buf; 859 mad_send_wr->send_buf.mad = buf;
860 mad_send_wr->send_buf.hdr_len = hdr_len;
861 mad_send_wr->send_buf.data_len = data_len;
862 mad_send_wr->pad = pad;
808 863
809 mad_send_wr->mad_agent_priv = mad_agent_priv; 864 mad_send_wr->mad_agent_priv = mad_agent_priv;
810 mad_send_wr->sg_list[0].length = buf_size; 865 mad_send_wr->sg_list[0].length = hdr_len;
811 mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey; 866 mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey;
867 mad_send_wr->sg_list[1].length = sizeof(struct ib_mad) - hdr_len;
868 mad_send_wr->sg_list[1].lkey = mad_agent->mr->lkey;
812 869
813 mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr; 870 mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
814 mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; 871 mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
815 mad_send_wr->send_wr.num_sge = 1; 872 mad_send_wr->send_wr.num_sge = 2;
816 mad_send_wr->send_wr.opcode = IB_WR_SEND; 873 mad_send_wr->send_wr.opcode = IB_WR_SEND;
817 mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED; 874 mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
818 mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn; 875 mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
@@ -820,13 +877,11 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
820 mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index; 877 mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;
821 878
822 if (rmpp_active) { 879 if (rmpp_active) {
823 struct ib_rmpp_mad *rmpp_mad = mad_send_wr->send_buf.mad; 880 ret = alloc_send_rmpp_list(mad_send_wr, gfp_mask);
824 rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len - 881 if (ret) {
825 IB_MGMT_RMPP_HDR + data_len); 882 kfree(buf);
826 rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version; 883 return ERR_PTR(ret);
827 rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA; 884 }
828 ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr,
829 IB_MGMT_RMPP_FLAG_ACTIVE);
830 } 885 }
831 886
832 mad_send_wr->send_buf.mad_agent = mad_agent; 887 mad_send_wr->send_buf.mad_agent = mad_agent;
@@ -835,14 +890,50 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
835} 890}
836EXPORT_SYMBOL(ib_create_send_mad); 891EXPORT_SYMBOL(ib_create_send_mad);
837 892
893void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num)
894{
895 struct ib_mad_send_wr_private *mad_send_wr;
896 struct list_head *list;
897
898 mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
899 send_buf);
900 list = &mad_send_wr->cur_seg->list;
901
902 if (mad_send_wr->cur_seg->num < seg_num) {
903 list_for_each_entry(mad_send_wr->cur_seg, list, list)
904 if (mad_send_wr->cur_seg->num == seg_num)
905 break;
906 } else if (mad_send_wr->cur_seg->num > seg_num) {
907 list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list)
908 if (mad_send_wr->cur_seg->num == seg_num)
909 break;
910 }
911 return mad_send_wr->cur_seg->data;
912}
913EXPORT_SYMBOL(ib_get_rmpp_segment);
914
915static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr)
916{
917 if (mad_send_wr->send_buf.seg_count)
918 return ib_get_rmpp_segment(&mad_send_wr->send_buf,
919 mad_send_wr->seg_num);
920 else
921 return mad_send_wr->send_buf.mad +
922 mad_send_wr->send_buf.hdr_len;
923}
924
838void ib_free_send_mad(struct ib_mad_send_buf *send_buf) 925void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
839{ 926{
840 struct ib_mad_agent_private *mad_agent_priv; 927 struct ib_mad_agent_private *mad_agent_priv;
928 struct ib_mad_send_wr_private *mad_send_wr;
841 929
842 mad_agent_priv = container_of(send_buf->mad_agent, 930 mad_agent_priv = container_of(send_buf->mad_agent,
843 struct ib_mad_agent_private, agent); 931 struct ib_mad_agent_private, agent);
844 kfree(send_buf->mad); 932 mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
933 send_buf);
845 934
935 free_send_rmpp_list(mad_send_wr);
936 kfree(send_buf->mad);
846 if (atomic_dec_and_test(&mad_agent_priv->refcount)) 937 if (atomic_dec_and_test(&mad_agent_priv->refcount))
847 wake_up(&mad_agent_priv->wait); 938 wake_up(&mad_agent_priv->wait);
848} 939}
@@ -865,10 +956,17 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
865 956
866 mad_agent = mad_send_wr->send_buf.mad_agent; 957 mad_agent = mad_send_wr->send_buf.mad_agent;
867 sge = mad_send_wr->sg_list; 958 sge = mad_send_wr->sg_list;
868 sge->addr = dma_map_single(mad_agent->device->dma_device, 959 sge[0].addr = dma_map_single(mad_agent->device->dma_device,
869 mad_send_wr->send_buf.mad, sge->length, 960 mad_send_wr->send_buf.mad,
870 DMA_TO_DEVICE); 961 sge[0].length,
871 pci_unmap_addr_set(mad_send_wr, mapping, sge->addr); 962 DMA_TO_DEVICE);
963 pci_unmap_addr_set(mad_send_wr, header_mapping, sge[0].addr);
964
965 sge[1].addr = dma_map_single(mad_agent->device->dma_device,
966 ib_get_payload(mad_send_wr),
967 sge[1].length,
968 DMA_TO_DEVICE);
969 pci_unmap_addr_set(mad_send_wr, payload_mapping, sge[1].addr);
872 970
873 spin_lock_irqsave(&qp_info->send_queue.lock, flags); 971 spin_lock_irqsave(&qp_info->send_queue.lock, flags);
874 if (qp_info->send_queue.count < qp_info->send_queue.max_active) { 972 if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
@@ -885,11 +983,14 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
885 list_add_tail(&mad_send_wr->mad_list.list, list); 983 list_add_tail(&mad_send_wr->mad_list.list, list);
886 } 984 }
887 spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); 985 spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
888 if (ret) 986 if (ret) {
889 dma_unmap_single(mad_agent->device->dma_device, 987 dma_unmap_single(mad_agent->device->dma_device,
890 pci_unmap_addr(mad_send_wr, mapping), 988 pci_unmap_addr(mad_send_wr, header_mapping),
891 sge->length, DMA_TO_DEVICE); 989 sge[0].length, DMA_TO_DEVICE);
892 990 dma_unmap_single(mad_agent->device->dma_device,
991 pci_unmap_addr(mad_send_wr, payload_mapping),
992 sge[1].length, DMA_TO_DEVICE);
993 }
893 return ret; 994 return ret;
894} 995}
895 996
@@ -1860,8 +1961,11 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
1860 1961
1861retry: 1962retry:
1862 dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device, 1963 dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
1863 pci_unmap_addr(mad_send_wr, mapping), 1964 pci_unmap_addr(mad_send_wr, header_mapping),
1864 mad_send_wr->sg_list[0].length, DMA_TO_DEVICE); 1965 mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
1966 dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
1967 pci_unmap_addr(mad_send_wr, payload_mapping),
1968 mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
1865 queued_send_wr = NULL; 1969 queued_send_wr = NULL;
1866 spin_lock_irqsave(&send_queue->lock, flags); 1970 spin_lock_irqsave(&send_queue->lock, flags);
1867 list_del(&mad_list->list); 1971 list_del(&mad_list->list);
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 570f78682af3..a7125d4b5ccf 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -31,7 +31,7 @@
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 * 33 *
34 * $Id: mad_priv.h 2730 2005-06-28 16:43:03Z sean.hefty $ 34 * $Id: mad_priv.h 5596 2006-03-03 01:00:07Z sean.hefty $
35 */ 35 */
36 36
37#ifndef __IB_MAD_PRIV_H__ 37#ifndef __IB_MAD_PRIV_H__
@@ -85,6 +85,12 @@ struct ib_mad_private {
85 } mad; 85 } mad;
86} __attribute__ ((packed)); 86} __attribute__ ((packed));
87 87
88struct ib_rmpp_segment {
89 struct list_head list;
90 u32 num;
91 u8 data[0];
92};
93
88struct ib_mad_agent_private { 94struct ib_mad_agent_private {
89 struct list_head agent_list; 95 struct list_head agent_list;
90 struct ib_mad_agent agent; 96 struct ib_mad_agent agent;
@@ -119,7 +125,8 @@ struct ib_mad_send_wr_private {
119 struct list_head agent_list; 125 struct list_head agent_list;
120 struct ib_mad_agent_private *mad_agent_priv; 126 struct ib_mad_agent_private *mad_agent_priv;
121 struct ib_mad_send_buf send_buf; 127 struct ib_mad_send_buf send_buf;
122 DECLARE_PCI_UNMAP_ADDR(mapping) 128 DECLARE_PCI_UNMAP_ADDR(header_mapping)
129 DECLARE_PCI_UNMAP_ADDR(payload_mapping)
123 struct ib_send_wr send_wr; 130 struct ib_send_wr send_wr;
124 struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; 131 struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
125 __be64 tid; 132 __be64 tid;
@@ -130,11 +137,12 @@ struct ib_mad_send_wr_private {
130 enum ib_wc_status status; 137 enum ib_wc_status status;
131 138
132 /* RMPP control */ 139 /* RMPP control */
140 struct list_head rmpp_list;
141 struct ib_rmpp_segment *last_ack_seg;
142 struct ib_rmpp_segment *cur_seg;
133 int last_ack; 143 int last_ack;
134 int seg_num; 144 int seg_num;
135 int newwin; 145 int newwin;
136 int total_seg;
137 int data_offset;
138 int pad; 146 int pad;
139}; 147};
140 148
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 3249e1d8c07b..bacfdd5bddad 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -111,14 +111,14 @@ static int data_offset(u8 mgmt_class)
111 return IB_MGMT_RMPP_HDR; 111 return IB_MGMT_RMPP_HDR;
112} 112}
113 113
114static void format_ack(struct ib_rmpp_mad *ack, 114static void format_ack(struct ib_mad_send_buf *msg,
115 struct ib_rmpp_mad *data, 115 struct ib_rmpp_mad *data,
116 struct mad_rmpp_recv *rmpp_recv) 116 struct mad_rmpp_recv *rmpp_recv)
117{ 117{
118 struct ib_rmpp_mad *ack = msg->mad;
118 unsigned long flags; 119 unsigned long flags;
119 120
120 memcpy(&ack->mad_hdr, &data->mad_hdr, 121 memcpy(ack, &data->mad_hdr, msg->hdr_len);
121 data_offset(data->mad_hdr.mgmt_class));
122 122
123 ack->mad_hdr.method ^= IB_MGMT_METHOD_RESP; 123 ack->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
124 ack->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ACK; 124 ack->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ACK;
@@ -135,16 +135,16 @@ static void ack_recv(struct mad_rmpp_recv *rmpp_recv,
135 struct ib_mad_recv_wc *recv_wc) 135 struct ib_mad_recv_wc *recv_wc)
136{ 136{
137 struct ib_mad_send_buf *msg; 137 struct ib_mad_send_buf *msg;
138 int ret; 138 int ret, hdr_len;
139 139
140 hdr_len = data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
140 msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp, 141 msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp,
141 recv_wc->wc->pkey_index, 1, IB_MGMT_RMPP_HDR, 142 recv_wc->wc->pkey_index, 1, hdr_len,
142 IB_MGMT_RMPP_DATA, GFP_KERNEL); 143 0, GFP_KERNEL);
143 if (!msg) 144 if (!msg)
144 return; 145 return;
145 146
146 format_ack(msg->mad, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, 147 format_ack(msg, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv);
147 rmpp_recv);
148 msg->ah = rmpp_recv->ah; 148 msg->ah = rmpp_recv->ah;
149 ret = ib_post_send_mad(msg, NULL); 149 ret = ib_post_send_mad(msg, NULL);
150 if (ret) 150 if (ret)
@@ -156,16 +156,17 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
156{ 156{
157 struct ib_mad_send_buf *msg; 157 struct ib_mad_send_buf *msg;
158 struct ib_ah *ah; 158 struct ib_ah *ah;
159 int hdr_len;
159 160
160 ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc, 161 ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc,
161 recv_wc->recv_buf.grh, agent->port_num); 162 recv_wc->recv_buf.grh, agent->port_num);
162 if (IS_ERR(ah)) 163 if (IS_ERR(ah))
163 return (void *) ah; 164 return (void *) ah;
164 165
166 hdr_len = data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
165 msg = ib_create_send_mad(agent, recv_wc->wc->src_qp, 167 msg = ib_create_send_mad(agent, recv_wc->wc->src_qp,
166 recv_wc->wc->pkey_index, 1, 168 recv_wc->wc->pkey_index, 1,
167 IB_MGMT_RMPP_HDR, IB_MGMT_RMPP_DATA, 169 hdr_len, 0, GFP_KERNEL);
168 GFP_KERNEL);
169 if (IS_ERR(msg)) 170 if (IS_ERR(msg))
170 ib_destroy_ah(ah); 171 ib_destroy_ah(ah);
171 else 172 else
@@ -195,8 +196,7 @@ static void nack_recv(struct ib_mad_agent_private *agent,
195 return; 196 return;
196 197
197 rmpp_mad = msg->mad; 198 rmpp_mad = msg->mad;
198 memcpy(rmpp_mad, recv_wc->recv_buf.mad, 199 memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len);
199 data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class));
200 200
201 rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP; 201 rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
202 rmpp_mad->rmpp_hdr.rmpp_version = IB_MGMT_RMPP_VERSION; 202 rmpp_mad->rmpp_hdr.rmpp_version = IB_MGMT_RMPP_VERSION;
@@ -433,44 +433,6 @@ static struct ib_mad_recv_wc * complete_rmpp(struct mad_rmpp_recv *rmpp_recv)
433 return rmpp_wc; 433 return rmpp_wc;
434} 434}
435 435
436void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, void *buf)
437{
438 struct ib_mad_recv_buf *seg_buf;
439 struct ib_rmpp_mad *rmpp_mad;
440 void *data;
441 int size, len, offset;
442 u8 flags;
443
444 len = mad_recv_wc->mad_len;
445 if (len <= sizeof(struct ib_mad)) {
446 memcpy(buf, mad_recv_wc->recv_buf.mad, len);
447 return;
448 }
449
450 offset = data_offset(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
451
452 list_for_each_entry(seg_buf, &mad_recv_wc->rmpp_list, list) {
453 rmpp_mad = (struct ib_rmpp_mad *)seg_buf->mad;
454 flags = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr);
455
456 if (flags & IB_MGMT_RMPP_FLAG_FIRST) {
457 data = rmpp_mad;
458 size = sizeof(*rmpp_mad);
459 } else {
460 data = (void *) rmpp_mad + offset;
461 if (flags & IB_MGMT_RMPP_FLAG_LAST)
462 size = len;
463 else
464 size = sizeof(*rmpp_mad) - offset;
465 }
466
467 memcpy(buf, data, size);
468 len -= size;
469 buf += size;
470 }
471}
472EXPORT_SYMBOL(ib_coalesce_recv_mad);
473
474static struct ib_mad_recv_wc * 436static struct ib_mad_recv_wc *
475continue_rmpp(struct ib_mad_agent_private *agent, 437continue_rmpp(struct ib_mad_agent_private *agent,
476 struct ib_mad_recv_wc *mad_recv_wc) 438 struct ib_mad_recv_wc *mad_recv_wc)
@@ -570,50 +532,33 @@ start_rmpp(struct ib_mad_agent_private *agent,
570 return mad_recv_wc; 532 return mad_recv_wc;
571} 533}
572 534
573static inline u64 get_seg_addr(struct ib_mad_send_wr_private *mad_send_wr)
574{
575 return mad_send_wr->sg_list[0].addr + mad_send_wr->data_offset +
576 (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset) *
577 (mad_send_wr->seg_num - 1);
578}
579
580static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) 535static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
581{ 536{
582 struct ib_rmpp_mad *rmpp_mad; 537 struct ib_rmpp_mad *rmpp_mad;
583 int timeout; 538 int timeout;
584 u32 paylen; 539 u32 paylen = 0;
585 540
586 rmpp_mad = mad_send_wr->send_buf.mad; 541 rmpp_mad = mad_send_wr->send_buf.mad;
587 ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); 542 ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
588 rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(mad_send_wr->seg_num); 543 rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(++mad_send_wr->seg_num);
589 544
590 if (mad_send_wr->seg_num == 1) { 545 if (mad_send_wr->seg_num == 1) {
591 rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST; 546 rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST;
592 paylen = mad_send_wr->total_seg * IB_MGMT_RMPP_DATA - 547 paylen = mad_send_wr->send_buf.seg_count * IB_MGMT_RMPP_DATA -
593 mad_send_wr->pad; 548 mad_send_wr->pad;
594 rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen);
595 mad_send_wr->sg_list[0].length = sizeof(struct ib_rmpp_mad);
596 } else {
597 mad_send_wr->send_wr.num_sge = 2;
598 mad_send_wr->sg_list[0].length = mad_send_wr->data_offset;
599 mad_send_wr->sg_list[1].addr = get_seg_addr(mad_send_wr);
600 mad_send_wr->sg_list[1].length = sizeof(struct ib_rmpp_mad) -
601 mad_send_wr->data_offset;
602 mad_send_wr->sg_list[1].lkey = mad_send_wr->sg_list[0].lkey;
603 rmpp_mad->rmpp_hdr.paylen_newwin = 0;
604 } 549 }
605 550
606 if (mad_send_wr->seg_num == mad_send_wr->total_seg) { 551 if (mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count) {
607 rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST; 552 rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST;
608 paylen = IB_MGMT_RMPP_DATA - mad_send_wr->pad; 553 paylen = IB_MGMT_RMPP_DATA - mad_send_wr->pad;
609 rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen);
610 } 554 }
555 rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen);
611 556
612 /* 2 seconds for an ACK until we can find the packet lifetime */ 557 /* 2 seconds for an ACK until we can find the packet lifetime */
613 timeout = mad_send_wr->send_buf.timeout_ms; 558 timeout = mad_send_wr->send_buf.timeout_ms;
614 if (!timeout || timeout > 2000) 559 if (!timeout || timeout > 2000)
615 mad_send_wr->timeout = msecs_to_jiffies(2000); 560 mad_send_wr->timeout = msecs_to_jiffies(2000);
616 mad_send_wr->seg_num++; 561
617 return ib_send_mad(mad_send_wr); 562 return ib_send_mad(mad_send_wr);
618} 563}
619 564
@@ -629,7 +574,7 @@ static void abort_send(struct ib_mad_agent_private *agent, __be64 tid,
629 if (!mad_send_wr) 574 if (!mad_send_wr)
630 goto out; /* Unmatched send */ 575 goto out; /* Unmatched send */
631 576
632 if ((mad_send_wr->last_ack == mad_send_wr->total_seg) || 577 if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
633 (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS)) 578 (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
634 goto out; /* Send is already done */ 579 goto out; /* Send is already done */
635 580
@@ -645,6 +590,18 @@ out:
645 spin_unlock_irqrestore(&agent->lock, flags); 590 spin_unlock_irqrestore(&agent->lock, flags);
646} 591}
647 592
593static inline void adjust_last_ack(struct ib_mad_send_wr_private *wr,
594 int seg_num)
595{
596 struct list_head *list;
597
598 wr->last_ack = seg_num;
599 list = &wr->last_ack_seg->list;
600 list_for_each_entry(wr->last_ack_seg, list, list)
601 if (wr->last_ack_seg->num == seg_num)
602 break;
603}
604
648static void process_rmpp_ack(struct ib_mad_agent_private *agent, 605static void process_rmpp_ack(struct ib_mad_agent_private *agent,
649 struct ib_mad_recv_wc *mad_recv_wc) 606 struct ib_mad_recv_wc *mad_recv_wc)
650{ 607{
@@ -675,11 +632,12 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
675 if (!mad_send_wr) 632 if (!mad_send_wr)
676 goto out; /* Unmatched ACK */ 633 goto out; /* Unmatched ACK */
677 634
678 if ((mad_send_wr->last_ack == mad_send_wr->total_seg) || 635 if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
679 (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS)) 636 (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
680 goto out; /* Send is already done */ 637 goto out; /* Send is already done */
681 638
682 if (seg_num > mad_send_wr->total_seg || seg_num > mad_send_wr->newwin) { 639 if (seg_num > mad_send_wr->send_buf.seg_count ||
640 seg_num > mad_send_wr->newwin) {
683 spin_unlock_irqrestore(&agent->lock, flags); 641 spin_unlock_irqrestore(&agent->lock, flags);
684 abort_send(agent, rmpp_mad->mad_hdr.tid, 642 abort_send(agent, rmpp_mad->mad_hdr.tid,
685 IB_MGMT_RMPP_STATUS_S2B); 643 IB_MGMT_RMPP_STATUS_S2B);
@@ -691,11 +649,11 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
691 goto out; /* Old ACK */ 649 goto out; /* Old ACK */
692 650
693 if (seg_num > mad_send_wr->last_ack) { 651 if (seg_num > mad_send_wr->last_ack) {
694 mad_send_wr->last_ack = seg_num; 652 adjust_last_ack(mad_send_wr, seg_num);
695 mad_send_wr->retries = mad_send_wr->send_buf.retries; 653 mad_send_wr->retries = mad_send_wr->send_buf.retries;
696 } 654 }
697 mad_send_wr->newwin = newwin; 655 mad_send_wr->newwin = newwin;
698 if (mad_send_wr->last_ack == mad_send_wr->total_seg) { 656 if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) {
699 /* If no response is expected, the ACK completes the send */ 657 /* If no response is expected, the ACK completes the send */
700 if (!mad_send_wr->send_buf.timeout_ms) { 658 if (!mad_send_wr->send_buf.timeout_ms) {
701 struct ib_mad_send_wc wc; 659 struct ib_mad_send_wc wc;
@@ -714,7 +672,7 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
714 mad_send_wr->send_buf.timeout_ms); 672 mad_send_wr->send_buf.timeout_ms);
715 } else if (mad_send_wr->refcount == 1 && 673 } else if (mad_send_wr->refcount == 1 &&
716 mad_send_wr->seg_num < mad_send_wr->newwin && 674 mad_send_wr->seg_num < mad_send_wr->newwin &&
717 mad_send_wr->seg_num <= mad_send_wr->total_seg) { 675 mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) {
718 /* Send failure will just result in a timeout/retry */ 676 /* Send failure will just result in a timeout/retry */
719 ret = send_next_seg(mad_send_wr); 677 ret = send_next_seg(mad_send_wr);
720 if (ret) 678 if (ret)
@@ -838,31 +796,19 @@ out:
838int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) 796int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
839{ 797{
840 struct ib_rmpp_mad *rmpp_mad; 798 struct ib_rmpp_mad *rmpp_mad;
841 int i, total_len, ret; 799 int ret;
842 800
843 rmpp_mad = mad_send_wr->send_buf.mad; 801 rmpp_mad = mad_send_wr->send_buf.mad;
844 if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & 802 if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
845 IB_MGMT_RMPP_FLAG_ACTIVE)) 803 IB_MGMT_RMPP_FLAG_ACTIVE))
846 return IB_RMPP_RESULT_UNHANDLED; 804 return IB_RMPP_RESULT_UNHANDLED;
847 805
848 if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) 806 if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) {
807 mad_send_wr->seg_num = 1;
849 return IB_RMPP_RESULT_INTERNAL; 808 return IB_RMPP_RESULT_INTERNAL;
809 }
850 810
851 if (mad_send_wr->send_wr.num_sge > 1)
852 return -EINVAL; /* TODO: support num_sge > 1 */
853
854 mad_send_wr->seg_num = 1;
855 mad_send_wr->newwin = 1; 811 mad_send_wr->newwin = 1;
856 mad_send_wr->data_offset = data_offset(rmpp_mad->mad_hdr.mgmt_class);
857
858 total_len = 0;
859 for (i = 0; i < mad_send_wr->send_wr.num_sge; i++)
860 total_len += mad_send_wr->send_wr.sg_list[i].length;
861
862 mad_send_wr->total_seg = (total_len - mad_send_wr->data_offset) /
863 (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset);
864 mad_send_wr->pad = total_len - IB_MGMT_RMPP_HDR -
865 be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
866 812
867 /* We need to wait for the final ACK even if there isn't a response */ 813 /* We need to wait for the final ACK even if there isn't a response */
868 mad_send_wr->refcount += (mad_send_wr->timeout == 0); 814 mad_send_wr->refcount += (mad_send_wr->timeout == 0);
@@ -893,14 +839,14 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
893 if (!mad_send_wr->timeout) 839 if (!mad_send_wr->timeout)
894 return IB_RMPP_RESULT_PROCESSED; /* Response received */ 840 return IB_RMPP_RESULT_PROCESSED; /* Response received */
895 841
896 if (mad_send_wr->last_ack == mad_send_wr->total_seg) { 842 if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) {
897 mad_send_wr->timeout = 843 mad_send_wr->timeout =
898 msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); 844 msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
899 return IB_RMPP_RESULT_PROCESSED; /* Send done */ 845 return IB_RMPP_RESULT_PROCESSED; /* Send done */
900 } 846 }
901 847
902 if (mad_send_wr->seg_num > mad_send_wr->newwin || 848 if (mad_send_wr->seg_num == mad_send_wr->newwin ||
903 mad_send_wr->seg_num > mad_send_wr->total_seg) 849 mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count)
904 return IB_RMPP_RESULT_PROCESSED; /* Wait for ACK */ 850 return IB_RMPP_RESULT_PROCESSED; /* Wait for ACK */
905 851
906 ret = send_next_seg(mad_send_wr); 852 ret = send_next_seg(mad_send_wr);
@@ -921,10 +867,12 @@ int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr)
921 IB_MGMT_RMPP_FLAG_ACTIVE)) 867 IB_MGMT_RMPP_FLAG_ACTIVE))
922 return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ 868 return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
923 869
924 if (mad_send_wr->last_ack == mad_send_wr->total_seg) 870 if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count)
925 return IB_RMPP_RESULT_PROCESSED; 871 return IB_RMPP_RESULT_PROCESSED;
926 872
927 mad_send_wr->seg_num = mad_send_wr->last_ack + 1; 873 mad_send_wr->seg_num = mad_send_wr->last_ack;
874 mad_send_wr->cur_seg = mad_send_wr->last_ack_seg;
875
928 ret = send_next_seg(mad_send_wr); 876 ret = send_next_seg(mad_send_wr);
929 if (ret) 877 if (ret)
930 return IB_RMPP_RESULT_PROCESSED; 878 return IB_RMPP_RESULT_PROCESSED;
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index c908de8db5a9..fb6cd42601f9 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -31,7 +31,7 @@
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 * 33 *
34 * $Id: user_mad.c 4010 2005-11-09 23:11:56Z roland $ 34 * $Id: user_mad.c 5596 2006-03-03 01:00:07Z sean.hefty $
35 */ 35 */
36 36
37#include <linux/module.h> 37#include <linux/module.h>
@@ -121,6 +121,7 @@ struct ib_umad_file {
121 121
122struct ib_umad_packet { 122struct ib_umad_packet {
123 struct ib_mad_send_buf *msg; 123 struct ib_mad_send_buf *msg;
124 struct ib_mad_recv_wc *recv_wc;
124 struct list_head list; 125 struct list_head list;
125 int length; 126 int length;
126 struct ib_user_mad mad; 127 struct ib_user_mad mad;
@@ -176,31 +177,32 @@ static int queue_packet(struct ib_umad_file *file,
176 return ret; 177 return ret;
177} 178}
178 179
180static int data_offset(u8 mgmt_class)
181{
182 if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
183 return IB_MGMT_SA_HDR;
184 else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
185 (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
186 return IB_MGMT_VENDOR_HDR;
187 else
188 return IB_MGMT_RMPP_HDR;
189}
190
179static void send_handler(struct ib_mad_agent *agent, 191static void send_handler(struct ib_mad_agent *agent,
180 struct ib_mad_send_wc *send_wc) 192 struct ib_mad_send_wc *send_wc)
181{ 193{
182 struct ib_umad_file *file = agent->context; 194 struct ib_umad_file *file = agent->context;
183 struct ib_umad_packet *timeout;
184 struct ib_umad_packet *packet = send_wc->send_buf->context[0]; 195 struct ib_umad_packet *packet = send_wc->send_buf->context[0];
185 196
186 ib_destroy_ah(packet->msg->ah); 197 ib_destroy_ah(packet->msg->ah);
187 ib_free_send_mad(packet->msg); 198 ib_free_send_mad(packet->msg);
188 199
189 if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { 200 if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
190 timeout = kzalloc(sizeof *timeout + IB_MGMT_MAD_HDR, GFP_KERNEL); 201 packet->length = IB_MGMT_MAD_HDR;
191 if (!timeout) 202 packet->mad.hdr.status = ETIMEDOUT;
192 goto out; 203 if (!queue_packet(file, agent, packet))
193 204 return;
194 timeout->length = IB_MGMT_MAD_HDR;
195 timeout->mad.hdr.id = packet->mad.hdr.id;
196 timeout->mad.hdr.status = ETIMEDOUT;
197 memcpy(timeout->mad.data, packet->mad.data,
198 sizeof (struct ib_mad_hdr));
199
200 if (queue_packet(file, agent, timeout))
201 kfree(timeout);
202 } 205 }
203out:
204 kfree(packet); 206 kfree(packet);
205} 207}
206 208
@@ -209,22 +211,20 @@ static void recv_handler(struct ib_mad_agent *agent,
209{ 211{
210 struct ib_umad_file *file = agent->context; 212 struct ib_umad_file *file = agent->context;
211 struct ib_umad_packet *packet; 213 struct ib_umad_packet *packet;
212 int length;
213 214
214 if (mad_recv_wc->wc->status != IB_WC_SUCCESS) 215 if (mad_recv_wc->wc->status != IB_WC_SUCCESS)
215 goto out; 216 goto err1;
216 217
217 length = mad_recv_wc->mad_len; 218 packet = kzalloc(sizeof *packet, GFP_KERNEL);
218 packet = kzalloc(sizeof *packet + length, GFP_KERNEL);
219 if (!packet) 219 if (!packet)
220 goto out; 220 goto err1;
221 221
222 packet->length = length; 222 packet->length = mad_recv_wc->mad_len;
223 223 packet->recv_wc = mad_recv_wc;
224 ib_coalesce_recv_mad(mad_recv_wc, packet->mad.data);
225 224
226 packet->mad.hdr.status = 0; 225 packet->mad.hdr.status = 0;
227 packet->mad.hdr.length = length + sizeof (struct ib_user_mad); 226 packet->mad.hdr.length = sizeof (struct ib_user_mad) +
227 mad_recv_wc->mad_len;
228 packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); 228 packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp);
229 packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); 229 packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid);
230 packet->mad.hdr.sl = mad_recv_wc->wc->sl; 230 packet->mad.hdr.sl = mad_recv_wc->wc->sl;
@@ -240,12 +240,79 @@ static void recv_handler(struct ib_mad_agent *agent,
240 } 240 }
241 241
242 if (queue_packet(file, agent, packet)) 242 if (queue_packet(file, agent, packet))
243 kfree(packet); 243 goto err2;
244 return;
244 245
245out: 246err2:
247 kfree(packet);
248err1:
246 ib_free_recv_mad(mad_recv_wc); 249 ib_free_recv_mad(mad_recv_wc);
247} 250}
248 251
252static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
253 size_t count)
254{
255 struct ib_mad_recv_buf *recv_buf;
256 int left, seg_payload, offset, max_seg_payload;
257
258 /* We need enough room to copy the first (or only) MAD segment. */
259 recv_buf = &packet->recv_wc->recv_buf;
260 if ((packet->length <= sizeof (*recv_buf->mad) &&
261 count < sizeof (packet->mad) + packet->length) ||
262 (packet->length > sizeof (*recv_buf->mad) &&
263 count < sizeof (packet->mad) + sizeof (*recv_buf->mad)))
264 return -EINVAL;
265
266 if (copy_to_user(buf, &packet->mad, sizeof (packet->mad)))
267 return -EFAULT;
268
269 buf += sizeof (packet->mad);
270 seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad));
271 if (copy_to_user(buf, recv_buf->mad, seg_payload))
272 return -EFAULT;
273
274 if (seg_payload < packet->length) {
275 /*
276 * Multipacket RMPP MAD message. Copy remainder of message.
277 * Note that last segment may have a shorter payload.
278 */
279 if (count < sizeof (packet->mad) + packet->length) {
280 /*
281 * The buffer is too small, return the first RMPP segment,
282 * which includes the RMPP message length.
283 */
284 return -ENOSPC;
285 }
286 offset = data_offset(recv_buf->mad->mad_hdr.mgmt_class);
287 max_seg_payload = sizeof (struct ib_mad) - offset;
288
289 for (left = packet->length - seg_payload, buf += seg_payload;
290 left; left -= seg_payload, buf += seg_payload) {
291 recv_buf = container_of(recv_buf->list.next,
292 struct ib_mad_recv_buf, list);
293 seg_payload = min(left, max_seg_payload);
294 if (copy_to_user(buf, ((void *) recv_buf->mad) + offset,
295 seg_payload))
296 return -EFAULT;
297 }
298 }
299 return sizeof (packet->mad) + packet->length;
300}
301
302static ssize_t copy_send_mad(char __user *buf, struct ib_umad_packet *packet,
303 size_t count)
304{
305 ssize_t size = sizeof (packet->mad) + packet->length;
306
307 if (count < size)
308 return -EINVAL;
309
310 if (copy_to_user(buf, &packet->mad, size))
311 return -EFAULT;
312
313 return size;
314}
315
249static ssize_t ib_umad_read(struct file *filp, char __user *buf, 316static ssize_t ib_umad_read(struct file *filp, char __user *buf,
250 size_t count, loff_t *pos) 317 size_t count, loff_t *pos)
251{ 318{
@@ -253,7 +320,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
253 struct ib_umad_packet *packet; 320 struct ib_umad_packet *packet;
254 ssize_t ret; 321 ssize_t ret;
255 322
256 if (count < sizeof (struct ib_user_mad) + sizeof (struct ib_mad)) 323 if (count < sizeof (struct ib_user_mad))
257 return -EINVAL; 324 return -EINVAL;
258 325
259 spin_lock_irq(&file->recv_lock); 326 spin_lock_irq(&file->recv_lock);
@@ -276,28 +343,44 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
276 343
277 spin_unlock_irq(&file->recv_lock); 344 spin_unlock_irq(&file->recv_lock);
278 345
279 if (count < packet->length + sizeof (struct ib_user_mad)) { 346 if (packet->recv_wc)
280 /* Return length needed (and first RMPP segment) if too small */ 347 ret = copy_recv_mad(buf, packet, count);
281 if (copy_to_user(buf, &packet->mad,
282 sizeof (struct ib_user_mad) + sizeof (struct ib_mad)))
283 ret = -EFAULT;
284 else
285 ret = -ENOSPC;
286 } else if (copy_to_user(buf, &packet->mad,
287 packet->length + sizeof (struct ib_user_mad)))
288 ret = -EFAULT;
289 else 348 else
290 ret = packet->length + sizeof (struct ib_user_mad); 349 ret = copy_send_mad(buf, packet, count);
350
291 if (ret < 0) { 351 if (ret < 0) {
292 /* Requeue packet */ 352 /* Requeue packet */
293 spin_lock_irq(&file->recv_lock); 353 spin_lock_irq(&file->recv_lock);
294 list_add(&packet->list, &file->recv_list); 354 list_add(&packet->list, &file->recv_list);
295 spin_unlock_irq(&file->recv_lock); 355 spin_unlock_irq(&file->recv_lock);
296 } else 356 } else {
357 if (packet->recv_wc)
358 ib_free_recv_mad(packet->recv_wc);
297 kfree(packet); 359 kfree(packet);
360 }
298 return ret; 361 return ret;
299} 362}
300 363
364static int copy_rmpp_mad(struct ib_mad_send_buf *msg, const char __user *buf)
365{
366 int left, seg;
367
368 /* Copy class specific header */
369 if ((msg->hdr_len > IB_MGMT_RMPP_HDR) &&
370 copy_from_user(msg->mad + IB_MGMT_RMPP_HDR, buf + IB_MGMT_RMPP_HDR,
371 msg->hdr_len - IB_MGMT_RMPP_HDR))
372 return -EFAULT;
373
374 /* All headers are in place. Copy data segments. */
375 for (seg = 1, left = msg->data_len, buf += msg->hdr_len; left > 0;
376 seg++, left -= msg->seg_size, buf += msg->seg_size) {
377 if (copy_from_user(ib_get_rmpp_segment(msg, seg), buf,
378 min(left, msg->seg_size)))
379 return -EFAULT;
380 }
381 return 0;
382}
383
301static ssize_t ib_umad_write(struct file *filp, const char __user *buf, 384static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
302 size_t count, loff_t *pos) 385 size_t count, loff_t *pos)
303{ 386{
@@ -309,14 +392,12 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
309 struct ib_rmpp_mad *rmpp_mad; 392 struct ib_rmpp_mad *rmpp_mad;
310 u8 method; 393 u8 method;
311 __be64 *tid; 394 __be64 *tid;
312 int ret, length, hdr_len, copy_offset; 395 int ret, data_len, hdr_len, copy_offset, rmpp_active;
313 int rmpp_active, has_rmpp_header;
314 396
315 if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR) 397 if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)
316 return -EINVAL; 398 return -EINVAL;
317 399
318 length = count - sizeof (struct ib_user_mad); 400 packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
319 packet = kmalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
320 if (!packet) 401 if (!packet)
321 return -ENOMEM; 402 return -ENOMEM;
322 403
@@ -363,35 +444,25 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
363 if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) { 444 if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) {
364 hdr_len = IB_MGMT_SA_HDR; 445 hdr_len = IB_MGMT_SA_HDR;
365 copy_offset = IB_MGMT_RMPP_HDR; 446 copy_offset = IB_MGMT_RMPP_HDR;
366 has_rmpp_header = 1; 447 rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
448 IB_MGMT_RMPP_FLAG_ACTIVE;
367 } else if (rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START && 449 } else if (rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START &&
368 rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END) { 450 rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END) {
369 hdr_len = IB_MGMT_VENDOR_HDR; 451 hdr_len = IB_MGMT_VENDOR_HDR;
370 copy_offset = IB_MGMT_RMPP_HDR; 452 copy_offset = IB_MGMT_RMPP_HDR;
371 has_rmpp_header = 1; 453 rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
454 IB_MGMT_RMPP_FLAG_ACTIVE;
372 } else { 455 } else {
373 hdr_len = IB_MGMT_MAD_HDR; 456 hdr_len = IB_MGMT_MAD_HDR;
374 copy_offset = IB_MGMT_MAD_HDR; 457 copy_offset = IB_MGMT_MAD_HDR;
375 has_rmpp_header = 0;
376 }
377
378 if (has_rmpp_header)
379 rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
380 IB_MGMT_RMPP_FLAG_ACTIVE;
381 else
382 rmpp_active = 0; 458 rmpp_active = 0;
383
384 /* Validate that the management class can support RMPP */
385 if (rmpp_active && !agent->rmpp_version) {
386 ret = -EINVAL;
387 goto err_ah;
388 } 459 }
389 460
461 data_len = count - sizeof (struct ib_user_mad) - hdr_len;
390 packet->msg = ib_create_send_mad(agent, 462 packet->msg = ib_create_send_mad(agent,
391 be32_to_cpu(packet->mad.hdr.qpn), 463 be32_to_cpu(packet->mad.hdr.qpn),
392 0, rmpp_active, 464 0, rmpp_active, hdr_len,
393 hdr_len, length - hdr_len, 465 data_len, GFP_KERNEL);
394 GFP_KERNEL);
395 if (IS_ERR(packet->msg)) { 466 if (IS_ERR(packet->msg)) {
396 ret = PTR_ERR(packet->msg); 467 ret = PTR_ERR(packet->msg);
397 goto err_ah; 468 goto err_ah;
@@ -402,14 +473,21 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
402 packet->msg->retries = packet->mad.hdr.retries; 473 packet->msg->retries = packet->mad.hdr.retries;
403 packet->msg->context[0] = packet; 474 packet->msg->context[0] = packet;
404 475
405 /* Copy MAD headers (RMPP header in place) */ 476 /* Copy MAD header. Any RMPP header is already in place. */
406 memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR); 477 memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR);
407 /* Now, copy rest of message from user into send buffer */ 478 buf += sizeof (struct ib_user_mad);
408 if (copy_from_user(packet->msg->mad + copy_offset, 479
409 buf + sizeof (struct ib_user_mad) + copy_offset, 480 if (!rmpp_active) {
410 length - copy_offset)) { 481 if (copy_from_user(packet->msg->mad + copy_offset,
411 ret = -EFAULT; 482 buf + copy_offset,
412 goto err_msg; 483 hdr_len + data_len - copy_offset)) {
484 ret = -EFAULT;
485 goto err_msg;
486 }
487 } else {
488 ret = copy_rmpp_mad(packet->msg, buf);
489 if (ret)
490 goto err_msg;
413 } 491 }
414 492
415 /* 493 /*
@@ -433,18 +511,14 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
433 goto err_msg; 511 goto err_msg;
434 512
435 up_read(&file->port->mutex); 513 up_read(&file->port->mutex);
436
437 return count; 514 return count;
438 515
439err_msg: 516err_msg:
440 ib_free_send_mad(packet->msg); 517 ib_free_send_mad(packet->msg);
441
442err_ah: 518err_ah:
443 ib_destroy_ah(ah); 519 ib_destroy_ah(ah);
444
445err_up: 520err_up:
446 up_read(&file->port->mutex); 521 up_read(&file->port->mutex);
447
448err: 522err:
449 kfree(packet); 523 kfree(packet);
450 return ret; 524 return ret;
@@ -627,8 +701,11 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
627 already_dead = file->agents_dead; 701 already_dead = file->agents_dead;
628 file->agents_dead = 1; 702 file->agents_dead = 1;
629 703
630 list_for_each_entry_safe(packet, tmp, &file->recv_list, list) 704 list_for_each_entry_safe(packet, tmp, &file->recv_list, list) {
705 if (packet->recv_wc)
706 ib_free_recv_mad(packet->recv_wc);
631 kfree(packet); 707 kfree(packet);
708 }
632 709
633 list_del(&file->port_list); 710 list_del(&file->port_list);
634 711
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 2c133506742b..51ab8eddb295 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -33,7 +33,7 @@
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE. 34 * SOFTWARE.
35 * 35 *
36 * $Id: ib_mad.h 2775 2005-07-02 13:42:12Z halr $ 36 * $Id: ib_mad.h 5596 2006-03-03 01:00:07Z sean.hefty $
37 */ 37 */
38 38
39#if !defined( IB_MAD_H ) 39#if !defined( IB_MAD_H )
@@ -208,15 +208,23 @@ struct ib_class_port_info
208/** 208/**
209 * ib_mad_send_buf - MAD data buffer and work request for sends. 209 * ib_mad_send_buf - MAD data buffer and work request for sends.
210 * @next: A pointer used to chain together MADs for posting. 210 * @next: A pointer used to chain together MADs for posting.
211 * @mad: References an allocated MAD data buffer. 211 * @mad: References an allocated MAD data buffer for MADs that do not have
212 * RMPP active. For MADs using RMPP, references the common and management
213 * class specific headers.
212 * @mad_agent: MAD agent that allocated the buffer. 214 * @mad_agent: MAD agent that allocated the buffer.
213 * @ah: The address handle to use when sending the MAD. 215 * @ah: The address handle to use when sending the MAD.
214 * @context: User-controlled context fields. 216 * @context: User-controlled context fields.
217 * @hdr_len: Indicates the size of the data header of the MAD. This length
218 * includes the common MAD, RMPP, and class specific headers.
219 * @data_len: Indicates the total size of user-transferred data.
220 * @seg_count: The number of RMPP segments allocated for this send.
221 * @seg_size: Size of each RMPP segment.
215 * @timeout_ms: Time to wait for a response. 222 * @timeout_ms: Time to wait for a response.
216 * @retries: Number of times to retry a request for a response. 223 * @retries: Number of times to retry a request for a response.
217 * 224 *
218 * Users are responsible for initializing the MAD buffer itself, with the 225 * Users are responsible for initializing the MAD buffer itself, with the
219 * exception of specifying the payload length field in any RMPP MAD. 226 * exception of any RMPP header. Additional segment buffer space allocated
227 * beyond data_len is padding.
220 */ 228 */
221struct ib_mad_send_buf { 229struct ib_mad_send_buf {
222 struct ib_mad_send_buf *next; 230 struct ib_mad_send_buf *next;
@@ -224,6 +232,10 @@ struct ib_mad_send_buf {
224 struct ib_mad_agent *mad_agent; 232 struct ib_mad_agent *mad_agent;
225 struct ib_ah *ah; 233 struct ib_ah *ah;
226 void *context[2]; 234 void *context[2];
235 int hdr_len;
236 int data_len;
237 int seg_count;
238 int seg_size;
227 int timeout_ms; 239 int timeout_ms;
228 int retries; 240 int retries;
229}; 241};
@@ -299,7 +311,7 @@ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
299 * @mad_recv_wc: Received work completion information on the received MAD. 311 * @mad_recv_wc: Received work completion information on the received MAD.
300 * 312 *
301 * MADs received in response to a send request operation will be handed to 313 * MADs received in response to a send request operation will be handed to
302 * the user after the send operation completes. All data buffers given 314 * the user before the send operation completes. All data buffers given
303 * to registered agents through this routine are owned by the receiving 315 * to registered agents through this routine are owned by the receiving
304 * client, except for snooping agents. Clients snooping MADs should not 316 * client, except for snooping agents. Clients snooping MADs should not
305 * modify the data referenced by @mad_recv_wc. 317 * modify the data referenced by @mad_recv_wc.
@@ -485,17 +497,6 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent);
485int ib_post_send_mad(struct ib_mad_send_buf *send_buf, 497int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
486 struct ib_mad_send_buf **bad_send_buf); 498 struct ib_mad_send_buf **bad_send_buf);
487 499
488/**
489 * ib_coalesce_recv_mad - Coalesces received MAD data into a single buffer.
490 * @mad_recv_wc: Work completion information for a received MAD.
491 * @buf: User-provided data buffer to receive the coalesced buffers. The
492 * referenced buffer should be at least the size of the mad_len specified
493 * by @mad_recv_wc.
494 *
495 * This call copies a chain of received MAD segments into a single data buffer,
496 * removing duplicated headers.
497 */
498void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, void *buf);
499 500
500/** 501/**
501 * ib_free_recv_mad - Returns data buffers used to receive a MAD. 502 * ib_free_recv_mad - Returns data buffers used to receive a MAD.
@@ -590,9 +591,10 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
590 * with an initialized work request structure. Users may modify the returned 591 * with an initialized work request structure. Users may modify the returned
591 * MAD data buffer before posting the send. 592 * MAD data buffer before posting the send.
592 * 593 *
593 * The returned data buffer will be cleared. Users are responsible for 594 * The returned MAD header, class specific headers, and any padding will be
594 * initializing the common MAD and any class specific headers. If @rmpp_active 595 * cleared. Users are responsible for initializing the common MAD header,
595 * is set, the RMPP header will be initialized for sending. 596 * any class specific header, and MAD data area.
597 * If @rmpp_active is set, the RMPP header will be initialized for sending.
596 */ 598 */
597struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, 599struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
598 u32 remote_qpn, u16 pkey_index, 600 u32 remote_qpn, u16 pkey_index,
@@ -601,6 +603,16 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
601 gfp_t gfp_mask); 603 gfp_t gfp_mask);
602 604
603/** 605/**
606 * ib_get_rmpp_segment - returns the data buffer for a given RMPP segment.
607 * @send_buf: Previously allocated send data buffer.
608 * @seg_num: number of segment to return
609 *
610 * This routine returns a pointer to the data buffer of an RMPP MAD.
611 * Users must provide synchronization to @send_buf around this call.
612 */
613void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num);
614
615/**
604 * ib_free_send_mad - Returns data buffers used to send a MAD. 616 * ib_free_send_mad - Returns data buffers used to send a MAD.
605 * @send_buf: Previously allocated send data buffer. 617 * @send_buf: Previously allocated send data buffer.
606 */ 618 */