about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c  42
-rw-r--r--  include/linux/mlx4/qp.h  4
2 files changed, 43 insertions, 3 deletions
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 355a31f9c03c..28a08bdd1800 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -192,6 +192,8 @@ static int send_wqe_overhead(enum ib_qp_type type)
192 case IB_QPT_GSI: 192 case IB_QPT_GSI:
193 return sizeof (struct mlx4_wqe_ctrl_seg) + 193 return sizeof (struct mlx4_wqe_ctrl_seg) +
194 ALIGN(MLX4_IB_UD_HEADER_SIZE + 194 ALIGN(MLX4_IB_UD_HEADER_SIZE +
195 DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
196 MLX4_INLINE_ALIGN) *
195 sizeof (struct mlx4_wqe_inline_seg), 197 sizeof (struct mlx4_wqe_inline_seg),
196 sizeof (struct mlx4_wqe_data_seg)) + 198 sizeof (struct mlx4_wqe_data_seg)) +
197 ALIGN(4 + 199 ALIGN(4 +
@@ -1049,6 +1051,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1049 u16 pkey; 1051 u16 pkey;
1050 int send_size; 1052 int send_size;
1051 int header_size; 1053 int header_size;
1054 int spc;
1052 int i; 1055 int i;
1053 1056
1054 send_size = 0; 1057 send_size = 0;
@@ -1124,10 +1127,43 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1124 printk("\n"); 1127 printk("\n");
1125 } 1128 }
1126 1129
1127 inl->byte_count = cpu_to_be32(1 << 31 | header_size); 1130 /*
1128 memcpy(inl + 1, sqp->header_buf, header_size); 1131 * Inline data segments may not cross a 64 byte boundary. If
1132 * our UD header is bigger than the space available up to the
1133 * next 64 byte boundary in the WQE, use two inline data
1134 * segments to hold the UD header.
1135 */
1136 spc = MLX4_INLINE_ALIGN -
1137 ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
1138 if (header_size <= spc) {
1139 inl->byte_count = cpu_to_be32(1 << 31 | header_size);
1140 memcpy(inl + 1, sqp->header_buf, header_size);
1141 i = 1;
1142 } else {
1143 inl->byte_count = cpu_to_be32(1 << 31 | spc);
1144 memcpy(inl + 1, sqp->header_buf, spc);
1145
1146 inl = (void *) (inl + 1) + spc;
1147 memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
1148 /*
1149 * Need a barrier here to make sure all the data is
1150 * visible before the byte_count field is set.
1151 * Otherwise the HCA prefetcher could grab the 64-byte
1152 * chunk with this inline segment and get a valid (!=
1153 * 0xffffffff) byte count but stale data, and end up
1154 * generating a packet with bad headers.
1155 *
1156 * The first inline segment's byte_count field doesn't
1157 * need a barrier, because it comes after a
1158 * control/MLX segment and therefore is at an offset
1159 * of 16 mod 64.
1160 */
1161 wmb();
1162 inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
1163 i = 2;
1164 }
1129 1165
1130 return ALIGN(sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); 1166 return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
1131} 1167}
1132 1168
1133static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq) 1169static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 9eeb61adf6a3..10c57d279144 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -269,6 +269,10 @@ struct mlx4_wqe_data_seg {
269 __be64 addr; 269 __be64 addr;
270}; 270};
271 271
272enum {
273 MLX4_INLINE_ALIGN = 64,
274};
275
272struct mlx4_wqe_inline_seg { 276struct mlx4_wqe_inline_seg {
273 __be32 byte_count; 277 __be32 byte_count;
274}; 278};