diff options
-rw-r--r-- | drivers/infiniband/hw/mlx4/qp.c | 42 | ||||
-rw-r--r-- | include/linux/mlx4/qp.h | 4 |
2 files changed, 43 insertions, 3 deletions
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 355a31f9c03c..28a08bdd1800 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c | |||
@@ -192,6 +192,8 @@ static int send_wqe_overhead(enum ib_qp_type type) | |||
192 | case IB_QPT_GSI: | 192 | case IB_QPT_GSI: |
193 | return sizeof (struct mlx4_wqe_ctrl_seg) + | 193 | return sizeof (struct mlx4_wqe_ctrl_seg) + |
194 | ALIGN(MLX4_IB_UD_HEADER_SIZE + | 194 | ALIGN(MLX4_IB_UD_HEADER_SIZE + |
195 | DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE, | ||
196 | MLX4_INLINE_ALIGN) * | ||
195 | sizeof (struct mlx4_wqe_inline_seg), | 197 | sizeof (struct mlx4_wqe_inline_seg), |
196 | sizeof (struct mlx4_wqe_data_seg)) + | 198 | sizeof (struct mlx4_wqe_data_seg)) + |
197 | ALIGN(4 + | 199 | ALIGN(4 + |
@@ -1049,6 +1051,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, | |||
1049 | u16 pkey; | 1051 | u16 pkey; |
1050 | int send_size; | 1052 | int send_size; |
1051 | int header_size; | 1053 | int header_size; |
1054 | int spc; | ||
1052 | int i; | 1055 | int i; |
1053 | 1056 | ||
1054 | send_size = 0; | 1057 | send_size = 0; |
@@ -1124,10 +1127,43 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, | |||
1124 | printk("\n"); | 1127 | printk("\n"); |
1125 | } | 1128 | } |
1126 | 1129 | ||
1127 | inl->byte_count = cpu_to_be32(1 << 31 | header_size); | 1130 | /* |
1128 | memcpy(inl + 1, sqp->header_buf, header_size); | 1131 | * Inline data segments may not cross a 64 byte boundary. If |
1132 | * our UD header is bigger than the space available up to the | ||
1133 | * next 64 byte boundary in the WQE, use two inline data | ||
1134 | * segments to hold the UD header. | ||
1135 | */ | ||
1136 | spc = MLX4_INLINE_ALIGN - | ||
1137 | ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); | ||
1138 | if (header_size <= spc) { | ||
1139 | inl->byte_count = cpu_to_be32(1 << 31 | header_size); | ||
1140 | memcpy(inl + 1, sqp->header_buf, header_size); | ||
1141 | i = 1; | ||
1142 | } else { | ||
1143 | inl->byte_count = cpu_to_be32(1 << 31 | spc); | ||
1144 | memcpy(inl + 1, sqp->header_buf, spc); | ||
1145 | |||
1146 | inl = (void *) (inl + 1) + spc; | ||
1147 | memcpy(inl + 1, sqp->header_buf + spc, header_size - spc); | ||
1148 | /* | ||
1149 | * Need a barrier here to make sure all the data is | ||
1150 | * visible before the byte_count field is set. | ||
1151 | * Otherwise the HCA prefetcher could grab the 64-byte | ||
1152 | * chunk with this inline segment and get a valid (!= | ||
1153 | * 0xffffffff) byte count but stale data, and end up | ||
1154 | * generating a packet with bad headers. | ||
1155 | * | ||
1156 | * The first inline segment's byte_count field doesn't | ||
1157 | * need a barrier, because it comes after a | ||
1158 | * control/MLX segment and therefore is at an offset | ||
1159 | * of 16 mod 64. | ||
1160 | */ | ||
1161 | wmb(); | ||
1162 | inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc)); | ||
1163 | i = 2; | ||
1164 | } | ||
1129 | 1165 | ||
1130 | return ALIGN(sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); | 1166 | return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); |
1131 | } | 1167 | } |
1132 | 1168 | ||
1133 | static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq) | 1169 | static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq) |
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 9eeb61adf6a3..10c57d279144 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h | |||
@@ -269,6 +269,10 @@ struct mlx4_wqe_data_seg { | |||
269 | __be64 addr; | 269 | __be64 addr; |
270 | }; | 270 | }; |
271 | 271 | ||
272 | enum { | ||
273 | MLX4_INLINE_ALIGN = 64, | ||
274 | }; | ||
275 | |||
272 | struct mlx4_wqe_inline_seg { | 276 | struct mlx4_wqe_inline_seg { |
273 | __be32 byte_count; | 277 | __be32 byte_count; |
274 | }; | 278 | }; |