diff options
author | Jack Morgenstein <jackm@dev.mellanox.co.il> | 2007-08-01 05:28:20 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2007-10-09 22:59:15 -0400 |
commit | 5b0bf5e25efea77103b0ac7c8057cd56c778ef41 (patch) | |
tree | 60fc637ca840c2a88563edc495d01f2ad75a5b69 /drivers/net/mlx4/icm.c | |
parent | 04d29b0ede242000b24cfc34cc78fbd164c47e1a (diff) |
mlx4_core: Support ICM tables in coherent memory

Enable having ICM tables in coherent memory, and use coherent memory
for the dMPT table. This allows MPT entries for MRs to be written both
via the SW2HW_MPT command and directly by the driver (for FMR
remapping), without needing to flush or worry about cacheline boundaries.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Michael S. Tsirkin <mst@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/net/mlx4/icm.c')
-rw-r--r-- | drivers/net/mlx4/icm.c | 113 |
1 file changed, 87 insertions, 26 deletions
diff --git a/drivers/net/mlx4/icm.c b/drivers/net/mlx4/icm.c index b7a4aa8476fb..250e24887578 100644 --- a/drivers/net/mlx4/icm.c +++ b/drivers/net/mlx4/icm.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/init.h> | 34 | #include <linux/init.h> |
35 | #include <linux/errno.h> | 35 | #include <linux/errno.h> |
36 | #include <linux/mm.h> | 36 | #include <linux/mm.h> |
37 | #include <linux/scatterlist.h> | ||
37 | 38 | ||
38 | #include <linux/mlx4/cmd.h> | 39 | #include <linux/mlx4/cmd.h> |
39 | 40 | ||
@@ -50,19 +51,41 @@ enum { | |||
50 | MLX4_TABLE_CHUNK_SIZE = 1 << 18 | 51 | MLX4_TABLE_CHUNK_SIZE = 1 << 18 |
51 | }; | 52 | }; |
52 | 53 | ||
53 | void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm) | 54 | static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk) |
54 | { | 55 | { |
55 | struct mlx4_icm_chunk *chunk, *tmp; | ||
56 | int i; | 56 | int i; |
57 | 57 | ||
58 | list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) { | 58 | if (chunk->nsg > 0) |
59 | if (chunk->nsg > 0) | 59 | pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages, |
60 | pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages, | 60 | PCI_DMA_BIDIRECTIONAL); |
61 | PCI_DMA_BIDIRECTIONAL); | 61 | |
62 | for (i = 0; i < chunk->npages; ++i) | ||
63 | __free_pages(chunk->mem[i].page, | ||
64 | get_order(chunk->mem[i].length)); | ||
65 | } | ||
62 | 66 | ||
63 | for (i = 0; i < chunk->npages; ++i) | 67 | static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk) |
64 | __free_pages(chunk->mem[i].page, | 68 | { |
65 | get_order(chunk->mem[i].length)); | 69 | int i; |
70 | |||
71 | for (i = 0; i < chunk->npages; ++i) | ||
72 | dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length, | ||
73 | lowmem_page_address(chunk->mem[i].page), | ||
74 | sg_dma_address(&chunk->mem[i])); | ||
75 | } | ||
76 | |||
77 | void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent) | ||
78 | { | ||
79 | struct mlx4_icm_chunk *chunk, *tmp; | ||
80 | |||
81 | if (!icm) | ||
82 | return; | ||
83 | |||
84 | list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) { | ||
85 | if (coherent) | ||
86 | mlx4_free_icm_coherent(dev, chunk); | ||
87 | else | ||
88 | mlx4_free_icm_pages(dev, chunk); | ||
66 | 89 | ||
67 | kfree(chunk); | 90 | kfree(chunk); |
68 | } | 91 | } |
@@ -70,16 +93,45 @@ void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm) | |||
70 | kfree(icm); | 93 | kfree(icm); |
71 | } | 94 | } |
72 | 95 | ||
96 | static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask) | ||
97 | { | ||
98 | mem->page = alloc_pages(gfp_mask, order); | ||
99 | if (!mem->page) | ||
100 | return -ENOMEM; | ||
101 | |||
102 | mem->length = PAGE_SIZE << order; | ||
103 | mem->offset = 0; | ||
104 | return 0; | ||
105 | } | ||
106 | |||
107 | static int mlx4_alloc_icm_coherent(struct device *dev, struct scatterlist *mem, | ||
108 | int order, gfp_t gfp_mask) | ||
109 | { | ||
110 | void *buf = dma_alloc_coherent(dev, PAGE_SIZE << order, | ||
111 | &sg_dma_address(mem), gfp_mask); | ||
112 | if (!buf) | ||
113 | return -ENOMEM; | ||
114 | |||
115 | sg_set_buf(mem, buf, PAGE_SIZE << order); | ||
116 | BUG_ON(mem->offset); | ||
117 | sg_dma_len(mem) = PAGE_SIZE << order; | ||
118 | return 0; | ||
119 | } | ||
120 | |||
73 | struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, | 121 | struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, |
74 | gfp_t gfp_mask) | 122 | gfp_t gfp_mask, int coherent) |
75 | { | 123 | { |
76 | struct mlx4_icm *icm; | 124 | struct mlx4_icm *icm; |
77 | struct mlx4_icm_chunk *chunk = NULL; | 125 | struct mlx4_icm_chunk *chunk = NULL; |
78 | int cur_order; | 126 | int cur_order; |
127 | int ret; | ||
128 | |||
129 | /* We use sg_set_buf for coherent allocs, which assumes low memory */ | ||
130 | BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM)); | ||
79 | 131 | ||
80 | icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); | 132 | icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); |
81 | if (!icm) | 133 | if (!icm) |
82 | return icm; | 134 | return NULL; |
83 | 135 | ||
84 | icm->refcount = 0; | 136 | icm->refcount = 0; |
85 | INIT_LIST_HEAD(&icm->chunk_list); | 137 | INIT_LIST_HEAD(&icm->chunk_list); |
@@ -101,12 +153,20 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, | |||
101 | while (1 << cur_order > npages) | 153 | while (1 << cur_order > npages) |
102 | --cur_order; | 154 | --cur_order; |
103 | 155 | ||
104 | chunk->mem[chunk->npages].page = alloc_pages(gfp_mask, cur_order); | 156 | if (coherent) |
105 | if (chunk->mem[chunk->npages].page) { | 157 | ret = mlx4_alloc_icm_coherent(&dev->pdev->dev, |
106 | chunk->mem[chunk->npages].length = PAGE_SIZE << cur_order; | 158 | &chunk->mem[chunk->npages], |
107 | chunk->mem[chunk->npages].offset = 0; | 159 | cur_order, gfp_mask); |
160 | else | ||
161 | ret = mlx4_alloc_icm_pages(&chunk->mem[chunk->npages], | ||
162 | cur_order, gfp_mask); | ||
163 | |||
164 | if (!ret) { | ||
165 | ++chunk->npages; | ||
108 | 166 | ||
109 | if (++chunk->npages == MLX4_ICM_CHUNK_LEN) { | 167 | if (coherent) |
168 | ++chunk->nsg; | ||
169 | else if (chunk->npages == MLX4_ICM_CHUNK_LEN) { | ||
110 | chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, | 170 | chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, |
111 | chunk->npages, | 171 | chunk->npages, |
112 | PCI_DMA_BIDIRECTIONAL); | 172 | PCI_DMA_BIDIRECTIONAL); |
@@ -125,7 +185,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, | |||
125 | } | 185 | } |
126 | } | 186 | } |
127 | 187 | ||
128 | if (chunk) { | 188 | if (!coherent && chunk) { |
129 | chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, | 189 | chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, |
130 | chunk->npages, | 190 | chunk->npages, |
131 | PCI_DMA_BIDIRECTIONAL); | 191 | PCI_DMA_BIDIRECTIONAL); |
@@ -137,7 +197,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, | |||
137 | return icm; | 197 | return icm; |
138 | 198 | ||
139 | fail: | 199 | fail: |
140 | mlx4_free_icm(dev, icm); | 200 | mlx4_free_icm(dev, icm, coherent); |
141 | return NULL; | 201 | return NULL; |
142 | } | 202 | } |
143 | 203 | ||
@@ -202,7 +262,7 @@ int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj) | |||
202 | 262 | ||
203 | table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT, | 263 | table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT, |
204 | (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) | | 264 | (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) | |
205 | __GFP_NOWARN); | 265 | __GFP_NOWARN, table->coherent); |
206 | if (!table->icm[i]) { | 266 | if (!table->icm[i]) { |
207 | ret = -ENOMEM; | 267 | ret = -ENOMEM; |
208 | goto out; | 268 | goto out; |
@@ -210,7 +270,7 @@ int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj) | |||
210 | 270 | ||
211 | if (mlx4_MAP_ICM(dev, table->icm[i], table->virt + | 271 | if (mlx4_MAP_ICM(dev, table->icm[i], table->virt + |
212 | (u64) i * MLX4_TABLE_CHUNK_SIZE)) { | 272 | (u64) i * MLX4_TABLE_CHUNK_SIZE)) { |
213 | mlx4_free_icm(dev, table->icm[i]); | 273 | mlx4_free_icm(dev, table->icm[i], table->coherent); |
214 | table->icm[i] = NULL; | 274 | table->icm[i] = NULL; |
215 | ret = -ENOMEM; | 275 | ret = -ENOMEM; |
216 | goto out; | 276 | goto out; |
@@ -234,7 +294,7 @@ void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj) | |||
234 | if (--table->icm[i]->refcount == 0) { | 294 | if (--table->icm[i]->refcount == 0) { |
235 | mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE, | 295 | mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE, |
236 | MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE); | 296 | MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE); |
237 | mlx4_free_icm(dev, table->icm[i]); | 297 | mlx4_free_icm(dev, table->icm[i], table->coherent); |
238 | table->icm[i] = NULL; | 298 | table->icm[i] = NULL; |
239 | } | 299 | } |
240 | 300 | ||
@@ -309,7 +369,7 @@ void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, | |||
309 | 369 | ||
310 | int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, | 370 | int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, |
311 | u64 virt, int obj_size, int nobj, int reserved, | 371 | u64 virt, int obj_size, int nobj, int reserved, |
312 | int use_lowmem) | 372 | int use_lowmem, int use_coherent) |
313 | { | 373 | { |
314 | int obj_per_chunk; | 374 | int obj_per_chunk; |
315 | int num_icm; | 375 | int num_icm; |
@@ -327,6 +387,7 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, | |||
327 | table->num_obj = nobj; | 387 | table->num_obj = nobj; |
328 | table->obj_size = obj_size; | 388 | table->obj_size = obj_size; |
329 | table->lowmem = use_lowmem; | 389 | table->lowmem = use_lowmem; |
390 | table->coherent = use_coherent; | ||
330 | mutex_init(&table->mutex); | 391 | mutex_init(&table->mutex); |
331 | 392 | ||
332 | for (i = 0; i * MLX4_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) { | 393 | for (i = 0; i * MLX4_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) { |
@@ -336,11 +397,11 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, | |||
336 | 397 | ||
337 | table->icm[i] = mlx4_alloc_icm(dev, chunk_size >> PAGE_SHIFT, | 398 | table->icm[i] = mlx4_alloc_icm(dev, chunk_size >> PAGE_SHIFT, |
338 | (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) | | 399 | (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) | |
339 | __GFP_NOWARN); | 400 | __GFP_NOWARN, use_coherent); |
340 | if (!table->icm[i]) | 401 | if (!table->icm[i]) |
341 | goto err; | 402 | goto err; |
342 | if (mlx4_MAP_ICM(dev, table->icm[i], virt + i * MLX4_TABLE_CHUNK_SIZE)) { | 403 | if (mlx4_MAP_ICM(dev, table->icm[i], virt + i * MLX4_TABLE_CHUNK_SIZE)) { |
343 | mlx4_free_icm(dev, table->icm[i]); | 404 | mlx4_free_icm(dev, table->icm[i], use_coherent); |
344 | table->icm[i] = NULL; | 405 | table->icm[i] = NULL; |
345 | goto err; | 406 | goto err; |
346 | } | 407 | } |
@@ -359,7 +420,7 @@ err: | |||
359 | if (table->icm[i]) { | 420 | if (table->icm[i]) { |
360 | mlx4_UNMAP_ICM(dev, virt + i * MLX4_TABLE_CHUNK_SIZE, | 421 | mlx4_UNMAP_ICM(dev, virt + i * MLX4_TABLE_CHUNK_SIZE, |
361 | MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE); | 422 | MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE); |
362 | mlx4_free_icm(dev, table->icm[i]); | 423 | mlx4_free_icm(dev, table->icm[i], use_coherent); |
363 | } | 424 | } |
364 | 425 | ||
365 | return -ENOMEM; | 426 | return -ENOMEM; |
@@ -373,7 +434,7 @@ void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table) | |||
373 | if (table->icm[i]) { | 434 | if (table->icm[i]) { |
374 | mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE, | 435 | mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE, |
375 | MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE); | 436 | MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE); |
376 | mlx4_free_icm(dev, table->icm[i]); | 437 | mlx4_free_icm(dev, table->icm[i], table->coherent); |
377 | } | 438 | } |
378 | 439 | ||
379 | kfree(table->icm); | 440 | kfree(table->icm); |