author		Frank Haverkamp <haver@linux.vnet.ibm.com>	2014-03-20 10:11:05 -0400
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2014-04-16 15:12:39 -0400
commit		718f762efc454796d02f172a929d051f2d6ec01a (patch)
tree		13d8f9558b39963356082460de3ee6855454d972 /drivers/misc
parent		ebb2c96bb9214ba38c7fe35d5d725f6e7cb3bbc8 (diff)
GenWQE: Fix multithreading problems
When used in a multithreaded application, there were problems with memory pages/cachelines being accessed by multiple threads/CPUs at the same time while DMA transfers to/from them were in progress. To avoid such situations, this fix creates a copy of the first and the last page of the transfer if that page is not fully used. The data is copied from user-space into those pages, and the results are copied back once the DDCB request has finished successfully.

Signed-off-by: Frank Haverkamp <haver@linux.vnet.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
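To illustrate the approach, the sketch below reproduces the page-split arithmetic that the patch introduces in genwqe_alloc_sync_sgl() (first-page offset and size, page count, last-page size) as plain user-space C with made-up example values, and shows when a private copy of the first or last page would be used. It is only an illustrative sketch, not driver code; the real logic is in the diff below.

/*
 * Sketch of the page-split arithmetic from the patch, with example
 * values.  Compile with any C compiler; nothing here touches hardware.
 */
#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	unsigned long user_addr = 0x10000100UL;	/* example buffer start */
	size_t user_size = 10000;		/* example buffer length */

	/* offset of the buffer inside its first page */
	size_t fpage_offs = user_addr & (PAGE_SIZE - 1);

	/* bytes of the buffer that fall into the first page */
	size_t fpage_size = PAGE_SIZE - fpage_offs;
	if (fpage_size > user_size)
		fpage_size = user_size;

	/* total pages touched, and bytes in a partial last page */
	size_t nr_pages = (fpage_offs + user_size + PAGE_SIZE - 1) / PAGE_SIZE;
	size_t lpage_size = (user_size - fpage_size) % PAGE_SIZE;

	/*
	 * A partially used first/last page shares cachelines with other
	 * data, so the patch runs DMA through a private copy of that page
	 * and copies the result back; fully used pages are mapped directly.
	 */
	int bounce_first = (fpage_size != 0) && (fpage_size != PAGE_SIZE);
	int bounce_last = (lpage_size != 0);

	printf("nr_pages=%zu fpage_offs=%zu fpage_size=%zu lpage_size=%zu\n",
	       nr_pages, fpage_offs, fpage_size, lpage_size);
	printf("copy first page: %s, copy last page: %s\n",
	       bounce_first ? "yes" : "no", bounce_last ? "yes" : "no");
	return 0;
}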
Diffstat (limited to 'drivers/misc')
-rw-r--r--	drivers/misc/genwqe/card_base.h		 58
-rw-r--r--	drivers/misc/genwqe/card_dev.c		 38
-rw-r--r--	drivers/misc/genwqe/card_utils.c	170
3 files changed, 180 insertions, 86 deletions
diff --git a/drivers/misc/genwqe/card_base.h b/drivers/misc/genwqe/card_base.h
index 5e4dbd21f89a..0e608a288603 100644
--- a/drivers/misc/genwqe/card_base.h
+++ b/drivers/misc/genwqe/card_base.h
@@ -337,6 +337,44 @@ enum genwqe_requ_state {
 };
 
 /**
+ * struct genwqe_sgl - Scatter gather list describing user-space memory
+ * @sgl: scatter gather list needs to be 128 byte aligned
+ * @sgl_dma_addr: dma address of sgl
+ * @sgl_size: size of area used for sgl
+ * @user_addr: user-space address of memory area
+ * @user_size: size of user-space memory area
+ * @page: buffer for partial pages if needed
+ * @page_dma_addr: dma address partial pages
+ */
+struct genwqe_sgl {
+	dma_addr_t sgl_dma_addr;
+	struct sg_entry *sgl;
+	size_t sgl_size;	/* size of sgl */
+
+	void __user *user_addr;	/* user-space base-address */
+	size_t user_size;	/* size of memory area */
+
+	unsigned long nr_pages;
+	unsigned long fpage_offs;
+	size_t fpage_size;
+	size_t lpage_size;
+
+	void *fpage;
+	dma_addr_t fpage_dma_addr;
+
+	void *lpage;
+	dma_addr_t lpage_dma_addr;
+};
+
+int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
+			  void __user *user_addr, size_t user_size);
+
+int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
+		     dma_addr_t *dma_list);
+
+int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl);
+
+/**
  * struct ddcb_requ - Kernel internal representation of the DDCB request
  * @cmd: User space representation of the DDCB execution request
  */
@@ -347,9 +385,7 @@ struct ddcb_requ {
 	struct ddcb_queue *queue;	/* associated queue */
 
 	struct dma_mapping dma_mappings[DDCB_FIXUPS];
-	struct sg_entry *sgl[DDCB_FIXUPS];
-	dma_addr_t sgl_dma_addr[DDCB_FIXUPS];
-	size_t sgl_size[DDCB_FIXUPS];
+	struct genwqe_sgl sgls[DDCB_FIXUPS];
 
 	/* kernel/user shared content */
 	struct genwqe_ddcb_cmd cmd;	/* ddcb_no for this request */
@@ -453,22 +489,6 @@ int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m,
 int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m,
 		       struct ddcb_requ *req);
 
-struct sg_entry *genwqe_alloc_sgl(struct genwqe_dev *cd, int num_pages,
-				  dma_addr_t *dma_addr, size_t *sgl_size);
-
-void genwqe_free_sgl(struct genwqe_dev *cd, struct sg_entry *sg_list,
-		     dma_addr_t dma_addr, size_t size);
-
-int genwqe_setup_sgl(struct genwqe_dev *cd,
-		     unsigned long offs,
-		     unsigned long size,
-		     struct sg_entry *sgl, /* genwqe sgl */
-		     dma_addr_t dma_addr, size_t sgl_size,
-		     dma_addr_t *dma_list, int page_offs, int num_pages);
-
-int genwqe_check_sgl(struct genwqe_dev *cd, struct sg_entry *sg_list,
-		     int size);
-
 static inline bool dma_mapping_used(struct dma_mapping *m)
 {
 	if (!m)
diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c
index 0d05ca77c458..1d2f163a1906 100644
--- a/drivers/misc/genwqe/card_dev.c
+++ b/drivers/misc/genwqe/card_dev.c
@@ -840,15 +840,8 @@ static int ddcb_cmd_cleanup(struct genwqe_file *cfile, struct ddcb_requ *req)
 			__genwqe_del_mapping(cfile, dma_map);
 			genwqe_user_vunmap(cd, dma_map, req);
 		}
-		if (req->sgl[i] != NULL) {
-			genwqe_free_sgl(cd, req->sgl[i],
-					req->sgl_dma_addr[i],
-					req->sgl_size[i]);
-			req->sgl[i] = NULL;
-			req->sgl_dma_addr[i] = 0x0;
-			req->sgl_size[i] = 0;
-		}
-
+		if (req->sgls[i].sgl != NULL)
+			genwqe_free_sync_sgl(cd, &req->sgls[i]);
 	}
 	return 0;
 }
@@ -917,7 +910,7 @@ static int ddcb_cmd_fixups(struct genwqe_file *cfile, struct ddcb_requ *req)
 
 		case ATS_TYPE_SGL_RDWR:
 		case ATS_TYPE_SGL_RD: {
-			int page_offs, nr_pages, offs;
+			int page_offs;
 
 			u_addr = be64_to_cpu(*((__be64 *)
 				&cmd->asiv[asiv_offs]));
@@ -955,27 +948,18 @@ static int ddcb_cmd_fixups(struct genwqe_file *cfile, struct ddcb_requ *req)
 				page_offs = 0;
 			}
 
-			offs = offset_in_page(u_addr);
-			nr_pages = DIV_ROUND_UP(offs + u_size, PAGE_SIZE);
-
 			/* create genwqe style scatter gather list */
-			req->sgl[i] = genwqe_alloc_sgl(cd, m->nr_pages,
-						       &req->sgl_dma_addr[i],
-						       &req->sgl_size[i]);
-			if (req->sgl[i] == NULL) {
-				rc = -ENOMEM;
+			rc = genwqe_alloc_sync_sgl(cd, &req->sgls[i],
+						   (void __user *)u_addr,
+						   u_size);
+			if (rc != 0)
 				goto err_out;
-			}
-			genwqe_setup_sgl(cd, offs, u_size,
-					 req->sgl[i],
-					 req->sgl_dma_addr[i],
-					 req->sgl_size[i],
-					 m->dma_list,
-					 page_offs,
-					 nr_pages);
+
+			genwqe_setup_sgl(cd, &req->sgls[i],
+					 &m->dma_list[page_offs]);
 
 			*((__be64 *)&cmd->asiv[asiv_offs]) =
-				cpu_to_be64(req->sgl_dma_addr[i]);
+				cpu_to_be64(req->sgls[i].sgl_dma_addr);
 
 			break;
 		}
diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c
index 6b1a6ef9f1a8..d049d271699c 100644
--- a/drivers/misc/genwqe/card_utils.c
+++ b/drivers/misc/genwqe/card_utils.c
@@ -275,67 +275,107 @@ static int genwqe_sgl_size(int num_pages)
 	return roundup(len, PAGE_SIZE);
 }
 
-struct sg_entry *genwqe_alloc_sgl(struct genwqe_dev *cd, int num_pages,
-				  dma_addr_t *dma_addr, size_t *sgl_size)
+/**
+ * genwqe_alloc_sync_sgl() - Allocate memory for sgl and overlapping pages
+ *
+ * Allocates memory for sgl and overlapping pages. Pages which might
+ * overlap other user-space memory blocks are being cached for DMAs,
+ * such that we do not run into syncronization issues. Data is copied
+ * from user-space into the cached pages.
+ */
+int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
+			  void __user *user_addr, size_t user_size)
 {
+	int rc;
 	struct pci_dev *pci_dev = cd->pci_dev;
-	struct sg_entry *sgl;
 
-	*sgl_size = genwqe_sgl_size(num_pages);
-	if (get_order(*sgl_size) > MAX_ORDER) {
+	sgl->fpage_offs = offset_in_page((unsigned long)user_addr);
+	sgl->fpage_size = min_t(size_t, PAGE_SIZE-sgl->fpage_offs, user_size);
+	sgl->nr_pages = DIV_ROUND_UP(sgl->fpage_offs + user_size, PAGE_SIZE);
+	sgl->lpage_size = (user_size - sgl->fpage_size) % PAGE_SIZE;
+
+	dev_dbg(&pci_dev->dev, "[%s] uaddr=%p usize=%8ld nr_pages=%ld "
+		"fpage_offs=%lx fpage_size=%ld lpage_size=%ld\n",
+		__func__, user_addr, user_size, sgl->nr_pages,
+		sgl->fpage_offs, sgl->fpage_size, sgl->lpage_size);
+
+	sgl->user_addr = user_addr;
+	sgl->user_size = user_size;
+	sgl->sgl_size = genwqe_sgl_size(sgl->nr_pages);
+
+	if (get_order(sgl->sgl_size) > MAX_ORDER) {
 		dev_err(&pci_dev->dev,
 			"[%s] err: too much memory requested!\n", __func__);
-		return NULL;
+		return -ENOMEM;
 	}
 
-	sgl = __genwqe_alloc_consistent(cd, *sgl_size, dma_addr);
-	if (sgl == NULL) {
+	sgl->sgl = __genwqe_alloc_consistent(cd, sgl->sgl_size,
+					     &sgl->sgl_dma_addr);
+	if (sgl->sgl == NULL) {
 		dev_err(&pci_dev->dev,
 			"[%s] err: no memory available!\n", __func__);
-		return NULL;
+		return -ENOMEM;
 	}
 
-	return sgl;
+	/* Only use buffering on incomplete pages */
+	if ((sgl->fpage_size != 0) && (sgl->fpage_size != PAGE_SIZE)) {
+		sgl->fpage = __genwqe_alloc_consistent(cd, PAGE_SIZE,
+						       &sgl->fpage_dma_addr);
+		if (sgl->fpage == NULL)
+			goto err_out;
+
+		/* Sync with user memory */
+		if (copy_from_user(sgl->fpage + sgl->fpage_offs,
+				   user_addr, sgl->fpage_size)) {
+			rc = -EFAULT;
+			goto err_out;
+		}
+	}
+	if (sgl->lpage_size != 0) {
+		sgl->lpage = __genwqe_alloc_consistent(cd, PAGE_SIZE,
+						       &sgl->lpage_dma_addr);
+		if (sgl->lpage == NULL)
+			goto err_out1;
+
+		/* Sync with user memory */
+		if (copy_from_user(sgl->lpage, user_addr + user_size -
+				   sgl->lpage_size, sgl->lpage_size)) {
+			rc = -EFAULT;
+			goto err_out1;
+		}
+	}
+	return 0;
+
+ err_out1:
+	__genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage,
+				 sgl->fpage_dma_addr);
+ err_out:
+	__genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl,
+				 sgl->sgl_dma_addr);
+	return -ENOMEM;
 }
 
-int genwqe_setup_sgl(struct genwqe_dev *cd,
-		     unsigned long offs,
-		     unsigned long size,
-		     struct sg_entry *sgl,
-		     dma_addr_t dma_addr, size_t sgl_size,
-		     dma_addr_t *dma_list, int page_offs, int num_pages)
+int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
+		     dma_addr_t *dma_list)
 {
 	int i = 0, j = 0, p;
 	unsigned long dma_offs, map_offs;
-	struct pci_dev *pci_dev = cd->pci_dev;
 	dma_addr_t prev_daddr = 0;
 	struct sg_entry *s, *last_s = NULL;
-
-	/* sanity checks */
-	if (offs > PAGE_SIZE) {
-		dev_err(&pci_dev->dev,
-			"[%s] too large start offs %08lx\n", __func__, offs);
-		return -EFAULT;
-	}
-	if (sgl_size < genwqe_sgl_size(num_pages)) {
-		dev_err(&pci_dev->dev,
-			"[%s] sgl_size too small %08lx for %d pages\n",
-			__func__, sgl_size, num_pages);
-		return -EFAULT;
-	}
+	size_t size = sgl->user_size;
 
 	dma_offs = 128;		/* next block if needed/dma_offset */
-	map_offs = offs;	/* offset in first page */
+	map_offs = sgl->fpage_offs; /* offset in first page */
 
-	s = &sgl[0];		/* first set of 8 entries */
+	s = &sgl->sgl[0];	/* first set of 8 entries */
 	p = 0;			/* page */
-	while (p < num_pages) {
+	while (p < sgl->nr_pages) {
 		dma_addr_t daddr;
 		unsigned int size_to_map;
 
 		/* always write the chaining entry, cleanup is done later */
 		j = 0;
-		s[j].target_addr = cpu_to_be64(dma_addr + dma_offs);
+		s[j].target_addr = cpu_to_be64(sgl->sgl_dma_addr + dma_offs);
 		s[j].len	= cpu_to_be32(128);
 		s[j].flags	= cpu_to_be32(SG_CHAINED);
 		j++;
@@ -343,7 +383,17 @@ int genwqe_setup_sgl(struct genwqe_dev *cd,
 		while (j < 8) {
 			/* DMA mapping for requested page, offs, size */
 			size_to_map = min(size, PAGE_SIZE - map_offs);
-			daddr = dma_list[page_offs + p] + map_offs;
+
+			if ((p == 0) && (sgl->fpage != NULL)) {
+				daddr = sgl->fpage_dma_addr + map_offs;
+
+			} else if ((p == sgl->nr_pages - 1) &&
+				   (sgl->lpage != NULL)) {
+				daddr = sgl->lpage_dma_addr;
+			} else {
+				daddr = dma_list[p] + map_offs;
+			}
+
 			size -= size_to_map;
 			map_offs = 0;
 
@@ -358,7 +408,7 @@ int genwqe_setup_sgl(struct genwqe_dev *cd,
 					     size_to_map);
 
 			p++; /* process next page */
-			if (p == num_pages)
+			if (p == sgl->nr_pages)
 				goto fixup;	/* nothing to do */
 
 			prev_daddr = daddr + size_to_map;
@@ -374,7 +424,7 @@ int genwqe_setup_sgl(struct genwqe_dev *cd,
 			j++;
 
 			p++; /* process next page */
-			if (p == num_pages)
+			if (p == sgl->nr_pages)
 				goto fixup;	/* nothing to do */
 		}
 		dma_offs += 128;
@@ -395,10 +445,50 @@ int genwqe_setup_sgl(struct genwqe_dev *cd,
 	return 0;
 }
 
-void genwqe_free_sgl(struct genwqe_dev *cd, struct sg_entry *sg_list,
-		     dma_addr_t dma_addr, size_t size)
+/**
+ * genwqe_free_sync_sgl() - Free memory for sgl and overlapping pages
+ *
+ * After the DMA transfer has been completed we free the memory for
+ * the sgl and the cached pages. Data is being transfered from cached
+ * pages into user-space buffers.
+ */
+int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl)
 {
-	__genwqe_free_consistent(cd, size, sg_list, dma_addr);
+	int rc;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (sgl->fpage) {
+		if (copy_to_user(sgl->user_addr, sgl->fpage + sgl->fpage_offs,
+				 sgl->fpage_size)) {
+			dev_err(&pci_dev->dev, "[%s] err: copying fpage!\n",
+				__func__);
+			rc = -EFAULT;
+		}
+		__genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage,
+					 sgl->fpage_dma_addr);
+		sgl->fpage = NULL;
+		sgl->fpage_dma_addr = 0;
+	}
+	if (sgl->lpage) {
+		if (copy_to_user(sgl->user_addr + sgl->user_size -
+				 sgl->lpage_size, sgl->lpage,
+				 sgl->lpage_size)) {
+			dev_err(&pci_dev->dev, "[%s] err: copying lpage!\n",
+				__func__);
+			rc = -EFAULT;
+		}
+		__genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage,
+					 sgl->lpage_dma_addr);
+		sgl->lpage = NULL;
+		sgl->lpage_dma_addr = 0;
+	}
+	__genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl,
+				 sgl->sgl_dma_addr);
+
+	sgl->sgl = NULL;
+	sgl->sgl_dma_addr = 0x0;
+	sgl->sgl_size = 0;
+	return rc;
 }
 
 /**