author		Logan Gunthorpe <logang@deltatee.com>	2016-06-20 15:15:05 -0400
committer	Jon Mason <jdmason@kudzu.us>	2016-08-05 10:21:06 -0400
commit		da573eaa3a13f60efafcbe25e4f4465cf1a1b40b (patch)
tree		4de19d41638f8499bbc19a80c7cbd9c36c5bac1f
parent		fd2ecd885bab8e456298d0b702806ea736456c62 (diff)
ntb_perf: Improve thread handling to increase robustness
This commit accomplishes a few things:

1) Properly prevent multiple sets of threads from running at once using
a mutex. Lots of race issues existed with the thread_cleanup.

2) The mutex allows us to ensure that threads are finished before
tearing down the device or module.

3) Don't use kthread_stop when the threads can exit by themselves, as
this is counter-indicated by the kthread_create documentation. Threads
now wait for kthread_stop to occur.

4) Writing to the run file now blocks until the threads are complete.
The test can then be safely interrupted by a SIGINT.

Also, while I was at it:

5) debugfs_run_write shouldn't return 0 in the early check cases as
this could cause debugfs_run_write to loop undesirably.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Acked-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
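As an aside, the synchronization pattern the message describes (a trylock'd mutex serializing runs, worker threads signalling completion through an atomic counter and a wait queue, then parking until kthread_stop) can be sketched in isolation. The following is an illustrative, self-contained sample only; worker_ctx, worker_fn and run_workers are made-up names, not part of the driver. The real change is in the diff below.

/*
 * Minimal sketch of the pattern, assuming ctx[] is caller-allocated.
 * Not the ntb_perf code itself.
 */
#include <linux/atomic.h>
#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/wait.h>

static DEFINE_MUTEX(run_mutex);		/* only one set of threads at a time */
static atomic_t tdone;			/* threads finished with their work */

struct worker_ctx {
	struct task_struct *thread;
	wait_queue_head_t *wq;		/* launcher's wait queue */
};

static int worker_fn(void *data)
{
	struct worker_ctx *ctx = data;

	/* ... do the measurement work, polling kthread_should_stop() ... */

	atomic_inc(&tdone);
	wake_up(ctx->wq);

	/* Do not exit on our own; park until the launcher calls kthread_stop(). */
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (kthread_should_stop())
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}

static int run_workers(struct worker_ctx *ctx, unsigned int nthreads)
{
	DECLARE_WAIT_QUEUE_HEAD(wq);
	unsigned int i;

	/* A second writer gets -EBUSY instead of racing the running test. */
	if (!mutex_trylock(&run_mutex))
		return -EBUSY;

	atomic_set(&tdone, 0);
	for (i = 0; i < nthreads; i++)
		ctx[i].thread = NULL;

	for (i = 0; i < nthreads; i++) {
		ctx[i].wq = &wq;
		ctx[i].thread = kthread_run(worker_fn, &ctx[i], "worker%u", i);
		if (IS_ERR(ctx[i].thread)) {
			ctx[i].thread = NULL;
			goto stop;	/* tear down whatever was launched */
		}
	}

	/* Block until every thread reports done, or a signal arrives. */
	wait_event_interruptible(wq, atomic_read(&tdone) == nthreads);

stop:
	for (i = 0; i < nthreads; i++)
		if (ctx[i].thread)
			kthread_stop(ctx[i].thread);

	mutex_unlock(&run_mutex);
	return 0;
}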
-rw-r--r--	drivers/ntb/test/ntb_perf.c	124
1 file changed, 76 insertions, 48 deletions
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index 5008ccf600a9..db4dc61164ca 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -58,6 +58,7 @@
 #include <linux/delay.h>
 #include <linux/sizes.h>
 #include <linux/ntb.h>
+#include <linux/mutex.h>
 
 #define DRIVER_NAME		"ntb_perf"
 #define DRIVER_DESCRIPTION	"PCIe NTB Performance Measurement Tool"
@@ -121,6 +122,7 @@ struct pthr_ctx {
 	int			dma_prep_err;
 	int			src_idx;
 	void			*srcs[MAX_SRCS];
+	wait_queue_head_t	*wq;
 };
 
 struct perf_ctx {
@@ -134,9 +136,11 @@ struct perf_ctx {
 	struct dentry		*debugfs_run;
 	struct dentry		*debugfs_threads;
 	u8			perf_threads;
-	bool			run;
+	/* mutex ensures only one set of threads run at once */
+	struct mutex		run_mutex;
 	struct pthr_ctx		pthr_ctx[MAX_THREADS];
 	atomic_t		tsync;
+	atomic_t		tdone;
 };
 
 enum {
@@ -295,12 +299,18 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
 			set_current_state(TASK_INTERRUPTIBLE);
 			schedule_timeout(1);
 		}
+
+		if (unlikely(kthread_should_stop()))
+			break;
 	}
 
 	if (use_dma) {
 		pr_info("%s: All DMA descriptors submitted\n", current->comm);
-		while (atomic_read(&pctx->dma_sync) != 0)
+		while (atomic_read(&pctx->dma_sync) != 0) {
+			if (kthread_should_stop())
+				break;
 			msleep(20);
+		}
 	}
 
 	kstop = ktime_get();
@@ -393,7 +403,10 @@ static int ntb_perf_thread(void *data)
 		pctx->srcs[i] = NULL;
 	}
 
-	return 0;
+	atomic_inc(&perf->tdone);
+	wake_up(pctx->wq);
+	rc = 0;
+	goto done;
 
 err:
 	for (i = 0; i < MAX_SRCS; i++) {
@@ -406,6 +419,16 @@ err:
 		pctx->dma_chan = NULL;
 	}
 
+done:
+	/* Wait until we are told to stop */
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (kthread_should_stop())
+			break;
+		schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+
 	return rc;
 }
 
@@ -553,6 +576,7 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
 	struct perf_ctx *perf = filp->private_data;
 	char *buf;
 	ssize_t ret, out_offset;
+	int running;
 
 	if (!perf)
 		return 0;
@@ -560,7 +584,9 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
 	buf = kmalloc(64, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
-	out_offset = snprintf(buf, 64, "%d\n", perf->run);
+
+	running = mutex_is_locked(&perf->run_mutex);
+	out_offset = snprintf(buf, 64, "%d\n", running);
 	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
 	kfree(buf);
 
@@ -572,7 +598,6 @@ static void threads_cleanup(struct perf_ctx *perf)
 	struct pthr_ctx *pctx;
 	int i;
 
-	perf->run = false;
 	for (i = 0; i < MAX_THREADS; i++) {
 		pctx = &perf->pthr_ctx[i];
 		if (pctx->thread) {
@@ -587,65 +612,66 @@ static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
 {
 	struct perf_ctx *perf = filp->private_data;
 	int node, i;
+	DECLARE_WAIT_QUEUE_HEAD(wq);
 
 	if (!perf->link_is_up)
-		return 0;
+		return -ENOLINK;
 
 	if (perf->perf_threads == 0)
-		return 0;
+		return -EINVAL;
 
-	if (atomic_read(&perf->tsync) == 0)
-		perf->run = false;
+	if (!mutex_trylock(&perf->run_mutex))
+		return -EBUSY;
 
-	if (perf->run)
-		threads_cleanup(perf);
-	else {
-		perf->run = true;
+	if (perf->perf_threads > MAX_THREADS) {
+		perf->perf_threads = MAX_THREADS;
+		pr_info("Reset total threads to: %u\n", MAX_THREADS);
+	}
 
-		if (perf->perf_threads > MAX_THREADS) {
-			perf->perf_threads = MAX_THREADS;
-			pr_info("Reset total threads to: %u\n", MAX_THREADS);
-		}
+	/* no greater than 1M */
+	if (seg_order > MAX_SEG_ORDER) {
+		seg_order = MAX_SEG_ORDER;
+		pr_info("Fix seg_order to %u\n", seg_order);
+	}
 
-		/* no greater than 1M */
-		if (seg_order > MAX_SEG_ORDER) {
-			seg_order = MAX_SEG_ORDER;
-			pr_info("Fix seg_order to %u\n", seg_order);
-		}
+	if (run_order < seg_order) {
+		run_order = seg_order;
+		pr_info("Fix run_order to %u\n", run_order);
+	}
 
-		if (run_order < seg_order) {
-			run_order = seg_order;
-			pr_info("Fix run_order to %u\n", run_order);
-		}
+	node = dev_to_node(&perf->ntb->pdev->dev);
+	atomic_set(&perf->tdone, 0);
 
-		node = dev_to_node(&perf->ntb->pdev->dev);
-		/* launch kernel thread */
-		for (i = 0; i < perf->perf_threads; i++) {
-			struct pthr_ctx *pctx;
-
-			pctx = &perf->pthr_ctx[i];
-			atomic_set(&pctx->dma_sync, 0);
-			pctx->perf = perf;
-			pctx->thread =
-				kthread_create_on_node(ntb_perf_thread,
-						       (void *)pctx,
-						       node, "ntb_perf %d", i);
-			if (IS_ERR(pctx->thread)) {
-				pctx->thread = NULL;
-				goto err;
-			} else
-				wake_up_process(pctx->thread);
-
-			if (perf->run == false)
-				return -ENXIO;
-		}
+	/* launch kernel thread */
+	for (i = 0; i < perf->perf_threads; i++) {
+		struct pthr_ctx *pctx;
 
+		pctx = &perf->pthr_ctx[i];
+		atomic_set(&pctx->dma_sync, 0);
+		pctx->perf = perf;
+		pctx->wq = &wq;
+		pctx->thread =
+			kthread_create_on_node(ntb_perf_thread,
+					       (void *)pctx,
+					       node, "ntb_perf %d", i);
+		if (IS_ERR(pctx->thread)) {
+			pctx->thread = NULL;
+			goto err;
+		} else {
+			wake_up_process(pctx->thread);
+		}
 	}
 
+	wait_event_interruptible(wq,
+		atomic_read(&perf->tdone) == perf->perf_threads);
+
+	threads_cleanup(perf);
+	mutex_unlock(&perf->run_mutex);
 	return count;
 
 err:
 	threads_cleanup(perf);
+	mutex_unlock(&perf->run_mutex);
 	return -ENXIO;
 }
 
@@ -713,7 +739,7 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
 	perf->ntb = ntb;
 	perf->perf_threads = 1;
 	atomic_set(&perf->tsync, 0);
-	perf->run = false;
+	mutex_init(&perf->run_mutex);
 	spin_lock_init(&perf->db_lock);
 	perf_setup_mw(ntb, perf);
 	INIT_DELAYED_WORK(&perf->link_work, perf_link_work);
@@ -748,6 +774,8 @@ static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
 
 	dev_dbg(&perf->ntb->dev, "%s called\n", __func__);
 
+	mutex_lock(&perf->run_mutex);
+
 	cancel_delayed_work_sync(&perf->link_work);
 	cancel_work_sync(&perf->link_cleanup);
 