author     Logan Gunthorpe <logang@deltatee.com>    2016-06-20 15:15:05 -0400
committer  Jon Mason <jdmason@kudzu.us>             2016-08-05 10:21:06 -0400
commit     da573eaa3a13f60efafcbe25e4f4465cf1a1b40b
tree       4de19d41638f8499bbc19a80c7cbd9c36c5bac1f
parent     fd2ecd885bab8e456298d0b702806ea736456c62
ntb_perf: Improve thread handling to increase robustness
This commit accomplishes a few things:
1) Properly prevent multiple sets of threads from running at once by using
a mutex. Lots of race conditions existed around the old threads_cleanup handling.
2) The mutex allows us to ensure that threads are finished before
tearing down the device or module.
3) Don't use kthread_stop on threads that can exit by themselves, as the
kthread_create documentation advises against this. The threads now stay
parked and wait for kthread_stop instead of exiting on their own.
4) Writing to the run file now blocks until the threads are complete.
The test can then be safely interrupted by a SIGINT.
Also, while I was at it:
5) debugfs_run_write shouldn't return 0 in the early check cases: a zero
return from a write handler can cause the caller to retry the write in a
loop, so proper error codes are returned instead.
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Acked-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
 drivers/ntb/test/ntb_perf.c | 124
 1 file changed, 76 insertions(+), 48 deletions(-)
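Taken together, points 1, 2 and 4 above describe a single pattern in debugfs_run_write(): take a trylock so only one run can be in flight, launch the worker threads, sleep on a wait queue until every thread has counted itself done, then stop the threads and release the lock. The following is a minimal, hypothetical module-style sketch of that pattern; the demo_* names are invented for illustration and are not part of ntb_perf, and the matching worker side is sketched after the diff.

/* Illustrative sketch only -- not ntb_perf code. */
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/wait.h>
#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/err.h>

#define DEMO_THREADS 4

static DEFINE_MUTEX(demo_run_mutex);		/* only one run at a time */
static DECLARE_WAIT_QUEUE_HEAD(demo_wq);	/* launcher sleeps here */
static atomic_t demo_tdone;			/* workers finished so far */
static struct task_struct *demo_task[DEMO_THREADS];

int demo_worker(void *data);			/* sketched after the diff */

int demo_run(void)
{
	int i, rc = 0;

	/* Return a real error instead of 0 so the caller cannot loop. */
	if (!mutex_trylock(&demo_run_mutex))
		return -EBUSY;

	atomic_set(&demo_tdone, 0);

	for (i = 0; i < DEMO_THREADS; i++) {
		demo_task[i] = kthread_create(demo_worker, &demo_wq,
					      "demo_worker %d", i);
		if (IS_ERR(demo_task[i])) {
			demo_task[i] = NULL;
			rc = -ENXIO;
			goto cleanup;
		}
		wake_up_process(demo_task[i]);
	}

	/* Block the writer until every worker has signalled completion. */
	wait_event_interruptible(demo_wq,
				 atomic_read(&demo_tdone) == DEMO_THREADS);

cleanup:
	/* Safe: the workers never exit on their own, they wait to be stopped. */
	for (i = 0; i < DEMO_THREADS; i++)
		if (demo_task[i])
			kthread_stop(demo_task[i]);

	mutex_unlock(&demo_run_mutex);
	return rc;
}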
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index 5008ccf600a9..db4dc61164ca 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -58,6 +58,7 @@
 #include <linux/delay.h>
 #include <linux/sizes.h>
 #include <linux/ntb.h>
+#include <linux/mutex.h>
 
 #define DRIVER_NAME		"ntb_perf"
 #define DRIVER_DESCRIPTION	"PCIe NTB Performance Measurement Tool"
@@ -121,6 +122,7 @@ struct pthr_ctx {
 	int			dma_prep_err;
 	int			src_idx;
 	void			*srcs[MAX_SRCS];
+	wait_queue_head_t	*wq;
 };
 
 struct perf_ctx {
@@ -134,9 +136,11 @@ struct perf_ctx {
 	struct dentry		*debugfs_run;
 	struct dentry		*debugfs_threads;
 	u8			perf_threads;
-	bool			run;
+	/* mutex ensures only one set of threads run at once */
+	struct mutex		run_mutex;
 	struct pthr_ctx		pthr_ctx[MAX_THREADS];
 	atomic_t		tsync;
+	atomic_t		tdone;
 };
 
 enum {
@@ -295,12 +299,18 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
 			set_current_state(TASK_INTERRUPTIBLE);
 			schedule_timeout(1);
 		}
+
+		if (unlikely(kthread_should_stop()))
+			break;
 	}
 
 	if (use_dma) {
 		pr_info("%s: All DMA descriptors submitted\n", current->comm);
-		while (atomic_read(&pctx->dma_sync) != 0)
+		while (atomic_read(&pctx->dma_sync) != 0) {
+			if (kthread_should_stop())
+				break;
 			msleep(20);
+		}
 	}
 
 	kstop = ktime_get();
@@ -393,7 +403,10 @@ static int ntb_perf_thread(void *data)
 		pctx->srcs[i] = NULL;
 	}
 
-	return 0;
+	atomic_inc(&perf->tdone);
+	wake_up(pctx->wq);
+	rc = 0;
+	goto done;
 
 err:
 	for (i = 0; i < MAX_SRCS; i++) {
@@ -406,6 +419,16 @@ err:
 		pctx->dma_chan = NULL;
 	}
 
+done:
+	/* Wait until we are told to stop */
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (kthread_should_stop())
+			break;
+		schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+
 	return rc;
 }
 
@@ -553,6 +576,7 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
 	struct perf_ctx *perf = filp->private_data;
 	char *buf;
 	ssize_t ret, out_offset;
+	int running;
 
 	if (!perf)
 		return 0;
@@ -560,7 +584,9 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
 	buf = kmalloc(64, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
-	out_offset = snprintf(buf, 64, "%d\n", perf->run);
+
+	running = mutex_is_locked(&perf->run_mutex);
+	out_offset = snprintf(buf, 64, "%d\n", running);
 	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
 	kfree(buf);
 
@@ -572,7 +598,6 @@ static void threads_cleanup(struct perf_ctx *perf)
 	struct pthr_ctx *pctx;
 	int i;
 
-	perf->run = false;
 	for (i = 0; i < MAX_THREADS; i++) {
 		pctx = &perf->pthr_ctx[i];
 		if (pctx->thread) {
@@ -587,65 +612,66 @@ static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
 {
 	struct perf_ctx *perf = filp->private_data;
 	int node, i;
+	DECLARE_WAIT_QUEUE_HEAD(wq);
 
 	if (!perf->link_is_up)
-		return 0;
+		return -ENOLINK;
 
 	if (perf->perf_threads == 0)
-		return 0;
+		return -EINVAL;
 
-	if (atomic_read(&perf->tsync) == 0)
-		perf->run = false;
+	if (!mutex_trylock(&perf->run_mutex))
+		return -EBUSY;
 
-	if (perf->run)
-		threads_cleanup(perf);
-	else {
-		perf->run = true;
-
-		if (perf->perf_threads > MAX_THREADS) {
-			perf->perf_threads = MAX_THREADS;
-			pr_info("Reset total threads to: %u\n", MAX_THREADS);
-		}
-
-		/* no greater than 1M */
-		if (seg_order > MAX_SEG_ORDER) {
-			seg_order = MAX_SEG_ORDER;
-			pr_info("Fix seg_order to %u\n", seg_order);
-		}
-
-		if (run_order < seg_order) {
-			run_order = seg_order;
-			pr_info("Fix run_order to %u\n", run_order);
-		}
-
-		node = dev_to_node(&perf->ntb->pdev->dev);
-		/* launch kernel thread */
-		for (i = 0; i < perf->perf_threads; i++) {
-			struct pthr_ctx *pctx;
-
-			pctx = &perf->pthr_ctx[i];
-			atomic_set(&pctx->dma_sync, 0);
-			pctx->perf = perf;
-			pctx->thread =
-				kthread_create_on_node(ntb_perf_thread,
-						       (void *)pctx,
-						       node, "ntb_perf %d", i);
-			if (IS_ERR(pctx->thread)) {
-				pctx->thread = NULL;
-				goto err;
-			} else
-				wake_up_process(pctx->thread);
-
-			if (perf->run == false)
-				return -ENXIO;
-		}
+	if (perf->perf_threads > MAX_THREADS) {
+		perf->perf_threads = MAX_THREADS;
+		pr_info("Reset total threads to: %u\n", MAX_THREADS);
+	}
+
+	/* no greater than 1M */
+	if (seg_order > MAX_SEG_ORDER) {
+		seg_order = MAX_SEG_ORDER;
+		pr_info("Fix seg_order to %u\n", seg_order);
+	}
+
+	if (run_order < seg_order) {
+		run_order = seg_order;
+		pr_info("Fix run_order to %u\n", run_order);
+	}
+
+	node = dev_to_node(&perf->ntb->pdev->dev);
+	atomic_set(&perf->tdone, 0);
+
+	/* launch kernel thread */
+	for (i = 0; i < perf->perf_threads; i++) {
+		struct pthr_ctx *pctx;
+
+		pctx = &perf->pthr_ctx[i];
+		atomic_set(&pctx->dma_sync, 0);
+		pctx->perf = perf;
+		pctx->wq = &wq;
+		pctx->thread =
+			kthread_create_on_node(ntb_perf_thread,
+					       (void *)pctx,
+					       node, "ntb_perf %d", i);
+		if (IS_ERR(pctx->thread)) {
+			pctx->thread = NULL;
+			goto err;
+		} else {
+			wake_up_process(pctx->thread);
+		}
 	}
 
+	wait_event_interruptible(wq,
+		atomic_read(&perf->tdone) == perf->perf_threads);
+
+	threads_cleanup(perf);
+	mutex_unlock(&perf->run_mutex);
 	return count;
 
 err:
 	threads_cleanup(perf);
+	mutex_unlock(&perf->run_mutex);
 	return -ENXIO;
 }
 
@@ -713,7 +739,7 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
 	perf->ntb = ntb;
 	perf->perf_threads = 1;
 	atomic_set(&perf->tsync, 0);
-	perf->run = false;
+	mutex_init(&perf->run_mutex);
 	spin_lock_init(&perf->db_lock);
 	perf_setup_mw(ntb, perf);
 	INIT_DELAYED_WORK(&perf->link_work, perf_link_work);
@@ -748,6 +774,8 @@ static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
 
 	dev_dbg(&perf->ntb->dev, "%s called\n", __func__);
 
+	mutex_lock(&perf->run_mutex);
+
 	cancel_delayed_work_sync(&perf->link_work);
 	cancel_work_sync(&perf->link_cleanup);
 
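And the worker half of the same hypothetical sketch, mirroring point 3 of the commit message: each thread does its work, counts itself done, wakes the launcher, and then parks until kthread_stop() is called rather than exiting on its own. Again, the demo_* names are illustrative and not part of the driver.

/* Illustrative sketch only -- worker half of the example before the diff. */
int demo_worker(void *data)
{
	wait_queue_head_t *wq = data;

	/*
	 * ... do the measurement work here, checking kthread_should_stop()
	 * periodically inside any long-running loops ...
	 */

	/* Tell the launcher this thread has finished its work. */
	atomic_inc(&demo_tdone);
	wake_up(wq);

	/*
	 * Do not exit; park until kthread_stop() is called, so the stop
	 * in demo_run() never races with an already-dead thread.
	 */
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (kthread_should_stop())
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);

	return 0;
}

Parking like this keeps kthread_stop() valid on every thread, which is exactly the constraint the kthread_create() documentation places on its callers.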