author     Logan Gunthorpe <logang@deltatee.com>    2016-06-20 15:15:05 -0400
committer  Jon Mason <jdmason@kudzu.us>             2016-08-05 10:21:06 -0400
commit     da573eaa3a13f60efafcbe25e4f4465cf1a1b40b
tree       4de19d41638f8499bbc19a80c7cbd9c36c5bac1f
parent     fd2ecd885bab8e456298d0b702806ea736456c62
ntb_perf: Improve thread handling to increase robustness
This commit accomplishes a few things:
1) Properly prevent multiple sets of threads from running at once by using
a mutex. Lots of race conditions existed around the old threads_cleanup handling.
2) The mutex allows us to ensure that threads are finished before
tearing down the device or module.
3) Don't use kthread_stop on threads that can exit by themselves, as the
kthread_create documentation advises against this. The threads now stay
parked and wait for kthread_stop instead of exiting on their own.
4) Writing to the run file now blocks until the threads are complete.
The test can then be safely interrupted by a SIGINT.
Also, while I was at it:
5) debugfs_run_write shouldn't return 0 in the early check cases: a zero
return from a write handler can cause the caller to retry the write in a
loop, so proper error codes are returned instead.
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Acked-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
 drivers/ntb/test/ntb_perf.c | 124
 1 file changed, 76 insertions(+), 48 deletions(-)
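Taken together, points 1, 2 and 4 above describe a single pattern in debugfs_run_write(): take a trylock so only one run can be in flight, launch the worker threads, sleep on a wait queue until every thread has counted itself done, then stop the threads and release the lock. The following is a minimal, hypothetical module-style sketch of that pattern; the demo_* names are invented for illustration and are not part of ntb_perf, and the matching worker side is sketched after the diff.

/* Illustrative sketch only -- not ntb_perf code. */
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/wait.h>
#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/err.h>

#define DEMO_THREADS 4

static DEFINE_MUTEX(demo_run_mutex);		/* only one run at a time */
static DECLARE_WAIT_QUEUE_HEAD(demo_wq);	/* launcher sleeps here */
static atomic_t demo_tdone;			/* workers finished so far */
static struct task_struct *demo_task[DEMO_THREADS];

int demo_worker(void *data);			/* sketched after the diff */

int demo_run(void)
{
	int i, rc = 0;

	/* Return a real error instead of 0 so the caller cannot loop. */
	if (!mutex_trylock(&demo_run_mutex))
		return -EBUSY;

	atomic_set(&demo_tdone, 0);

	for (i = 0; i < DEMO_THREADS; i++) {
		demo_task[i] = kthread_create(demo_worker, &demo_wq,
					      "demo_worker %d", i);
		if (IS_ERR(demo_task[i])) {
			demo_task[i] = NULL;
			rc = -ENXIO;
			goto cleanup;
		}
		wake_up_process(demo_task[i]);
	}

	/* Block the writer until every worker has signalled completion. */
	wait_event_interruptible(demo_wq,
				 atomic_read(&demo_tdone) == DEMO_THREADS);

cleanup:
	/* Safe: the workers never exit on their own, they wait to be stopped. */
	for (i = 0; i < DEMO_THREADS; i++)
		if (demo_task[i])
			kthread_stop(demo_task[i]);

	mutex_unlock(&demo_run_mutex);
	return rc;
}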
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index 5008ccf600a9..db4dc61164ca 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -58,6 +58,7 @@
 #include <linux/delay.h>
 #include <linux/sizes.h>
 #include <linux/ntb.h>
+#include <linux/mutex.h>
 
 #define DRIVER_NAME		"ntb_perf"
 #define DRIVER_DESCRIPTION	"PCIe NTB Performance Measurement Tool"
@@ -121,6 +122,7 @@ struct pthr_ctx {
 	int			dma_prep_err;
 	int			src_idx;
 	void			*srcs[MAX_SRCS];
+	wait_queue_head_t	*wq;
 };
 
 struct perf_ctx {
@@ -134,9 +136,11 @@ struct perf_ctx {
 	struct dentry		*debugfs_run;
 	struct dentry		*debugfs_threads;
 	u8			perf_threads;
-	bool			run;
+	/* mutex ensures only one set of threads run at once */
+	struct mutex		run_mutex;
 	struct pthr_ctx		pthr_ctx[MAX_THREADS];
 	atomic_t		tsync;
+	atomic_t		tdone;
 };
 
 enum {
@@ -295,12 +299,18 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
 			set_current_state(TASK_INTERRUPTIBLE);
 			schedule_timeout(1);
 		}
+
+		if (unlikely(kthread_should_stop()))
+			break;
 	}
 
 	if (use_dma) {
 		pr_info("%s: All DMA descriptors submitted\n", current->comm);
-		while (atomic_read(&pctx->dma_sync) != 0)
+		while (atomic_read(&pctx->dma_sync) != 0) {
+			if (kthread_should_stop())
+				break;
 			msleep(20);
+		}
 	}
 
 	kstop = ktime_get();
@@ -393,7 +403,10 @@ static int ntb_perf_thread(void *data)
 		pctx->srcs[i] = NULL;
 	}
 
-	return 0;
+	atomic_inc(&perf->tdone);
+	wake_up(pctx->wq);
+	rc = 0;
+	goto done;
 
 err:
 	for (i = 0; i < MAX_SRCS; i++) {
@@ -406,6 +419,16 @@ err:
 		pctx->dma_chan = NULL;
 	}
 
+done:
+	/* Wait until we are told to stop */
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (kthread_should_stop())
+			break;
+		schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+
 	return rc;
 }
 
@@ -553,6 +576,7 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
 	struct perf_ctx *perf = filp->private_data;
 	char *buf;
 	ssize_t ret, out_offset;
+	int running;
 
 	if (!perf)
 		return 0;
@@ -560,7 +584,9 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
 	buf = kmalloc(64, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
-	out_offset = snprintf(buf, 64, "%d\n", perf->run);
+
+	running = mutex_is_locked(&perf->run_mutex);
+	out_offset = snprintf(buf, 64, "%d\n", running);
 	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
 	kfree(buf);
 
@@ -572,7 +598,6 @@ static void threads_cleanup(struct perf_ctx *perf)
 	struct pthr_ctx *pctx;
 	int i;
 
-	perf->run = false;
 	for (i = 0; i < MAX_THREADS; i++) {
 		pctx = &perf->pthr_ctx[i];
 		if (pctx->thread) {
@@ -587,65 +612,66 @@ static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
 {
 	struct perf_ctx *perf = filp->private_data;
 	int node, i;
+	DECLARE_WAIT_QUEUE_HEAD(wq);
 
 	if (!perf->link_is_up)
-		return 0;
+		return -ENOLINK;
 
 	if (perf->perf_threads == 0)
-		return 0;
+		return -EINVAL;
 
-	if (atomic_read(&perf->tsync) == 0)
-		perf->run = false;
+	if (!mutex_trylock(&perf->run_mutex))
+		return -EBUSY;
 
-	if (perf->run)
-		threads_cleanup(perf);
-	else {
-		perf->run = true;
-
-		if (perf->perf_threads > MAX_THREADS) {
-			perf->perf_threads = MAX_THREADS;
-			pr_info("Reset total threads to: %u\n", MAX_THREADS);
-		}
-
-		/* no greater than 1M */
-		if (seg_order > MAX_SEG_ORDER) {
-			seg_order = MAX_SEG_ORDER;
-			pr_info("Fix seg_order to %u\n", seg_order);
-		}
-
-		if (run_order < seg_order) {
-			run_order = seg_order;
-			pr_info("Fix run_order to %u\n", run_order);
-		}
-
-		node = dev_to_node(&perf->ntb->pdev->dev);
-		/* launch kernel thread */
-		for (i = 0; i < perf->perf_threads; i++) {
-			struct pthr_ctx *pctx;
-
-			pctx = &perf->pthr_ctx[i];
-			atomic_set(&pctx->dma_sync, 0);
-			pctx->perf = perf;
-			pctx->thread =
-				kthread_create_on_node(ntb_perf_thread,
-						       (void *)pctx,
-						       node, "ntb_perf %d", i);
-			if (IS_ERR(pctx->thread)) {
-				pctx->thread = NULL;
-				goto err;
-			} else
-				wake_up_process(pctx->thread);
-
-			if (perf->run == false)
-				return -ENXIO;
-		}
+	if (perf->perf_threads > MAX_THREADS) {
+		perf->perf_threads = MAX_THREADS;
+		pr_info("Reset total threads to: %u\n", MAX_THREADS);
+	}
+
+	/* no greater than 1M */
+	if (seg_order > MAX_SEG_ORDER) {
+		seg_order = MAX_SEG_ORDER;
+		pr_info("Fix seg_order to %u\n", seg_order);
+	}
+
+	if (run_order < seg_order) {
+		run_order = seg_order;
+		pr_info("Fix run_order to %u\n", run_order);
+	}
+
+	node = dev_to_node(&perf->ntb->pdev->dev);
+	atomic_set(&perf->tdone, 0);
+
+	/* launch kernel thread */
+	for (i = 0; i < perf->perf_threads; i++) {
+		struct pthr_ctx *pctx;
+
+		pctx = &perf->pthr_ctx[i];
+		atomic_set(&pctx->dma_sync, 0);
+		pctx->perf = perf;
+		pctx->wq = &wq;
+		pctx->thread =
+			kthread_create_on_node(ntb_perf_thread,
+					       (void *)pctx,
+					       node, "ntb_perf %d", i);
+		if (IS_ERR(pctx->thread)) {
+			pctx->thread = NULL;
+			goto err;
+		} else {
+			wake_up_process(pctx->thread);
+		}
 	}
 
+	wait_event_interruptible(wq,
+		atomic_read(&perf->tdone) == perf->perf_threads);
+
+	threads_cleanup(perf);
+	mutex_unlock(&perf->run_mutex);
 	return count;
 
 err:
 	threads_cleanup(perf);
+	mutex_unlock(&perf->run_mutex);
 	return -ENXIO;
 }
 
@@ -713,7 +739,7 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
 	perf->ntb = ntb;
 	perf->perf_threads = 1;
 	atomic_set(&perf->tsync, 0);
-	perf->run = false;
+	mutex_init(&perf->run_mutex);
 	spin_lock_init(&perf->db_lock);
 	perf_setup_mw(ntb, perf);
 	INIT_DELAYED_WORK(&perf->link_work, perf_link_work);
@@ -748,6 +774,8 @@ static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
 
 	dev_dbg(&perf->ntb->dev, "%s called\n", __func__);
 
+	mutex_lock(&perf->run_mutex);
+
 	cancel_delayed_work_sync(&perf->link_work);
 	cancel_work_sync(&perf->link_cleanup);
 
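And the worker half of the same hypothetical sketch, mirroring point 3 of the commit message: each thread does its work, counts itself done, wakes the launcher, and then parks until kthread_stop() is called rather than exiting on its own. Again, the demo_* names are illustrative and not part of the driver.

/* Illustrative sketch only -- worker half of the example before the diff. */
int demo_worker(void *data)
{
	wait_queue_head_t *wq = data;

	/*
	 * ... do the measurement work here, checking kthread_should_stop()
	 * periodically inside any long-running loops ...
	 */

	/* Tell the launcher this thread has finished its work. */
	atomic_inc(&demo_tdone);
	wake_up(wq);

	/*
	 * Do not exit; park until kthread_stop() is called, so the stop
	 * in demo_run() never races with an already-dead thread.
	 */
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (kthread_should_stop())
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);

	return 0;
}

Parking like this keeps kthread_stop() valid on every thread, which is exactly the constraint the kthread_create() documentation places on its callers.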