Diffstat (limited to 'kernel/workqueue.c')
 kernel/workqueue.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 68 insertions(+), 9 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 61a0264e28f9..16e13d8628a3 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -301,11 +301,26 @@ static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
 static LIST_HEAD(workqueues);		/* PR: list of all workqueues */
 static bool workqueue_freezing;		/* PL: have wqs started freezing? */
 
-static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all unbound wqs */
+/* PL: allowable cpus for unbound wqs and work items */
+static cpumask_var_t wq_unbound_cpumask;
+
+/* CPU where unbound work was last round robin scheduled from this CPU */
+static DEFINE_PER_CPU(int, wq_rr_cpu_last);
+
+/*
+ * Local execution of unbound work items is no longer guaranteed.  The
+ * following always forces round-robin CPU selection on unbound work items
+ * to uncover usages which depend on it.
+ */
+#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
+static bool wq_debug_force_rr_cpu = true;
+#else
+static bool wq_debug_force_rr_cpu = false;
+#endif
+module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
 
 /* the per-cpu worker pools */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
-				     cpu_worker_pools);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools);
 
 static DEFINE_IDR(worker_pool_idr);	/* PR: idr of all pools */
 
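
The new knob is an ordinary module parameter; since workqueue.c is built into the kernel, it can be set at boot as workqueue.debug_force_rr_cpu=1 or flipped later through /sys/module/workqueue/parameters/debug_force_rr_cpu thanks to the 0644 mode. For readers unfamiliar with the Kconfig-defaulted bool parameter idiom used above, here is a minimal sketch of the same pattern; CONFIG_EXAMPLE_FORCE and example_force are hypothetical names, not part of the patch:

        #include <linux/module.h>

        /* default chosen at build time, still writable at run time */
        #ifdef CONFIG_EXAMPLE_FORCE
        static bool example_force = true;
        #else
        static bool example_force = false;
        #endif
        /* 0644: root may toggle it via /sys/module/<module>/parameters/ */
        module_param(example_force, bool, 0644);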
@@ -570,6 +585,16 @@ static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
 						  int node)
 {
 	assert_rcu_or_wq_mutex_or_pool_mutex(wq);
+
+	/*
+	 * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a
+	 * delayed item is pending.  The plan is to keep CPU -> NODE
+	 * mapping valid and stable across CPU on/offlines.  Once that
+	 * happens, this workaround can be removed.
+	 */
+	if (unlikely(node == NUMA_NO_NODE))
+		return wq->dfl_pwq;
+
 	return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
 }
 
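
For context, the lookup this fallback protects sits in the unbound branch of __queue_work() further down in this file; roughly (a sketch, surrounding lines omitted):

        /* pwq which will be used unless @work is executing elsewhere */
        if (!(wq->flags & WQ_UNBOUND))
                pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
        else
                pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));

If the recorded CPU was hot-removed while a delayed item was still pending, cpu_to_node() can report NUMA_NO_NODE (-1), which would index numa_pwq_tbl[] out of bounds; returning wq->dfl_pwq keeps the queueing path safe until the CPU -> node mapping is made stable across hotplug.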
@@ -1298,6 +1323,39 @@ static bool is_chained_work(struct workqueue_struct *wq)
 	return worker && worker->current_pwq->wq == wq;
 }
 
+/*
+ * When queueing an unbound work item to a wq, prefer local CPU if allowed
+ * by wq_unbound_cpumask.  Otherwise, round robin among the allowed ones to
+ * avoid perturbing sensitive tasks.
+ */
+static int wq_select_unbound_cpu(int cpu)
+{
+	static bool printed_dbg_warning;
+	int new_cpu;
+
+	if (likely(!wq_debug_force_rr_cpu)) {
+		if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
+			return cpu;
+	} else if (!printed_dbg_warning) {
+		pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
+		printed_dbg_warning = true;
+	}
+
+	if (cpumask_empty(wq_unbound_cpumask))
+		return cpu;
+
+	new_cpu = __this_cpu_read(wq_rr_cpu_last);
+	new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
+	if (unlikely(new_cpu >= nr_cpu_ids)) {
+		new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
+		if (unlikely(new_cpu >= nr_cpu_ids))
+			return cpu;
+	}
+	__this_cpu_write(wq_rr_cpu_last, new_cpu);
+
+	return new_cpu;
+}
+
 static void __queue_work(int cpu, struct workqueue_struct *wq,
 			 struct work_struct *work)
 {
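
In the common case the local CPU is returned as long as it is in wq_unbound_cpumask; otherwise, or always when debug_force_rr_cpu is set, the per-CPU cursor wq_rr_cpu_last is advanced to the next online CPU allowed by the mask, wrapping around, with the local CPU as the last resort when the masks do not intersect. A minimal, hypothetical caller that exercises this path (my_wq, my_work and my_work_fn() are illustrative, not part of the patch):

        #include <linux/module.h>
        #include <linux/workqueue.h>
        #include <linux/smp.h>

        static void my_work_fn(struct work_struct *work)
        {
                pr_info("unbound work ran on CPU %d\n", raw_smp_processor_id());
        }

        static DECLARE_WORK(my_work, my_work_fn);
        static struct workqueue_struct *my_wq;

        static int __init my_init(void)
        {
                my_wq = alloc_workqueue("my_wq", WQ_UNBOUND, 0);
                if (!my_wq)
                        return -ENOMEM;

                /*
                 * No explicit CPU: queue_work() passes WORK_CPU_UNBOUND, so
                 * __queue_work() now routes through wq_select_unbound_cpu()
                 * instead of unconditionally using the local CPU.
                 */
                queue_work(my_wq, &my_work);
                return 0;
        }

Note that for an unbound workqueue the selected CPU mainly determines which NUMA node's pool_workqueue receives the item; the worker that eventually runs it is still not pinned to that CPU.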
@@ -1323,7 +1381,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
 		return;
 retry:
 	if (req_cpu == WORK_CPU_UNBOUND)
-		cpu = raw_smp_processor_id();
+		cpu = wq_select_unbound_cpu(raw_smp_processor_id());
 
 	/* pwq which will be used unless @work is executing elsewhere */
 	if (!(wq->flags & WQ_UNBOUND))
@@ -1464,13 +1522,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
 	timer_stats_timer_set_start_info(&dwork->timer);
 
 	dwork->wq = wq;
-	/* timer isn't guaranteed to run in this cpu, record earlier */
-	if (cpu == WORK_CPU_UNBOUND)
-		cpu = raw_smp_processor_id();
 	dwork->cpu = cpu;
 	timer->expires = jiffies + delay;
 
-	add_timer_on(timer, cpu);
+	if (unlikely(cpu != WORK_CPU_UNBOUND))
+		add_timer_on(timer, cpu);
+	else
+		add_timer(timer);
 }
 
 /**
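
Previously a WORK_CPU_UNBOUND delayed item had the queueing CPU recorded up front and its timer pinned there with add_timer_on(); with this change dwork->cpu keeps WORK_CPU_UNBOUND and a plain add_timer() is used, so the CPU (and, for unbound wqs, the node) is decided only when the timer fires and __queue_work() runs. A hedged usage sketch; system_unbound_wq is the stock unbound workqueue, while my_dwork, my_dwork_fn() and kick_it() are hypothetical:

        #include <linux/workqueue.h>
        #include <linux/jiffies.h>
        #include <linux/printk.h>

        static void my_dwork_fn(struct work_struct *work)
        {
                pr_info("delayed work: CPU chosen only at timer expiry\n");
        }

        static DECLARE_DELAYED_WORK(my_dwork, my_dwork_fn);

        static void kick_it(void)
        {
                /*
                 * cpu stays WORK_CPU_UNBOUND inside __queue_delayed_work(),
                 * so the timer is no longer pinned to whichever CPU happened
                 * to queue it.
                 */
                queue_delayed_work(system_unbound_wq, &my_dwork, HZ);
        }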
@@ -2355,7 +2413,8 @@ static void check_flush_dependency(struct workqueue_struct *target_wq,
 	WARN_ONCE(current->flags & PF_MEMALLOC,
 		  "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
 		  current->pid, current->comm, target_wq->name, target_func);
-	WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
+	WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
+			      (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
 		  "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
 		  worker->current_pwq->wq->name, worker->current_func,
 		  target_wq->name, target_func);
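
The tightened predicate exempts legacy workqueues: create_workqueue() and friends get WQ_MEM_RECLAIM implicitly, together with the internal __WQ_LEGACY flag, so flushing a !WQ_MEM_RECLAIM workqueue from them is no longer reported as a reclaim-dependency bug. A small truth-table sketch of the new test; warns() is illustrative, not a kernel function:

        static bool warns(unsigned int flags)
        {
                /* true only for explicit WQ_MEM_RECLAIM users, never legacy wqs */
                return (flags & (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM;
        }

        /*
         *      warns(WQ_MEM_RECLAIM)                   -> true
         *      warns(WQ_MEM_RECLAIM | __WQ_LEGACY)     -> false
         *      warns(0)                                -> false
         */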