 Documentation/kernel-parameters.txt | 11 ++
 include/linux/workqueue.h           |  9 +-
 kernel/workqueue.c                  | 74 +++++++++++---
 lib/Kconfig.debug                   | 15 +++
 4 files changed, 98 insertions(+), 11 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 551ecf09c8dd..9a53c929f017 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -4235,6 +4235,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			The default value of this parameter is determined by
 			the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
 
+	workqueue.debug_force_rr_cpu
+			Workqueue used to implicitly guarantee that work
+			items queued without explicit CPU specified are put
+			on the local CPU.  This guarantee is no longer true
+			and while local CPU is still preferred work items
+			may be put on foreign CPUs.  This debug option
+			forces round-robin CPU selection to flush out
+			usages which depend on the now broken guarantee.
+			When enabled, memory and cache locality will be
+			impacted.
+
 	x2apic_phys	[X86-64,APIC] Use x2apic physical mode instead of
 			default x2apic cluster mode on platforms
 			supporting x2apic.
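Note: module_param_named() registers the knob with mode 0644 (see the
kernel/workqueue.c hunk below), so besides the boot parameter documented
above it can be toggled at runtime via
/sys/module/workqueue/parameters/debug_force_rr_cpu. As a hypothetical
sketch (not part of this patch) of how a caller that truly needs local
execution should look once the guarantee is gone; queue_work() only prefers
the local CPU, while queue_work_on() pins the item:

	/* sketch: bind the work item to the current CPU explicitly */
	static void queue_here(struct workqueue_struct *wq,
			       struct work_struct *work)
	{
		/* assumes preemption is disabled so the CPU id is stable */
		queue_work_on(smp_processor_id(), wq, work);
	}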
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 0e32bc71245e..ca73c503b92a 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -311,6 +311,7 @@ enum {
 
 	__WQ_DRAINING		= 1 << 16, /* internal: workqueue is draining */
 	__WQ_ORDERED		= 1 << 17, /* internal: workqueue is ordered */
+	__WQ_LEGACY		= 1 << 18, /* internal: create*_workqueue() */
 
 	WQ_MAX_ACTIVE		= 512,	  /* I like 512, better ideas? */
 	WQ_MAX_UNBOUND_PER_CPU	= 4,	  /* 4 * #cpus for unbound wq */
@@ -411,12 +412,12 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
 	alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
 
 #define create_workqueue(name)						\
-	alloc_workqueue("%s", WQ_MEM_RECLAIM, 1, (name))
+	alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))
 #define create_freezable_workqueue(name)				\
-	alloc_workqueue("%s", WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, \
-			1, (name))
+	alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND |	\
+			WQ_MEM_RECLAIM, 1, (name))
 #define create_singlethread_workqueue(name)				\
-	alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, name)
+	alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name)
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
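Note: __WQ_LEGACY is internal and is set only by the create*_workqueue()
wrappers. For example, after this change

	struct workqueue_struct *wq = create_workqueue("foo");

expands to

	struct workqueue_struct *wq =
		alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, ("foo"));

which lets check_flush_dependency() (further down in this patch) tell apart
workqueues that requested WQ_MEM_RECLAIM deliberately from those that merely
inherited it from a legacy wrapper.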
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 61a0264e28f9..7ff5dc7d2ac5 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -301,7 +301,23 @@ static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
 static LIST_HEAD(workqueues);		/* PR: list of all workqueues */
 static bool workqueue_freezing;		/* PL: have wqs started freezing? */
 
-static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all unbound wqs */
+/* PL: allowable cpus for unbound wqs and work items */
+static cpumask_var_t wq_unbound_cpumask;
+
+/* CPU where unbound work was last round robin scheduled from this CPU */
+static DEFINE_PER_CPU(int, wq_rr_cpu_last);
+
+/*
+ * Local execution of unbound work items is no longer guaranteed.  The
+ * following always forces round-robin CPU selection on unbound work items
+ * to uncover usages which depend on it.
+ */
+#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
+static bool wq_debug_force_rr_cpu = true;
+#else
+static bool wq_debug_force_rr_cpu = false;
+#endif
+module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
 
 /* the per-cpu worker pools */
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
@@ -570,6 +586,16 @@ static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
 						  int node)
 {
 	assert_rcu_or_wq_mutex_or_pool_mutex(wq);
+
+	/*
+	 * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a
+	 * delayed item is pending.  The plan is to keep CPU -> NODE
+	 * mapping valid and stable across CPU on/offlines.  Once that
+	 * happens, this workaround can be removed.
+	 */
+	if (unlikely(node == NUMA_NO_NODE))
+		return wq->dfl_pwq;
+
 	return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
 }
 
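Note: the workaround above matters because __queue_work() looks up the pwq as

	pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));

and, as the XXX comment says, cpu_to_node() may yield NUMA_NO_NODE (-1) for a
CPU that went offline while a delayed item was pending; falling back to
wq->dfl_pwq avoids indexing numa_pwq_tbl[] with -1.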
@@ -1298,6 +1324,39 @@ static bool is_chained_work(struct workqueue_struct *wq)
 	return worker && worker->current_pwq->wq == wq;
 }
 
+/*
+ * When queueing an unbound work item to a wq, prefer local CPU if allowed
+ * by wq_unbound_cpumask.  Otherwise, round robin among the allowed ones to
+ * avoid perturbing sensitive tasks.
+ */
+static int wq_select_unbound_cpu(int cpu)
+{
+	static bool printed_dbg_warning;
+	int new_cpu;
+
+	if (likely(!wq_debug_force_rr_cpu)) {
+		if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
+			return cpu;
+	} else if (!printed_dbg_warning) {
+		pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
+		printed_dbg_warning = true;
+	}
+
+	if (cpumask_empty(wq_unbound_cpumask))
+		return cpu;
+
+	new_cpu = __this_cpu_read(wq_rr_cpu_last);
+	new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
+	if (unlikely(new_cpu >= nr_cpu_ids)) {
+		new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
+		if (unlikely(new_cpu >= nr_cpu_ids))
+			return cpu;
+	}
+	__this_cpu_write(wq_rr_cpu_last, new_cpu);
+
+	return new_cpu;
+}
+
 static void __queue_work(int cpu, struct workqueue_struct *wq,
 			 struct work_struct *work)
 {
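Note: the selection wraps around the allowed mask. As a worked example in
standalone userspace C (an illustration only; cpumask_next_and() and
cpumask_first_and() are modelled with plain bitmask loops), with allowed
CPUs {1, 3, 5} and wq_rr_cpu_last == 5 the successive picks are 1, 3, 5, 1:

	#include <stdio.h>

	/* next allowed CPU strictly after 'last', wrapping to the first */
	static int next_allowed(unsigned int mask, int last, int nbits)
	{
		for (int c = last + 1; c < nbits; c++)	/* cpumask_next_and() */
			if (mask & (1u << c))
				return c;
		for (int c = 0; c < nbits; c++)		/* cpumask_first_and() */
			if (mask & (1u << c))
				return c;
		return -1;				/* mask is empty */
	}

	int main(void)
	{
		unsigned int mask = 1u << 1 | 1u << 3 | 1u << 5;
		int cpu = 5;		/* plays the role of wq_rr_cpu_last */

		for (int i = 0; i < 4; i++) {
			cpu = next_allowed(mask, cpu, 8);
			printf("%d ", cpu);	/* prints: 1 3 5 1 */
		}
		printf("\n");
		return 0;
	}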
@@ -1323,7 +1382,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
 		return;
 retry:
 	if (req_cpu == WORK_CPU_UNBOUND)
-		cpu = raw_smp_processor_id();
+		cpu = wq_select_unbound_cpu(raw_smp_processor_id());
 
 	/* pwq which will be used unless @work is executing elsewhere */
 	if (!(wq->flags & WQ_UNBOUND))
@@ -1464,13 +1523,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
 	timer_stats_timer_set_start_info(&dwork->timer);
 
 	dwork->wq = wq;
-	/* timer isn't guaranteed to run in this cpu, record earlier */
-	if (cpu == WORK_CPU_UNBOUND)
-		cpu = raw_smp_processor_id();
 	dwork->cpu = cpu;
 	timer->expires = jiffies + delay;
 
-	add_timer_on(timer, cpu);
+	if (unlikely(cpu != WORK_CPU_UNBOUND))
+		add_timer_on(timer, cpu);
+	else
+		add_timer(timer);
 }
 
 /**
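Note: this drops the old behaviour of resolving WORK_CPU_UNBOUND to the
queueing CPU at queue time. dwork->cpu now keeps WORK_CPU_UNBOUND, plain
add_timer() lets the timer fire on any CPU, and __queue_work() runs
wq_select_unbound_cpu() only when the timer expires. A caller that wants
the old pinning must ask for it explicitly, e.g. (illustration only):

	/* pin both the timer and the work item to the current CPU */
	queue_delayed_work_on(raw_smp_processor_id(), wq, &dwork, HZ);

whereas queue_delayed_work(wq, &dwork, HZ) no longer implies any CPU.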
@@ -2355,7 +2414,8 @@ static void check_flush_dependency(struct workqueue_struct *target_wq,
 	WARN_ONCE(current->flags & PF_MEMALLOC,
 		  "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
 		  current->pid, current->comm, target_wq->name, target_func);
-	WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
+	WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
+			      (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
 		  "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
 		  worker->current_pwq->wq->name, worker->current_func,
 		  target_wq->name, target_func);
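Note: the reworked condition warns only when the flushing workqueue has
WQ_MEM_RECLAIM set and __WQ_LEGACY clear:

	WQ_MEM_RECLAIM	__WQ_LEGACY	warns on flushing !WQ_MEM_RECLAIM?
	     0		     0		no
	     1		     0		yes
	     0		     1		no
	     1		     1		no (legacy wrapper, exempt)

The create*_workqueue() wrappers pass WQ_MEM_RECLAIM unconditionally, so
without the __WQ_LEGACY exemption every legacy user flushing a regular
workqueue would trigger a spurious warning.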
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ecb9e75614bf..8bfd1aca7a3d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1400,6 +1400,21 @@ config RCU_EQS_DEBUG
 
 endmenu # "RCU Debugging"
 
+config DEBUG_WQ_FORCE_RR_CPU
+	bool "Force round-robin CPU selection for unbound work items"
+	depends on DEBUG_KERNEL
+	default n
+	help
+	  Workqueue used to implicitly guarantee that work items queued
+	  without explicit CPU specified are put on the local CPU.  This
+	  guarantee is no longer true and while local CPU is still
+	  preferred work items may be put on foreign CPUs.  Kernel
+	  parameter "workqueue.debug_force_rr_cpu" is added to force
+	  round-robin CPU selection to flush out usages which depend on the
+	  now broken guarantee.  This config option enables the debug
+	  feature by default.  When enabled, memory and cache locality will
+	  be impacted.
+
 config DEBUG_BLOCK_EXT_DEVT
 	bool "Force extended block device numbers and spread them"
 	depends on DEBUG_KERNEL