-rw-r--r--   Documentation/kernel-parameters.txt   11
-rw-r--r--   include/linux/workqueue.h               9
-rw-r--r--   kernel/workqueue.c                     74
-rw-r--r--   lib/Kconfig.debug                      15
4 files changed, 98 insertions, 11 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 551ecf09c8dd..9a53c929f017 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -4235,6 +4235,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			The default value of this parameter is determined by
 			the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
 
+	workqueue.debug_force_rr_cpu
+			Workqueue used to implicitly guarantee that work
+			items queued without explicit CPU specified are put
+			on the local CPU.  This guarantee is no longer true
+			and while local CPU is still preferred work items
+			may be put on foreign CPUs.  This debug option
+			forces round-robin CPU selection to flush out
+			usages which depend on the now broken guarantee.
+			When enabled, memory and cache locality will be
+			impacted.
+
 	x2apic_phys	[X86-64,APIC] Use x2apic physical mode instead of
 			default x2apic cluster mode on platforms
 			supporting x2apic.
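
Note: besides the boot parameter above, the knob is a writable (0644) module
parameter, so it can be flipped at runtime.  A minimal userspace sketch,
assuming the conventional sysfs path that module_param_named() creates for
built-in workqueue code:

	#include <stdio.h>

	int main(void)
	{
		/* Path assumed from module_param_named(debug_force_rr_cpu, ...)
		 * in the built-in kernel/workqueue.c. */
		const char *path =
			"/sys/module/workqueue/parameters/debug_force_rr_cpu";
		FILE *f = fopen(path, "w");

		if (!f) {
			perror("fopen");
			return 1;
		}
		fputs("Y\n", f);	/* "N" turns forced round-robin back off */
		fclose(f);
		return 0;
	}

Booting with workqueue.debug_force_rr_cpu=1 has the same effect without
touching sysfs.
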
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 0e32bc71245e..ca73c503b92a 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -311,6 +311,7 @@ enum {
 
 	__WQ_DRAINING		= 1 << 16, /* internal: workqueue is draining */
 	__WQ_ORDERED		= 1 << 17, /* internal: workqueue is ordered */
+	__WQ_LEGACY		= 1 << 18, /* internal: create*_workqueue() */
 
 	WQ_MAX_ACTIVE		= 512,	  /* I like 512, better ideas? */
 	WQ_MAX_UNBOUND_PER_CPU	= 4,	  /* 4 * #cpus for unbound wq */
@@ -411,12 +412,12 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
 	alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
 
 #define create_workqueue(name)						\
-	alloc_workqueue("%s", WQ_MEM_RECLAIM, 1, (name))
+	alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))
 #define create_freezable_workqueue(name)				\
-	alloc_workqueue("%s", WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, \
-			1, (name))
+	alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND |	\
+			WQ_MEM_RECLAIM, 1, (name))
 #define create_singlethread_workqueue(name)				\
-	alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, name)
+	alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name)
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
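
Note: to see what the updated wrappers now hand to alloc_workqueue(), here is
a small userspace sketch with alloc_workqueue() mocked out.  __WQ_LEGACY's
value comes from this patch; WQ_MEM_RECLAIM's value (1 << 3) is an assumption
about the surrounding enum, and the workqueue name is made up.

	#include <stdio.h>

	#define WQ_MEM_RECLAIM	(1 << 3)	/* assumed value from the header */
	#define __WQ_LEGACY	(1 << 18)	/* new internal flag from this patch */

	/* Stand-in for the kernel's alloc_workqueue(); it only reports the
	 * flags the wrapper macro passes down. */
	static void *alloc_workqueue(const char *fmt, unsigned int flags,
				     int max_active, const char *name)
	{
		(void)fmt;
		printf("%s: flags=%#x max_active=%d legacy=%s\n", name, flags,
		       max_active, (flags & __WQ_LEGACY) ? "yes" : "no");
		return NULL;
	}

	/* Mirrors the patched wrapper: __WQ_LEGACY is ORed in so the core can
	 * recognise workqueues created through the old interface. */
	#define create_workqueue(name) \
		alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))

	int main(void)
	{
		create_workqueue("example");	/* prints flags=0x40008, legacy=yes */
		return 0;
	}
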
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 61a0264e28f9..7ff5dc7d2ac5 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -301,7 +301,23 @@ static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
 static LIST_HEAD(workqueues);		/* PR: list of all workqueues */
 static bool workqueue_freezing;		/* PL: have wqs started freezing? */
 
-static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all unbound wqs */
+/* PL: allowable cpus for unbound wqs and work items */
+static cpumask_var_t wq_unbound_cpumask;
+
+/* CPU where unbound work was last round robin scheduled from this CPU */
+static DEFINE_PER_CPU(int, wq_rr_cpu_last);
+
+/*
+ * Local execution of unbound work items is no longer guaranteed.  The
+ * following always forces round-robin CPU selection on unbound work items
+ * to uncover usages which depend on it.
+ */
+#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
+static bool wq_debug_force_rr_cpu = true;
+#else
+static bool wq_debug_force_rr_cpu = false;
+#endif
+module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
 
 /* the per-cpu worker pools */
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
@@ -570,6 +586,16 @@ static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
 						  int node)
 {
 	assert_rcu_or_wq_mutex_or_pool_mutex(wq);
+
+	/*
+	 * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a
+	 * delayed item is pending.  The plan is to keep CPU -> NODE
+	 * mapping valid and stable across CPU on/offlines.  Once that
+	 * happens, this workaround can be removed.
+	 */
+	if (unlikely(node == NUMA_NO_NODE))
+		return wq->dfl_pwq;
+
 	return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
 }
 
@@ -1298,6 +1324,39 @@ static bool is_chained_work(struct workqueue_struct *wq)
 	return worker && worker->current_pwq->wq == wq;
 }
 
+/*
+ * When queueing an unbound work item to a wq, prefer local CPU if allowed
+ * by wq_unbound_cpumask.  Otherwise, round robin among the allowed ones to
+ * avoid perturbing sensitive tasks.
+ */
+static int wq_select_unbound_cpu(int cpu)
+{
+	static bool printed_dbg_warning;
+	int new_cpu;
+
+	if (likely(!wq_debug_force_rr_cpu)) {
+		if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
+			return cpu;
+	} else if (!printed_dbg_warning) {
+		pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
+		printed_dbg_warning = true;
+	}
+
+	if (cpumask_empty(wq_unbound_cpumask))
+		return cpu;
+
+	new_cpu = __this_cpu_read(wq_rr_cpu_last);
+	new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
+	if (unlikely(new_cpu >= nr_cpu_ids)) {
+		new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
+		if (unlikely(new_cpu >= nr_cpu_ids))
+			return cpu;
+	}
+	__this_cpu_write(wq_rr_cpu_last, new_cpu);
+
+	return new_cpu;
+}
+
 static void __queue_work(int cpu, struct workqueue_struct *wq,
 			 struct work_struct *work)
 {
@@ -1323,7 +1382,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
 		return;
 retry:
 	if (req_cpu == WORK_CPU_UNBOUND)
-		cpu = raw_smp_processor_id();
+		cpu = wq_select_unbound_cpu(raw_smp_processor_id());
 
 	/* pwq which will be used unless @work is executing elsewhere */
 	if (!(wq->flags & WQ_UNBOUND))
@@ -1464,13 +1523,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
 	timer_stats_timer_set_start_info(&dwork->timer);
 
 	dwork->wq = wq;
-	/* timer isn't guaranteed to run in this cpu, record earlier */
-	if (cpu == WORK_CPU_UNBOUND)
-		cpu = raw_smp_processor_id();
 	dwork->cpu = cpu;
 	timer->expires = jiffies + delay;
 
-	add_timer_on(timer, cpu);
+	if (unlikely(cpu != WORK_CPU_UNBOUND))
+		add_timer_on(timer, cpu);
+	else
+		add_timer(timer);
 }
 
 /**
@@ -2355,7 +2414,8 @@ static void check_flush_dependency(struct workqueue_struct *target_wq,
 	WARN_ONCE(current->flags & PF_MEMALLOC,
 		  "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
 		  current->pid, current->comm, target_wq->name, target_func);
-	WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
+	WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
+			      (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
 		  "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
 		  worker->current_pwq->wq->name, worker->current_func,
 		  target_wq->name, target_func);
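
Note: for reference, a standalone userspace sketch of the round-robin walk in
wq_select_unbound_cpu() above.  cpumasks are modeled as plain bitmasks, the
CPU layout is made up, and the per-CPU wq_rr_cpu_last plus the local-CPU
preference and wq_debug_force_rr_cpu check are collapsed into a single
variable; only the skip-offline-and-wrap behaviour of the
cpumask_next_and()/cpumask_first_and() sequence is shown.

	#include <stdio.h>

	#define NR_CPUS 8

	/* Analogue of cpumask_next_and(): first CPU after 'prev' set in both
	 * masks, or NR_CPUS if there is none. */
	static int next_cpu_and(int prev, unsigned int a, unsigned int b)
	{
		for (int cpu = prev + 1; cpu < NR_CPUS; cpu++)
			if ((a & b) & (1u << cpu))
				return cpu;
		return NR_CPUS;
	}

	int main(void)
	{
		unsigned int wq_unbound_cpumask = 0x0f;	/* CPUs 0-3 allowed */
		unsigned int cpu_online_mask    = 0x0d;	/* CPU 1 offline */
		int last = -1;				/* models wq_rr_cpu_last */

		for (int i = 0; i < 6; i++) {
			int cpu = next_cpu_and(last, wq_unbound_cpumask,
					       cpu_online_mask);
			if (cpu >= NR_CPUS)	/* wrap, like cpumask_first_and() */
				cpu = next_cpu_and(-1, wq_unbound_cpumask,
						   cpu_online_mask);
			last = cpu;
			printf("queue %d -> CPU %d\n", i, cpu);
		}
		return 0;
	}

This walks CPUs 0, 2, 3, then wraps back to 0, skipping the offline CPU, which
is the spreading behaviour the debug option forces for every unbound queueing.
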
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ecb9e75614bf..8bfd1aca7a3d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1400,6 +1400,21 @@ config RCU_EQS_DEBUG
 
 endmenu # "RCU Debugging"
 
+config DEBUG_WQ_FORCE_RR_CPU
+	bool "Force round-robin CPU selection for unbound work items"
+	depends on DEBUG_KERNEL
+	default n
+	help
+	  Workqueue used to implicitly guarantee that work items queued
+	  without explicit CPU specified are put on the local CPU.  This
+	  guarantee is no longer true and while local CPU is still
+	  preferred work items may be put on foreign CPUs.  Kernel
+	  parameter "workqueue.debug_force_rr_cpu" is added to force
+	  round-robin CPU selection to flush out usages which depend on the
+	  now broken guarantee.  This config option enables the debug
+	  feature by default.  When enabled, memory and cache locality will
+	  be impacted.
+
 config DEBUG_BLOCK_EXT_DEVT
 	bool "Force extended block device numbers and spread them"
 	depends on DEBUG_KERNEL
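
Closing note on the check_flush_dependency() hunk in kernel/workqueue.c: the
reworked condition warns only when WQ_MEM_RECLAIM is set and __WQ_LEGACY is
not, so workqueues made through the legacy create*_workqueue() wrappers, which
always carry WQ_MEM_RECLAIM whether or not they actually sit on the reclaim
path, no longer trip the flush-dependency warning.  A tiny sketch of that flag
test, with WQ_MEM_RECLAIM's value (1 << 3) again assumed from the header:

	#include <stdbool.h>
	#include <stdio.h>

	#define WQ_MEM_RECLAIM	(1 << 3)	/* assumed value from the header */
	#define __WQ_LEGACY	(1 << 18)	/* new internal flag from this patch */

	/* Mirrors the masked comparison added to check_flush_dependency(). */
	static bool should_warn(unsigned int flags)
	{
		return (flags & (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM;
	}

	int main(void)
	{
		printf("explicit WQ_MEM_RECLAIM wq: %d\n",
		       should_warn(WQ_MEM_RECLAIM));			/* 1 */
		printf("legacy create_workqueue() wq: %d\n",
		       should_warn(__WQ_LEGACY | WQ_MEM_RECLAIM));	/* 0 */
		printf("plain wq: %d\n", should_warn(0));		/* 0 */
		return 0;
	}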
