diff options
| -rw-r--r-- | Documentation/workqueue.txt | 40 | ||||
| -rw-r--r-- | kernel/workqueue.c | 8 |
2 files changed, 47 insertions, 1 deletions
diff --git a/Documentation/workqueue.txt b/Documentation/workqueue.txt index 01c513fac40e..a0b577de918f 100644 --- a/Documentation/workqueue.txt +++ b/Documentation/workqueue.txt | |||
| @@ -12,6 +12,7 @@ CONTENTS | |||
| 12 | 4. Application Programming Interface (API) | 12 | 4. Application Programming Interface (API) |
| 13 | 5. Example Execution Scenarios | 13 | 5. Example Execution Scenarios |
| 14 | 6. Guidelines | 14 | 6. Guidelines |
| 15 | 7. Debugging | ||
| 15 | 16 | ||
| 16 | 17 | ||
| 17 | 1. Introduction | 18 | 1. Introduction |
| @@ -379,3 +380,42 @@ If q1 has WQ_CPU_INTENSIVE set, | |||
| 379 | * Unless work items are expected to consume a huge amount of CPU | 380 | * Unless work items are expected to consume a huge amount of CPU |
| 380 | cycles, using a bound wq is usually beneficial due to the increased | 381 | cycles, using a bound wq is usually beneficial due to the increased |
| 381 | level of locality in wq operations and work item execution. | 382 | level of locality in wq operations and work item execution. |
| 383 | |||
| 384 | |||
| 385 | 7. Debugging | ||
| 386 | |||
| 387 | Because the work functions are executed by generic worker threads | ||
| 388 | there are a few tricks needed to shed some light on misbehaving | ||
| 389 | workqueue users. | ||
| 390 | |||
| 391 | Worker threads show up in the process list as: | ||
| 392 | |||
| 393 | root 5671 0.0 0.0 0 0 ? S 12:07 0:00 [kworker/0:1] | ||
| 394 | root 5672 0.0 0.0 0 0 ? S 12:07 0:00 [kworker/1:2] | ||
| 395 | root 5673 0.0 0.0 0 0 ? S 12:12 0:00 [kworker/0:0] | ||
| 396 | root 5674 0.0 0.0 0 0 ? S 12:13 0:00 [kworker/1:0] | ||
| 397 | |||
| 398 | If kworkers are going crazy (using too much cpu), there are two types | ||
| 399 | of possible problems: | ||
| 400 | |||
| 401 | 1. Something being scheduled in rapid succession | ||
| 402 | 2. A single work item that consumes lots of cpu cycles | ||
| 403 | |||
| 404 | The first one can be tracked using tracing: | ||
| 405 | |||
| 406 | $ echo workqueue:workqueue_queue_work > /sys/kernel/debug/tracing/set_event | ||
| 407 | $ cat /sys/kernel/debug/tracing/trace_pipe > out.txt | ||
| 408 | (wait a few secs) | ||
| 409 | ^C | ||
| 410 | |||
| 411 | If something is busy looping on work queueing, it would be dominating | ||
| 412 | the output and the offender can be determined with the work item | ||
| 413 | function. | ||
| 414 | |||
| 415 | For the second type of problem, it should be possible to just check | ||
| 416 | the stack trace of the offending worker thread. | ||
| 417 | |||
| 418 | $ cat /proc/THE_OFFENDING_KWORKER/stack | ||
| 419 | |||
| 420 | The work item's function should be trivially visible in the stack | ||
| 421 | trace. | ||
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8859a41806dd..e3378e8d3a5c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -1291,8 +1291,14 @@ __acquires(&gcwq->lock) | |||
| 1291 | return true; | 1291 | return true; |
| 1292 | spin_unlock_irq(&gcwq->lock); | 1292 | spin_unlock_irq(&gcwq->lock); |
| 1293 | 1293 | ||
| 1294 | /* CPU has come up in between, retry migration */ | 1294 | /* |
| 1295 | * We've raced with CPU hot[un]plug. Give it a breather | ||
| 1296 | * and retry migration. cond_resched() is required here; | ||
| 1297 | * otherwise, we might deadlock against cpu_stop trying to | ||
| 1298 | * bring down the CPU on a non-preemptive kernel. | ||
| 1299 | */ | ||
| 1295 | cpu_relax(); | 1300 | cpu_relax(); |
| 1301 | cond_resched(); | ||
| 1296 | } | 1302 | } |
| 1297 | } | 1303 | } |
| 1298 | 1304 | ||
