diff options
author | Steven Rostedt (VMware) <rostedt@goodmis.org> | 2018-01-10 08:24:17 -0500 |
---|---|---|
committer | Petr Mladek <pmladek@suse.com> | 2018-01-16 11:21:09 -0500 |
commit | dbdda842fe96f8932bae554f0adf463c27c42bc7 (patch) | |
tree | a021144a822752ced36f244466103c50a9f9c6d9 | |
parent | 11ca75d2d6d18d5a7ee4d7ec1da6f864f5c8c8be (diff) |
printk: Add console owner and waiter logic to load balance console writes
This patch implements what I discussed in Kernel Summit. I added
lockdep annotation (hopefully correctly), and it hasn't had any splats
(since I fixed some bugs in the first iterations). It did catch
problems when I had the owner covering too much. But now that the owner
is only set when actively calling the consoles, lockdep has stayed
quiet.
Here's the design again:
I added a "console_owner" which is set to a task that is actively
writing to the consoles. It is *not* the same as the owner of the
console_lock. It is only set when doing the calls to the console
functions. It is protected by a console_owner_lock which is a raw spin
lock.
There is a console_waiter. This is set when there is an active console
owner that is not current, and waiter is not set. This too is protected
by console_owner_lock.
In printk() when it tries to write to the consoles, we have:
if (console_trylock())
console_unlock();
Now I added an else, which will check if there is an active owner, and
no current waiter. If that is the case, then console_waiter is set, and
the task goes into a spin until it is no longer set.
When the active console owner finishes writing the current message to
the consoles, it grabs the console_owner_lock and sees if there is a
waiter, and clears console_owner.
If there is a waiter, then it breaks out of the loop, clears the waiter
flag (because that will release the waiter from its spin), and exits.
Note, it does *not* release the console semaphore. Because it is a
semaphore, there is no owner. Another task may release it. This means
that the waiter is guaranteed to be the new console owner! Which it
becomes.
Then the waiter calls console_unlock() and continues to write to the
consoles.
If another task comes along and does a printk() it too can become the
new waiter, and we wash, rinse, and repeat!
By Petr Mladek about possible new deadlocks:
The thing is that we move console_sem only to printk() call
that normally calls console_unlock() as well. It means that
the transferred owner should not bring new type of dependencies.
As Steven said somewhere: "If there is a deadlock, it was
there even before."
We could look at it from this side. The possible deadlock would
look like:
CPU0 CPU1
console_unlock()
console_owner = current;
spin_lockA()
printk()
spin = true;
while (...)
call_console_drivers()
spin_lockA()
This would be a deadlock. CPU0 would wait for the lock A.
While CPU1 would own the lockA and would wait for CPU0
to finish calling the console drivers and pass the console_sem
owner.
But if the above is true, then the following scenario was
already possible before:
CPU0
spin_lockA()
printk()
console_unlock()
call_console_drivers()
spin_lockA()
In other words, this deadlock was there even before. Such
deadlocks are prevented by using printk_deferred() in
the sections guarded by the lock A.
By Steven Rostedt:
To demonstrate the issue, this module has been shown to lock up a
system with 4 CPUs and a slow console (like a serial console). It is
also able to lock up a 8 CPU system with only a fast (VGA) console, by
passing in "loops=100". The changes in this commit prevent this module
from locking up the system.
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>
#include <linux/hrtimer.h>
/* Set at module unload; every worker polls it and bails out. */
static bool stop_testing;
/* Number of back-to-back non-preemptible printk calls per burst. */
static unsigned int loops = 1;

/*
 * Per-CPU worker: hammer the console with printk bursts issued under
 * preempt_disable(), sleeping briefly between bursts, until
 * stop_testing is observed. This stresses the console-owner/waiter
 * hand-off logic on a slow console.
 */
static void preempt_printk_workfn(struct work_struct *work)
{
	unsigned int burst;

	for (;;) {
		if (READ_ONCE(stop_testing))
			break;

		for (burst = 0; burst < loops; burst++) {
			if (READ_ONCE(stop_testing))
				break;
			preempt_disable();
			pr_emerg("%5d%-75s\n", smp_processor_id(),
				 " XXX NOPREEMPT");
			preempt_enable();
		}
		msleep(1);
	}
}
/* One work item per online CPU, allocated in test_init(). */
static struct work_struct __percpu *works;

/*
 * Tear-down: signal every worker to stop, wait for each per-CPU work
 * item to drain, then release the per-CPU allocation.
 */
static void finish(void)
{
	int c;

	WRITE_ONCE(stop_testing, true);

	for_each_online_cpu(c)
		flush_work(per_cpu_ptr(works, c));

	free_percpu(works);
}
/*
 * Module init: allocate one work item per CPU and queue a printk
 * hammer worker on each online CPU.
 *
 * Returns 0 on success, -ENOMEM if the per-CPU allocation fails.
 */
static int __init test_init(void)
{
	int c;

	works = alloc_percpu(struct work_struct);
	if (!works)
		return -ENOMEM;

	/*
	 * This is just a test module. This will break if you
	 * do any CPU hot plugging between loading and
	 * unloading the module.
	 */
	for_each_online_cpu(c) {
		struct work_struct *w = per_cpu_ptr(works, c);

		INIT_WORK(w, &preempt_printk_workfn);
		schedule_work_on(c, w);
	}

	return 0;
}
/* Module unload: stop all workers and free the per-CPU work items. */
static void __exit test_exit(void)
{
	finish();
}

/* "loops": non-preemptible printk calls per burst (default 1). */
module_param(loops, uint, 0);
module_init(test_init);
module_exit(test_exit);
MODULE_LICENSE("GPL");
Link: http://lkml.kernel.org/r/20180110132418.7080-2-pmladek@suse.com
Cc: akpm@linux-foundation.org
Cc: linux-mm@kvack.org
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Byungchul Park <byungchul.park@lge.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
[pmladek@suse.com: Commit message about possible deadlocks]
Acked-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
-rw-r--r-- | kernel/printk/printk.c | 108 |
1 files changed, 107 insertions, 1 deletions
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5d81206a572d..040fb948924e 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c | |||
@@ -86,8 +86,15 @@ EXPORT_SYMBOL_GPL(console_drivers); | |||
86 | static struct lockdep_map console_lock_dep_map = { | 86 | static struct lockdep_map console_lock_dep_map = { |
87 | .name = "console_lock" | 87 | .name = "console_lock" |
88 | }; | 88 | }; |
89 | static struct lockdep_map console_owner_dep_map = { | ||
90 | .name = "console_owner" | ||
91 | }; | ||
89 | #endif | 92 | #endif |
90 | 93 | ||
94 | static DEFINE_RAW_SPINLOCK(console_owner_lock); | ||
95 | static struct task_struct *console_owner; | ||
96 | static bool console_waiter; | ||
97 | |||
91 | enum devkmsg_log_bits { | 98 | enum devkmsg_log_bits { |
92 | __DEVKMSG_LOG_BIT_ON = 0, | 99 | __DEVKMSG_LOG_BIT_ON = 0, |
93 | __DEVKMSG_LOG_BIT_OFF, | 100 | __DEVKMSG_LOG_BIT_OFF, |
@@ -1753,8 +1760,56 @@ asmlinkage int vprintk_emit(int facility, int level, | |||
1753 | * semaphore. The release will print out buffers and wake up | 1760 | * semaphore. The release will print out buffers and wake up |
1754 | * /dev/kmsg and syslog() users. | 1761 | * /dev/kmsg and syslog() users. |
1755 | */ | 1762 | */ |
1756 | if (console_trylock()) | 1763 | if (console_trylock()) { |
1757 | console_unlock(); | 1764 | console_unlock(); |
1765 | } else { | ||
1766 | struct task_struct *owner = NULL; | ||
1767 | bool waiter; | ||
1768 | bool spin = false; | ||
1769 | |||
1770 | printk_safe_enter_irqsave(flags); | ||
1771 | |||
1772 | raw_spin_lock(&console_owner_lock); | ||
1773 | owner = READ_ONCE(console_owner); | ||
1774 | waiter = READ_ONCE(console_waiter); | ||
1775 | if (!waiter && owner && owner != current) { | ||
1776 | WRITE_ONCE(console_waiter, true); | ||
1777 | spin = true; | ||
1778 | } | ||
1779 | raw_spin_unlock(&console_owner_lock); | ||
1780 | |||
1781 | /* | ||
1782 | * If there is an active printk() writing to the | ||
1783 | * consoles, instead of having it write our data too, | ||
1784 | * see if we can offload that load from the active | ||
1785 | * printer, and do some printing ourselves. | ||
1786 | * Go into a spin only if there isn't already a waiter | ||
1787 | * spinning, and there is an active printer, and | ||
1788 | * that active printer isn't us (recursive printk?). | ||
1789 | */ | ||
1790 | if (spin) { | ||
1791 | /* We spin waiting for the owner to release us */ | ||
1792 | spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); | ||
1793 | /* Owner will clear console_waiter on hand off */ | ||
1794 | while (READ_ONCE(console_waiter)) | ||
1795 | cpu_relax(); | ||
1796 | |||
1797 | spin_release(&console_owner_dep_map, 1, _THIS_IP_); | ||
1798 | printk_safe_exit_irqrestore(flags); | ||
1799 | |||
1800 | /* | ||
1801 | * The owner passed the console lock to us. | ||
1802 | * Since we did not spin on console lock, annotate | ||
1803 | * this as a trylock. Otherwise lockdep will | ||
1804 | * complain. | ||
1805 | */ | ||
1806 | mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); | ||
1807 | console_unlock(); | ||
1808 | printk_safe_enter_irqsave(flags); | ||
1809 | } | ||
1810 | printk_safe_exit_irqrestore(flags); | ||
1811 | |||
1812 | } | ||
1758 | } | 1813 | } |
1759 | 1814 | ||
1760 | return printed_len; | 1815 | return printed_len; |
@@ -2141,6 +2196,7 @@ void console_unlock(void) | |||
2141 | static u64 seen_seq; | 2196 | static u64 seen_seq; |
2142 | unsigned long flags; | 2197 | unsigned long flags; |
2143 | bool wake_klogd = false; | 2198 | bool wake_klogd = false; |
2199 | bool waiter = false; | ||
2144 | bool do_cond_resched, retry; | 2200 | bool do_cond_resched, retry; |
2145 | 2201 | ||
2146 | if (console_suspended) { | 2202 | if (console_suspended) { |
@@ -2229,14 +2285,64 @@ skip: | |||
2229 | console_seq++; | 2285 | console_seq++; |
2230 | raw_spin_unlock(&logbuf_lock); | 2286 | raw_spin_unlock(&logbuf_lock); |
2231 | 2287 | ||
2288 | /* | ||
2289 | * While actively printing out messages, if another printk() | ||
2290 | * were to occur on another CPU, it may wait for this one to | ||
2291 | * finish. This task can not be preempted if there is a | ||
2292 | * waiter waiting to take over. | ||
2293 | */ | ||
2294 | raw_spin_lock(&console_owner_lock); | ||
2295 | console_owner = current; | ||
2296 | raw_spin_unlock(&console_owner_lock); | ||
2297 | |||
2298 | /* The waiter may spin on us after setting console_owner */ | ||
2299 | spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); | ||
2300 | |||
2232 | stop_critical_timings(); /* don't trace print latency */ | 2301 | stop_critical_timings(); /* don't trace print latency */ |
2233 | call_console_drivers(ext_text, ext_len, text, len); | 2302 | call_console_drivers(ext_text, ext_len, text, len); |
2234 | start_critical_timings(); | 2303 | start_critical_timings(); |
2304 | |||
2305 | raw_spin_lock(&console_owner_lock); | ||
2306 | waiter = READ_ONCE(console_waiter); | ||
2307 | console_owner = NULL; | ||
2308 | raw_spin_unlock(&console_owner_lock); | ||
2309 | |||
2310 | /* | ||
2311 | * If there is a waiter waiting for us, then pass the | ||
2312 | * rest of the work load over to that waiter. | ||
2313 | */ | ||
2314 | if (waiter) | ||
2315 | break; | ||
2316 | |||
2317 | /* There was no waiter, and nothing will spin on us here */ | ||
2318 | spin_release(&console_owner_dep_map, 1, _THIS_IP_); | ||
2319 | |||
2235 | printk_safe_exit_irqrestore(flags); | 2320 | printk_safe_exit_irqrestore(flags); |
2236 | 2321 | ||
2237 | if (do_cond_resched) | 2322 | if (do_cond_resched) |
2238 | cond_resched(); | 2323 | cond_resched(); |
2239 | } | 2324 | } |
2325 | |||
2326 | /* | ||
2327 | * If there is an active waiter waiting on the console_lock. | ||
2328 | * Pass off the printing to the waiter, and the waiter | ||
2329 | * will continue printing on its CPU, and when all writing | ||
2330 | * has finished, the last printer will wake up klogd. | ||
2331 | */ | ||
2332 | if (waiter) { | ||
2333 | WRITE_ONCE(console_waiter, false); | ||
2334 | /* The waiter is now free to continue */ | ||
2335 | spin_release(&console_owner_dep_map, 1, _THIS_IP_); | ||
2336 | /* | ||
2337 | * Hand off console_lock to waiter. The waiter will perform | ||
2338 | * the up(). After this, the waiter is the console_lock owner. | ||
2339 | */ | ||
2340 | mutex_release(&console_lock_dep_map, 1, _THIS_IP_); | ||
2341 | printk_safe_exit_irqrestore(flags); | ||
2342 | /* Note, if waiter is set, logbuf_lock is not held */ | ||
2343 | return; | ||
2344 | } | ||
2345 | |||
2240 | console_locked = 0; | 2346 | console_locked = 0; |
2241 | 2347 | ||
2242 | /* Release the exclusive_console once it is used */ | 2348 | /* Release the exclusive_console once it is used */ |