summaryrefslogtreecommitdiffstats
path: root/kernel/printk
diff options
context:
space:
mode:
authorSergey Senozhatsky <sergey.senozhatsky@gmail.com>2016-03-17 17:21:20 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-03-17 18:09:34 -0400
commita8199371afc27946d72f0d53e938e78d2ea0bae3 (patch)
tree928b09102c94bf5db44945356e89e6d49e80dec9 /kernel/printk
parentfaeb50b98a337abf7a11375e06a83cda23c8ca67 (diff)
printk: move can_use_console() out of console_trylock_for_printk()
console_unlock() allows to cond_resched() if its caller has set `console_may_schedule' to 1 (this functionality is present since 8d91f8b15361 ("printk: do cond_resched() between lines while outputting to consoles")). The rules are: -- console_lock() always sets `console_may_schedule' to 1 -- console_trylock() always sets `console_may_schedule' to 0 printk() calls console_unlock() with preemption disabled, which basically can lead to RCU stalls, watchdog soft lockups, etc. if something is simultaneously calling printk() frequently enough (IOW, console_sem owner always has new data to send to console drivers and can't leave console_unlock() for a long time). printk()->console_trylock() callers do not necessarily execute in atomic contexts, and some of them can cond_resched() in console_unlock(). console_trylock() can set `console_may_schedule' to 1 (allow cond_resched() later in console_unlock()) when it's safe. This patch (of 3): vprintk_emit() disables preemption around console_trylock_for_printk() and console_unlock() calls for a strong reason -- can_use_console() check. The thing is that vprintk_emit() can be called on a CPU that is not fully brought up yet (!cpu_online()), which potentially can cause problems if console driver wants to access per-cpu data. A console driver can explicitly state that it's safe to call it from !online cpu by setting CON_ANYTIME bit in console ->flags. That's why for !cpu_online() can_use_console() iterates over all the consoles to find out if there is a CON_ANYTIME console, otherwise console_unlock() must be avoided. can_use_console() ensures that console_unlock() call is safe in vprintk_emit() only; console_lock() and console_trylock() are not covered by this check. Even though call_console_drivers(), invoked from console_cont_flush() and console_unlock(), tests `!cpu_online() && CON_ANYTIME' for_each_console(), it may be too late, which can result in message loss. 
Assume that we have 2 cpus -- CPU0 is online, CPU1 is !online, and no CON_ANYTIME consoles available. CPU0 online CPU1 !online console_trylock() ... console_unlock() console_cont_flush spin_lock logbuf_lock if (!cont.len) { spin_unlock logbuf_lock return } for (;;) { vprintk_emit spin_lock logbuf_lock log_store spin_unlock logbuf_lock spin_lock logbuf_lock !console_trylock_for_printk msg_print_text return console_idx = log_next() console_seq++ console_prev = msg->flags spin_unlock logbuf_lock call_console_drivers() for_each_console(con) { if (!cpu_online() && !(con->flags & CON_ANYTIME)) continue; } /* * no message printed, we lost it */ vprintk_emit spin_lock logbuf_lock log_store spin_unlock logbuf_lock !console_trylock_for_printk return /* * go to the beginning of the loop, * find out there are new messages, * lose it */ } console_trylock()/console_lock() call on CPU1 may come from cpu notifiers registered on that CPU. Since notifiers are not getting unregistered when CPU is going DOWN, all of the notifiers receive notifications during CPU UP. For example, on my x86_64, I see around 50 notification sent from offline CPU to itself [swapper/2] from cpu:2 to:2 action:CPU_STARTING hotplug_hrtick [swapper/2] from cpu:2 to:2 action:CPU_STARTING blk_mq_main_cpu_notify [swapper/2] from cpu:2 to:2 action:CPU_STARTING blk_mq_queue_reinit_notify [swapper/2] from cpu:2 to:2 action:CPU_STARTING console_cpu_notify while doing echo 0 > /sys/devices/system/cpu/cpu2/online echo 1 > /sys/devices/system/cpu/cpu2/online So grabbing the console_sem lock while CPU is !online is possible, in theory. This patch moves can_use_console() check out of console_trylock_for_printk(). Instead it calls it in console_unlock(), so now console_lock()/console_unlock() are also 'protected' by can_use_console(). This also means that console_trylock_for_printk() is not really needed anymore and can be removed. 
Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> Reviewed-by: Petr Mladek <pmladek@suse.com> Cc: Jan Kara <jack@suse.com> Cc: Tejun Heo <tj@kernel.org> Cc: Kyle McMartin <kyle@kernel.org> Cc: Dave Jones <davej@codemonkey.org.uk> Cc: Calvin Owens <calvinowens@fb.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/printk')
-rw-r--r--kernel/printk/printk.c97
1 files changed, 42 insertions, 55 deletions
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index c963ba534a78..2523332bd998 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1483,58 +1483,6 @@ static void zap_locks(void)
1483 sema_init(&console_sem, 1); 1483 sema_init(&console_sem, 1);
1484} 1484}
1485 1485
1486/*
1487 * Check if we have any console that is capable of printing while cpu is
1488 * booting or shutting down. Requires console_sem.
1489 */
1490static int have_callable_console(void)
1491{
1492 struct console *con;
1493
1494 for_each_console(con)
1495 if (con->flags & CON_ANYTIME)
1496 return 1;
1497
1498 return 0;
1499}
1500
1501/*
1502 * Can we actually use the console at this time on this cpu?
1503 *
1504 * Console drivers may assume that per-cpu resources have been allocated. So
1505 * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
1506 * call them until this CPU is officially up.
1507 */
1508static inline int can_use_console(unsigned int cpu)
1509{
1510 return cpu_online(cpu) || have_callable_console();
1511}
1512
1513/*
1514 * Try to get console ownership to actually show the kernel
1515 * messages from a 'printk'. Return true (and with the
1516 * console_lock held, and 'console_locked' set) if it
1517 * is successful, false otherwise.
1518 */
1519static int console_trylock_for_printk(void)
1520{
1521 unsigned int cpu = smp_processor_id();
1522
1523 if (!console_trylock())
1524 return 0;
1525 /*
1526 * If we can't use the console, we need to release the console
1527 * semaphore by hand to avoid flushing the buffer. We need to hold the
1528 * console semaphore in order to do this test safely.
1529 */
1530 if (!can_use_console(cpu)) {
1531 console_locked = 0;
1532 up_console_sem();
1533 return 0;
1534 }
1535 return 1;
1536}
1537
1538int printk_delay_msec __read_mostly; 1486int printk_delay_msec __read_mostly;
1539 1487
1540static inline void printk_delay(void) 1488static inline void printk_delay(void)
@@ -1681,7 +1629,6 @@ asmlinkage int vprintk_emit(int facility, int level,
1681 boot_delay_msec(level); 1629 boot_delay_msec(level);
1682 printk_delay(); 1630 printk_delay();
1683 1631
1684 /* This stops the holder of console_sem just where we want him */
1685 local_irq_save(flags); 1632 local_irq_save(flags);
1686 this_cpu = smp_processor_id(); 1633 this_cpu = smp_processor_id();
1687 1634
@@ -1705,6 +1652,7 @@ asmlinkage int vprintk_emit(int facility, int level,
1705 } 1652 }
1706 1653
1707 lockdep_off(); 1654 lockdep_off();
1655 /* This stops the holder of console_sem just where we want him */
1708 raw_spin_lock(&logbuf_lock); 1656 raw_spin_lock(&logbuf_lock);
1709 logbuf_cpu = this_cpu; 1657 logbuf_cpu = this_cpu;
1710 1658
@@ -1821,7 +1769,7 @@ asmlinkage int vprintk_emit(int facility, int level,
1821 * semaphore. The release will print out buffers and wake up 1769 * semaphore. The release will print out buffers and wake up
1822 * /dev/kmsg and syslog() users. 1770 * /dev/kmsg and syslog() users.
1823 */ 1771 */
1824 if (console_trylock_for_printk()) 1772 if (console_trylock())
1825 console_unlock(); 1773 console_unlock();
1826 preempt_enable(); 1774 preempt_enable();
1827 lockdep_on(); 1775 lockdep_on();
@@ -2184,6 +2132,33 @@ int is_console_locked(void)
2184 return console_locked; 2132 return console_locked;
2185} 2133}
2186 2134
2135/*
2136 * Check if we have any console that is capable of printing while cpu is
2137 * booting or shutting down. Requires console_sem.
2138 */
2139static int have_callable_console(void)
2140{
2141 struct console *con;
2142
2143 for_each_console(con)
2144 if (con->flags & CON_ANYTIME)
2145 return 1;
2146
2147 return 0;
2148}
2149
2150/*
2151 * Can we actually use the console at this time on this cpu?
2152 *
2153 * Console drivers may assume that per-cpu resources have been allocated. So
2154 * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
2155 * call them until this CPU is officially up.
2156 */
2157static inline int can_use_console(void)
2158{
2159 return cpu_online(raw_smp_processor_id()) || have_callable_console();
2160}
2161
2187static void console_cont_flush(char *text, size_t size) 2162static void console_cont_flush(char *text, size_t size)
2188{ 2163{
2189 unsigned long flags; 2164 unsigned long flags;
@@ -2254,9 +2229,21 @@ void console_unlock(void)
2254 do_cond_resched = console_may_schedule; 2229 do_cond_resched = console_may_schedule;
2255 console_may_schedule = 0; 2230 console_may_schedule = 0;
2256 2231
2232again:
2233 /*
2234 * We released the console_sem lock, so we need to recheck if
2235 * cpu is online and (if not) is there at least one CON_ANYTIME
2236 * console.
2237 */
2238 if (!can_use_console()) {
2239 console_locked = 0;
2240 up_console_sem();
2241 return;
2242 }
2243
2257 /* flush buffered message fragment immediately to console */ 2244 /* flush buffered message fragment immediately to console */
2258 console_cont_flush(text, sizeof(text)); 2245 console_cont_flush(text, sizeof(text));
2259again: 2246
2260 for (;;) { 2247 for (;;) {
2261 struct printk_log *msg; 2248 struct printk_log *msg;
2262 size_t ext_len = 0; 2249 size_t ext_len = 0;