diff options
author | Luis R. Rodriguez <mcgrof@suse.com> | 2014-08-06 19:08:56 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-08-06 21:01:23 -0400 |
commit | 23b2899f7f194f06e09b52a1f46f027a21fae17c (patch) | |
tree | 8aca0009f4f8e55b212adc7d9545dec0097f1840 | |
parent | f54051722e5715d24cd4469606ebdf488b6d5779 (diff) |
printk: allow increasing the ring buffer depending on the number of CPUs
The default size of the ring buffer is too small for machines with a
large number of CPUs under heavy load. What ends up happening when
debugging is that the ring buffer wraps around and overwrites old messages,
making debugging impossible unless the size is passed as a kernel parameter.
An idle system upon boot up will on average spew out only about one or
two extra lines but where this really matters is on heavy load and that
will vary widely depending on the system and environment.
There are mechanisms to help increase the kernel ring buffer for tracing
through debugfs, and those interfaces even allow growing the kernel ring
buffer per CPU. We also have a static value which can be passed upon
boot. Relying on debugfs however is not ideal for production, and
the value passed upon bootup can only be used *after* an
issue has crept up. Instead of being reactive, this adds a proactive
measure which lets you scale the amount of contributions you'd expect to
the kernel ring buffer under load by each CPU in the worst case
scenario.
We use num_possible_cpus() to avoid complexities which could be
introduced by dynamically changing the ring buffer size at run time,
num_possible_cpus() lets us use the upper limit on possible number of
CPUs therefore avoiding having to deal with hotplugging CPUs on and off.
This introduces the kernel configuration option LOG_CPU_MAX_BUF_SHIFT
which is used to specify the maximum amount of contributions to the
kernel ring buffer in the worst case before the kernel ring buffer flips
over, the size is specified as a power of 2. The total amount of
contributions made by each CPU must be greater than half of the default
kernel ring buffer size (1 << LOG_BUF_SHIFT bytes) in order to trigger
an increase upon bootup. The kernel ring buffer is increased to the
next power of two that would fit the required minimum kernel ring buffer
size plus the additional CPU contribution. For example if LOG_BUF_SHIFT
is 18 (256 KB) you'd require more than 128 KB of contributions by other CPUs
in order to trigger an increase of the kernel ring buffer. With a
LOG_CPU_MAX_BUF_SHIFT of 12 (4 KB) that means more than 32 additional CPUs
(the boot CPU is not counted), i.e. more than 33
possible CPUs to trigger an increase. If you had 128 possible CPUs the
minimum required kernel ring buffer size bumps to:
((1 << 18) + ((128 - 1) * (1 << 12))) / 1024 = 764 KB
Since we require the ring buffer to be a power of two the new required
size would be 1024 KB.
These CPU contributions are ignored when the "log_buf_len" kernel
parameter is used as it forces the exact size of the ring buffer to an
expected power of two value.
[pmladek@suse.cz: fix build]
Signed-off-by: Luis R. Rodriguez <mcgrof@suse.com>
Signed-off-by: Petr Mladek <pmladek@suse.cz>
Tested-by: Davidlohr Bueso <davidlohr@hp.com>
Tested-by: Petr Mladek <pmladek@suse.cz>
Reviewed-by: Davidlohr Bueso <davidlohr@hp.com>
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Stephen Warren <swarren@wwwdotorg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Petr Mladek <pmladek@suse.cz>
Cc: Joe Perches <joe@perches.com>
Cc: Arun KS <arunks.linux@gmail.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Davidlohr Bueso <davidlohr@hp.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/kernel-parameters.txt | 8 | ||||
-rw-r--r-- | init/Kconfig | 46 | ||||
-rw-r--r-- | kernel/printk/printk.c | 34 |
3 files changed, 82 insertions, 6 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 883901b9ac4f..9344d833b7ea 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -1716,8 +1716,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
1716 | 7 (KERN_DEBUG) debug-level messages | 1716 | 7 (KERN_DEBUG) debug-level messages |
1717 | 1717 | ||
1718 | log_buf_len=n[KMG] Sets the size of the printk ring buffer, | 1718 | log_buf_len=n[KMG] Sets the size of the printk ring buffer, |
1719 | in bytes. n must be a power of two. The default | 1719 | in bytes. n must be a power of two and greater |
1720 | size is set in the kernel config file. | 1720 | than the minimal size. The minimal size is defined |
1721 | by LOG_BUF_SHIFT kernel config parameter. There is | ||
1722 | also CONFIG_LOG_CPU_MAX_BUF_SHIFT config parameter | ||
1723 | that allows to increase the default size depending on | ||
1724 | the number of CPUs. See init/Kconfig for more details. | ||
1721 | 1725 | ||
1722 | logo.nologo [FB] Disables display of the built-in Linux logo. | 1726 | logo.nologo [FB] Disables display of the built-in Linux logo. |
1723 | This may be used to provide more screen space for | 1727 | This may be used to provide more screen space for |
diff --git a/init/Kconfig b/init/Kconfig index 41066e49e880..a291b7ef4738 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -807,15 +807,53 @@ config LOG_BUF_SHIFT | |||
807 | range 12 21 | 807 | range 12 21 |
808 | default 17 | 808 | default 17 |
809 | help | 809 | help |
810 | Select kernel log buffer size as a power of 2. | 810 | Select the minimal kernel log buffer size as a power of 2. |
811 | The final size is affected by LOG_CPU_MAX_BUF_SHIFT config | ||
812 | parameter, see below. Any higher size also might be forced | ||
813 | by "log_buf_len" boot parameter. | ||
814 | |||
811 | Examples: | 815 | Examples: |
812 | 17 => 128 KB | 816 | 17 => 128 KB |
813 | 16 => 64 KB | 817 | 16 => 64 KB |
814 | 15 => 32 KB | 818 | 15 => 32 KB |
815 | 14 => 16 KB | 819 | 14 => 16 KB |
816 | 13 => 8 KB | 820 | 13 => 8 KB |
817 | 12 => 4 KB | 821 | 12 => 4 KB |
818 | 822 | ||
823 | config LOG_CPU_MAX_BUF_SHIFT | ||
824 | int "CPU kernel log buffer size contribution (13 => 8 KB, 17 => 128KB)" | ||
825 | range 0 21 | ||
826 | default 12 if !BASE_SMALL | ||
827 | default 0 if BASE_SMALL | ||
828 | help | ||
829 | This option allows to increase the default ring buffer size | ||
830 | according to the number of CPUs. The value defines the contribution | ||
831 | of each CPU as a power of 2. The used space is typically only few | ||
832 | lines however it might be much more when problems are reported, | ||
833 | e.g. backtraces. | ||
834 | |||
835 | The increased size means that a new buffer has to be allocated and | ||
836 | the original static one is unused. It makes sense only on systems | ||
837 | with more CPUs. Therefore this value is used only when the sum of | ||
838 | contributions is greater than the half of the default kernel ring | ||
839 | buffer as defined by LOG_BUF_SHIFT. The default values are set | ||
840 | so that more than 64 CPUs are needed to trigger the allocation. | ||
841 | |||
842 | Also this option is ignored when "log_buf_len" kernel parameter is | ||
843 | used as it forces an exact (power of two) size of the ring buffer. | ||
844 | |||
845 | The number of possible CPUs is used for this computation ignoring | ||
846 | hotplugging making the computation optimal for the worst case | ||
847 | scenario while allowing a simple algorithm to be used from bootup. | ||
848 | |||
849 | Examples shift values and their meaning: | ||
850 | 17 => 128 KB for each CPU | ||
851 | 16 => 64 KB for each CPU | ||
852 | 15 => 32 KB for each CPU | ||
853 | 14 => 16 KB for each CPU | ||
854 | 13 => 8 KB for each CPU | ||
855 | 12 => 4 KB for each CPU | ||
856 | |||
819 | # | 857 | # |
820 | # Architectures with an unreliable sched_clock() should select this: | 858 | # Architectures with an unreliable sched_clock() should select this: |
821 | # | 859 | # |
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index db290be32984..f855ec36dff9 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c | |||
@@ -266,6 +266,7 @@ static u32 clear_idx; | |||
266 | #define LOG_ALIGN __alignof__(struct printk_log) | 266 | #define LOG_ALIGN __alignof__(struct printk_log) |
267 | #endif | 267 | #endif |
268 | #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) | 268 | #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) |
269 | #define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT) | ||
269 | static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); | 270 | static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); |
270 | static char *log_buf = __log_buf; | 271 | static char *log_buf = __log_buf; |
271 | static u32 log_buf_len = __LOG_BUF_LEN; | 272 | static u32 log_buf_len = __LOG_BUF_LEN; |
@@ -848,12 +849,45 @@ static int __init log_buf_len_setup(char *str) | |||
848 | } | 849 | } |
849 | early_param("log_buf_len", log_buf_len_setup); | 850 | early_param("log_buf_len", log_buf_len_setup); |
850 | 851 | ||
852 | static void __init log_buf_add_cpu(void) | ||
853 | { | ||
854 | unsigned int cpu_extra; | ||
855 | |||
856 | /* | ||
857 | * archs should set up cpu_possible_bits properly with | ||
858 | * set_cpu_possible() after setup_arch() but just in | ||
859 | * case lets ensure this is valid. | ||
860 | */ | ||
861 | if (num_possible_cpus() == 1) | ||
862 | return; | ||
863 | |||
864 | cpu_extra = (num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN; | ||
865 | |||
866 | /* by default this will only continue through for large > 64 CPUs */ | ||
867 | if (cpu_extra <= __LOG_BUF_LEN / 2) | ||
868 | return; | ||
869 | |||
870 | pr_info("log_buf_len individual max cpu contribution: %d bytes\n", | ||
871 | __LOG_CPU_MAX_BUF_LEN); | ||
872 | pr_info("log_buf_len total cpu_extra contributions: %d bytes\n", | ||
873 | cpu_extra); | ||
874 | pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN); | ||
875 | |||
876 | log_buf_len_update(cpu_extra + __LOG_BUF_LEN); | ||
877 | } | ||
878 | |||
851 | void __init setup_log_buf(int early) | 879 | void __init setup_log_buf(int early) |
852 | { | 880 | { |
853 | unsigned long flags; | 881 | unsigned long flags; |
854 | char *new_log_buf; | 882 | char *new_log_buf; |
855 | int free; | 883 | int free; |
856 | 884 | ||
885 | if (log_buf != __log_buf) | ||
886 | return; | ||
887 | |||
888 | if (!early && !new_log_buf_len) | ||
889 | log_buf_add_cpu(); | ||
890 | |||
857 | if (!new_log_buf_len) | 891 | if (!new_log_buf_len) |
858 | return; | 892 | return; |
859 | 893 | ||