about | summary | refs | log | tree | commit | diff | stats
path: root/kernel
diff options
context:
space:
mode:
author: Vaibhav Nagarnaik <vnagarnaik@google.com> 2011-05-03 20:56:42 -0400
committer: Steven Rostedt <rostedt@goodmis.org> 2011-06-14 22:04:39 -0400
commit: 7ea5906405a1f3fc1c0033dfd7e02f2cfd1de5e5 (patch)
tree: f75e962db25ccd3efa81e1fc69ce3e60d228c7a6 /kernel
parent: e7e2ee89a9dbf48d70a922d5625cd7320a27cbff (diff)
tracing: Use NUMA allocation for per-cpu ring buffer pages
The tracing ring buffer is a group of per-cpu ring buffers where allocation and logging are done on a per-cpu basis. The events that are generated on a particular CPU are logged in the corresponding buffer. This is to provide wait-free writes between CPUs and good NUMA node locality while accessing the ring buffer.

However, the allocation routines consider NUMA locality only for buffer page metadata and not for the actual buffer page. This causes the pages to be allocated on the NUMA node local to the CPU where the allocation routine is running at the time.

This patch fixes the problem by using a NUMA node specific allocation routine so that the pages are allocated from a NUMA node local to the logging CPU.

I tested with the getuid_microbench from autotest. It is a simple binary that calls getuid() in a loop and measures the average time for the syscall to complete. The following command was used to test:

    $ getuid_microbench 1000000

I compared the numbers found on the kernel with and without this patch and found that logging latency decreases by 30-50 ns/call.

    tracing with non-NUMA allocation - 569 ns/call
    tracing with NUMA allocation     - 512 ns/call

Signed-off-by: Vaibhav Nagarnaik <vnagarnaik@google.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Michael Rubin <mrubin@google.com>
Cc: David Sharp <dhsharp@google.com>
Link: http://lkml.kernel.org/r/1304470602-20366-1-git-send-email-vnagarnaik@google.com
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/trace/ring_buffer.c            36
-rw-r--r--  kernel/trace/ring_buffer_benchmark.c   2
-rw-r--r--  kernel/trace/trace.c                   7
3 files changed, 23 insertions, 22 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b0c7aa407943..725153d6cf73 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -997,13 +997,14 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
997 unsigned nr_pages) 997 unsigned nr_pages)
998{ 998{
999 struct buffer_page *bpage, *tmp; 999 struct buffer_page *bpage, *tmp;
1000 unsigned long addr;
1001 LIST_HEAD(pages); 1000 LIST_HEAD(pages);
1002 unsigned i; 1001 unsigned i;
1003 1002
1004 WARN_ON(!nr_pages); 1003 WARN_ON(!nr_pages);
1005 1004
1006 for (i = 0; i < nr_pages; i++) { 1005 for (i = 0; i < nr_pages; i++) {
1006 struct page *page;
1007
1007 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1008 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1008 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); 1009 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
1009 if (!bpage) 1010 if (!bpage)
@@ -1013,10 +1014,11 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
1013 1014
1014 list_add(&bpage->list, &pages); 1015 list_add(&bpage->list, &pages);
1015 1016
1016 addr = __get_free_page(GFP_KERNEL); 1017 page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
1017 if (!addr) 1018 GFP_KERNEL, 0);
1019 if (!page)
1018 goto free_pages; 1020 goto free_pages;
1019 bpage->page = (void *)addr; 1021 bpage->page = page_address(page);
1020 rb_init_page(bpage->page); 1022 rb_init_page(bpage->page);
1021 } 1023 }
1022 1024
@@ -1045,7 +1047,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
1045{ 1047{
1046 struct ring_buffer_per_cpu *cpu_buffer; 1048 struct ring_buffer_per_cpu *cpu_buffer;
1047 struct buffer_page *bpage; 1049 struct buffer_page *bpage;
1048 unsigned long addr; 1050 struct page *page;
1049 int ret; 1051 int ret;
1050 1052
1051 cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()), 1053 cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
@@ -1067,10 +1069,10 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
1067 rb_check_bpage(cpu_buffer, bpage); 1069 rb_check_bpage(cpu_buffer, bpage);
1068 1070
1069 cpu_buffer->reader_page = bpage; 1071 cpu_buffer->reader_page = bpage;
1070 addr = __get_free_page(GFP_KERNEL); 1072 page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0);
1071 if (!addr) 1073 if (!page)
1072 goto fail_free_reader; 1074 goto fail_free_reader;
1073 bpage->page = (void *)addr; 1075 bpage->page = page_address(page);
1074 rb_init_page(bpage->page); 1076 rb_init_page(bpage->page);
1075 1077
1076 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 1078 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
@@ -1314,7 +1316,6 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1314 unsigned nr_pages, rm_pages, new_pages; 1316 unsigned nr_pages, rm_pages, new_pages;
1315 struct buffer_page *bpage, *tmp; 1317 struct buffer_page *bpage, *tmp;
1316 unsigned long buffer_size; 1318 unsigned long buffer_size;
1317 unsigned long addr;
1318 LIST_HEAD(pages); 1319 LIST_HEAD(pages);
1319 int i, cpu; 1320 int i, cpu;
1320 1321
@@ -1375,16 +1376,17 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1375 1376
1376 for_each_buffer_cpu(buffer, cpu) { 1377 for_each_buffer_cpu(buffer, cpu) {
1377 for (i = 0; i < new_pages; i++) { 1378 for (i = 0; i < new_pages; i++) {
1379 struct page *page;
1378 bpage = kzalloc_node(ALIGN(sizeof(*bpage), 1380 bpage = kzalloc_node(ALIGN(sizeof(*bpage),
1379 cache_line_size()), 1381 cache_line_size()),
1380 GFP_KERNEL, cpu_to_node(cpu)); 1382 GFP_KERNEL, cpu_to_node(cpu));
1381 if (!bpage) 1383 if (!bpage)
1382 goto free_pages; 1384 goto free_pages;
1383 list_add(&bpage->list, &pages); 1385 list_add(&bpage->list, &pages);
1384 addr = __get_free_page(GFP_KERNEL); 1386 page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0);
1385 if (!addr) 1387 if (!page)
1386 goto free_pages; 1388 goto free_pages;
1387 bpage->page = (void *)addr; 1389 bpage->page = page_address(page);
1388 rb_init_page(bpage->page); 1390 rb_init_page(bpage->page);
1389 } 1391 }
1390 } 1392 }
@@ -3730,16 +3732,16 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
3730 * Returns: 3732 * Returns:
3731 * The page allocated, or NULL on error. 3733 * The page allocated, or NULL on error.
3732 */ 3734 */
3733void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) 3735void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)
3734{ 3736{
3735 struct buffer_data_page *bpage; 3737 struct buffer_data_page *bpage;
3736 unsigned long addr; 3738 struct page *page;
3737 3739
3738 addr = __get_free_page(GFP_KERNEL); 3740 page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0);
3739 if (!addr) 3741 if (!page)
3740 return NULL; 3742 return NULL;
3741 3743
3742 bpage = (void *)addr; 3744 bpage = page_address(page);
3743 3745
3744 rb_init_page(bpage); 3746 rb_init_page(bpage);
3745 3747
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 302f8a614635..a5457d577b98 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -106,7 +106,7 @@ static enum event_status read_page(int cpu)
106 int inc; 106 int inc;
107 int i; 107 int i;
108 108
109 bpage = ring_buffer_alloc_read_page(buffer); 109 bpage = ring_buffer_alloc_read_page(buffer, cpu);
110 if (!bpage) 110 if (!bpage)
111 return EVENT_DROPPED; 111 return EVENT_DROPPED;
112 112
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 71777c8fe36b..61fda6b6f1ab 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3697,7 +3697,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
3697 return 0; 3697 return 0;
3698 3698
3699 if (!info->spare) 3699 if (!info->spare)
3700 info->spare = ring_buffer_alloc_read_page(info->tr->buffer); 3700 info->spare = ring_buffer_alloc_read_page(info->tr->buffer, info->cpu);
3701 if (!info->spare) 3701 if (!info->spare)
3702 return -ENOMEM; 3702 return -ENOMEM;
3703 3703
@@ -3854,7 +3854,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3854 3854
3855 ref->ref = 1; 3855 ref->ref = 1;
3856 ref->buffer = info->tr->buffer; 3856 ref->buffer = info->tr->buffer;
3857 ref->page = ring_buffer_alloc_read_page(ref->buffer); 3857 ref->page = ring_buffer_alloc_read_page(ref->buffer, info->cpu);
3858 if (!ref->page) { 3858 if (!ref->page) {
3859 kfree(ref); 3859 kfree(ref);
3860 break; 3860 break;
@@ -3863,8 +3863,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3863 r = ring_buffer_read_page(ref->buffer, &ref->page, 3863 r = ring_buffer_read_page(ref->buffer, &ref->page,
3864 len, info->cpu, 1); 3864 len, info->cpu, 1);
3865 if (r < 0) { 3865 if (r < 0) {
3866 ring_buffer_free_read_page(ref->buffer, 3866 ring_buffer_free_read_page(ref->buffer, ref->page);
3867 ref->page);
3868 kfree(ref); 3867 kfree(ref);
3869 break; 3868 break;
3870 } 3869 }