diff options
author | Vaibhav Nagarnaik <vnagarnaik@google.com> | 2011-05-03 20:56:42 -0400 |
---|---|---|
committer | Steven Rostedt <rostedt@goodmis.org> | 2011-06-14 22:04:39 -0400 |
commit | 7ea5906405a1f3fc1c0033dfd7e02f2cfd1de5e5 (patch) | |
tree | f75e962db25ccd3efa81e1fc69ce3e60d228c7a6 /kernel | |
parent | e7e2ee89a9dbf48d70a922d5625cd7320a27cbff (diff) |
tracing: Use NUMA allocation for per-cpu ring buffer pages
The tracing ring buffer is a group of per-cpu ring buffers where
allocation and logging are done on a per-cpu basis. The events that are
generated on a particular CPU are logged in the corresponding buffer.
This is to provide wait-free writes between CPUs and good NUMA node
locality while accessing the ring buffer.
However, the allocation routines consider NUMA locality only for buffer
page metadata and not for the actual buffer page. This causes the pages
to be allocated on the NUMA node local to the CPU where the allocation
routine is running at the time.
This patch fixes the problem by using a NUMA node specific allocation
routine so that the pages are allocated from a NUMA node local to the
logging CPU.
I tested with the getuid_microbench from autotest. It is a simple binary
that calls getuid() in a loop and measures the average time for the
syscall to complete. The following command was used to test:
$ getuid_microbench 1000000
I compared the numbers from kernels with and without this patch and
found that logging latency decreases by 30-50 ns/call.
tracing with non-NUMA allocation - 569 ns/call
tracing with NUMA allocation - 512 ns/call
Signed-off-by: Vaibhav Nagarnaik <vnagarnaik@google.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Michael Rubin <mrubin@google.com>
Cc: David Sharp <dhsharp@google.com>
Link: http://lkml.kernel.org/r/1304470602-20366-1-git-send-email-vnagarnaik@google.com
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/trace/ring_buffer.c | 36 | ||||
-rw-r--r-- | kernel/trace/ring_buffer_benchmark.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace.c | 7 |
3 files changed, 23 insertions, 22 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b0c7aa407943..725153d6cf73 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -997,13 +997,14 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, | |||
997 | unsigned nr_pages) | 997 | unsigned nr_pages) |
998 | { | 998 | { |
999 | struct buffer_page *bpage, *tmp; | 999 | struct buffer_page *bpage, *tmp; |
1000 | unsigned long addr; | ||
1001 | LIST_HEAD(pages); | 1000 | LIST_HEAD(pages); |
1002 | unsigned i; | 1001 | unsigned i; |
1003 | 1002 | ||
1004 | WARN_ON(!nr_pages); | 1003 | WARN_ON(!nr_pages); |
1005 | 1004 | ||
1006 | for (i = 0; i < nr_pages; i++) { | 1005 | for (i = 0; i < nr_pages; i++) { |
1006 | struct page *page; | ||
1007 | |||
1007 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), | 1008 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), |
1008 | GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); | 1009 | GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); |
1009 | if (!bpage) | 1010 | if (!bpage) |
@@ -1013,10 +1014,11 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, | |||
1013 | 1014 | ||
1014 | list_add(&bpage->list, &pages); | 1015 | list_add(&bpage->list, &pages); |
1015 | 1016 | ||
1016 | addr = __get_free_page(GFP_KERNEL); | 1017 | page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), |
1017 | if (!addr) | 1018 | GFP_KERNEL, 0); |
1019 | if (!page) | ||
1018 | goto free_pages; | 1020 | goto free_pages; |
1019 | bpage->page = (void *)addr; | 1021 | bpage->page = page_address(page); |
1020 | rb_init_page(bpage->page); | 1022 | rb_init_page(bpage->page); |
1021 | } | 1023 | } |
1022 | 1024 | ||
@@ -1045,7 +1047,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) | |||
1045 | { | 1047 | { |
1046 | struct ring_buffer_per_cpu *cpu_buffer; | 1048 | struct ring_buffer_per_cpu *cpu_buffer; |
1047 | struct buffer_page *bpage; | 1049 | struct buffer_page *bpage; |
1048 | unsigned long addr; | 1050 | struct page *page; |
1049 | int ret; | 1051 | int ret; |
1050 | 1052 | ||
1051 | cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()), | 1053 | cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()), |
@@ -1067,10 +1069,10 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) | |||
1067 | rb_check_bpage(cpu_buffer, bpage); | 1069 | rb_check_bpage(cpu_buffer, bpage); |
1068 | 1070 | ||
1069 | cpu_buffer->reader_page = bpage; | 1071 | cpu_buffer->reader_page = bpage; |
1070 | addr = __get_free_page(GFP_KERNEL); | 1072 | page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0); |
1071 | if (!addr) | 1073 | if (!page) |
1072 | goto fail_free_reader; | 1074 | goto fail_free_reader; |
1073 | bpage->page = (void *)addr; | 1075 | bpage->page = page_address(page); |
1074 | rb_init_page(bpage->page); | 1076 | rb_init_page(bpage->page); |
1075 | 1077 | ||
1076 | INIT_LIST_HEAD(&cpu_buffer->reader_page->list); | 1078 | INIT_LIST_HEAD(&cpu_buffer->reader_page->list); |
@@ -1314,7 +1316,6 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
1314 | unsigned nr_pages, rm_pages, new_pages; | 1316 | unsigned nr_pages, rm_pages, new_pages; |
1315 | struct buffer_page *bpage, *tmp; | 1317 | struct buffer_page *bpage, *tmp; |
1316 | unsigned long buffer_size; | 1318 | unsigned long buffer_size; |
1317 | unsigned long addr; | ||
1318 | LIST_HEAD(pages); | 1319 | LIST_HEAD(pages); |
1319 | int i, cpu; | 1320 | int i, cpu; |
1320 | 1321 | ||
@@ -1375,16 +1376,17 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) | |||
1375 | 1376 | ||
1376 | for_each_buffer_cpu(buffer, cpu) { | 1377 | for_each_buffer_cpu(buffer, cpu) { |
1377 | for (i = 0; i < new_pages; i++) { | 1378 | for (i = 0; i < new_pages; i++) { |
1379 | struct page *page; | ||
1378 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), | 1380 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), |
1379 | cache_line_size()), | 1381 | cache_line_size()), |
1380 | GFP_KERNEL, cpu_to_node(cpu)); | 1382 | GFP_KERNEL, cpu_to_node(cpu)); |
1381 | if (!bpage) | 1383 | if (!bpage) |
1382 | goto free_pages; | 1384 | goto free_pages; |
1383 | list_add(&bpage->list, &pages); | 1385 | list_add(&bpage->list, &pages); |
1384 | addr = __get_free_page(GFP_KERNEL); | 1386 | page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0); |
1385 | if (!addr) | 1387 | if (!page) |
1386 | goto free_pages; | 1388 | goto free_pages; |
1387 | bpage->page = (void *)addr; | 1389 | bpage->page = page_address(page); |
1388 | rb_init_page(bpage->page); | 1390 | rb_init_page(bpage->page); |
1389 | } | 1391 | } |
1390 | } | 1392 | } |
@@ -3730,16 +3732,16 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); | |||
3730 | * Returns: | 3732 | * Returns: |
3731 | * The page allocated, or NULL on error. | 3733 | * The page allocated, or NULL on error. |
3732 | */ | 3734 | */ |
3733 | void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) | 3735 | void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu) |
3734 | { | 3736 | { |
3735 | struct buffer_data_page *bpage; | 3737 | struct buffer_data_page *bpage; |
3736 | unsigned long addr; | 3738 | struct page *page; |
3737 | 3739 | ||
3738 | addr = __get_free_page(GFP_KERNEL); | 3740 | page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0); |
3739 | if (!addr) | 3741 | if (!page) |
3740 | return NULL; | 3742 | return NULL; |
3741 | 3743 | ||
3742 | bpage = (void *)addr; | 3744 | bpage = page_address(page); |
3743 | 3745 | ||
3744 | rb_init_page(bpage); | 3746 | rb_init_page(bpage); |
3745 | 3747 | ||
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 302f8a614635..a5457d577b98 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c | |||
@@ -106,7 +106,7 @@ static enum event_status read_page(int cpu) | |||
106 | int inc; | 106 | int inc; |
107 | int i; | 107 | int i; |
108 | 108 | ||
109 | bpage = ring_buffer_alloc_read_page(buffer); | 109 | bpage = ring_buffer_alloc_read_page(buffer, cpu); |
110 | if (!bpage) | 110 | if (!bpage) |
111 | return EVENT_DROPPED; | 111 | return EVENT_DROPPED; |
112 | 112 | ||
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 71777c8fe36b..61fda6b6f1ab 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -3697,7 +3697,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, | |||
3697 | return 0; | 3697 | return 0; |
3698 | 3698 | ||
3699 | if (!info->spare) | 3699 | if (!info->spare) |
3700 | info->spare = ring_buffer_alloc_read_page(info->tr->buffer); | 3700 | info->spare = ring_buffer_alloc_read_page(info->tr->buffer, info->cpu); |
3701 | if (!info->spare) | 3701 | if (!info->spare) |
3702 | return -ENOMEM; | 3702 | return -ENOMEM; |
3703 | 3703 | ||
@@ -3854,7 +3854,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
3854 | 3854 | ||
3855 | ref->ref = 1; | 3855 | ref->ref = 1; |
3856 | ref->buffer = info->tr->buffer; | 3856 | ref->buffer = info->tr->buffer; |
3857 | ref->page = ring_buffer_alloc_read_page(ref->buffer); | 3857 | ref->page = ring_buffer_alloc_read_page(ref->buffer, info->cpu); |
3858 | if (!ref->page) { | 3858 | if (!ref->page) { |
3859 | kfree(ref); | 3859 | kfree(ref); |
3860 | break; | 3860 | break; |
@@ -3863,8 +3863,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
3863 | r = ring_buffer_read_page(ref->buffer, &ref->page, | 3863 | r = ring_buffer_read_page(ref->buffer, &ref->page, |
3864 | len, info->cpu, 1); | 3864 | len, info->cpu, 1); |
3865 | if (r < 0) { | 3865 | if (r < 0) { |
3866 | ring_buffer_free_read_page(ref->buffer, | 3866 | ring_buffer_free_read_page(ref->buffer, ref->page); |
3867 | ref->page); | ||
3868 | kfree(ref); | 3867 | kfree(ref); |
3869 | break; | 3868 | break; |
3870 | } | 3869 | } |