aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Steven Rostedt <srostedt@redhat.com> 2008-05-12 15:20:43 -0400
committer Thomas Gleixner <tglx@linutronix.de> 2008-05-23 14:38:51 -0400
commit 4c11d7aed389375253b59e2b1865eec96663c65d (patch)
tree b43b5e38c33d69d39b5fb87860e5723d1be10416
parent 5072c59fd45e9976d02ee6f18c7336ef97623cbc (diff)
ftrace: convert single large buffer into single pages.
Allocating large buffers for the tracer may fail easily. This patch converts the buffer from a large ordered allocation to single pages. It uses the struct page LRU field to link the pages together. Later patches may also implement dynamic increasing and decreasing of the trace buffers. Signed-off-by: Steven Rostedt <srostedt@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- kernel/trace/trace.c 245
-rw-r--r-- kernel/trace/trace.h 8
2 files changed, 195 insertions, 58 deletions
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1b8eca7650d4..d7ad030a4c49 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -15,6 +15,7 @@
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/seq_file.h> 16#include <linux/seq_file.h>
17#include <linux/debugfs.h> 17#include <linux/debugfs.h>
18#include <linux/pagemap.h>
18#include <linux/hardirq.h> 19#include <linux/hardirq.h>
19#include <linux/linkage.h> 20#include <linux/linkage.h>
20#include <linux/uaccess.h> 21#include <linux/uaccess.h>
@@ -49,7 +50,7 @@ static struct trace_array max_tr;
49static DEFINE_PER_CPU(struct trace_array_cpu, max_data); 50static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
50 51
51static int tracer_enabled; 52static int tracer_enabled;
52static unsigned long trace_nr_entries = 4096UL; 53static unsigned long trace_nr_entries = 16384UL;
53 54
54static struct tracer *trace_types __read_mostly; 55static struct tracer *trace_types __read_mostly;
55static struct tracer *current_trace __read_mostly; 56static struct tracer *current_trace __read_mostly;
@@ -57,6 +58,8 @@ static int max_tracer_type_len;
57 58
58static DEFINE_MUTEX(trace_types_lock); 59static DEFINE_MUTEX(trace_types_lock);
59 60
61#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry))
62
60static int __init set_nr_entries(char *str) 63static int __init set_nr_entries(char *str)
61{ 64{
62 if (!str) 65 if (!str)
@@ -103,6 +106,7 @@ static const char *trace_options[] = {
103 106
104static unsigned trace_flags; 107static unsigned trace_flags;
105 108
109static DEFINE_SPINLOCK(ftrace_max_lock);
106 110
107/* 111/*
108 * Copy the new maximum trace into the separate maximum-trace 112 * Copy the new maximum trace into the separate maximum-trace
@@ -136,17 +140,23 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
136{ 140{
137 struct trace_array_cpu *data; 141 struct trace_array_cpu *data;
138 void *save_trace; 142 void *save_trace;
143 struct list_head save_pages;
139 int i; 144 int i;
140 145
146 WARN_ON_ONCE(!irqs_disabled());
147 spin_lock(&ftrace_max_lock);
141 /* clear out all the previous traces */ 148 /* clear out all the previous traces */
142 for_each_possible_cpu(i) { 149 for_each_possible_cpu(i) {
143 data = tr->data[i]; 150 data = tr->data[i];
144 save_trace = max_tr.data[i]->trace; 151 save_trace = max_tr.data[i]->trace;
152 save_pages = max_tr.data[i]->trace_pages;
145 memcpy(max_tr.data[i], data, sizeof(*data)); 153 memcpy(max_tr.data[i], data, sizeof(*data));
146 data->trace = save_trace; 154 data->trace = save_trace;
155 data->trace_pages = save_pages;
147 } 156 }
148 157
149 __update_max_tr(tr, tsk, cpu); 158 __update_max_tr(tr, tsk, cpu);
159 spin_unlock(&ftrace_max_lock);
150} 160}
151 161
152/** 162/**
@@ -160,16 +170,22 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
160{ 170{
161 struct trace_array_cpu *data = tr->data[cpu]; 171 struct trace_array_cpu *data = tr->data[cpu];
162 void *save_trace; 172 void *save_trace;
173 struct list_head save_pages;
163 int i; 174 int i;
164 175
176 WARN_ON_ONCE(!irqs_disabled());
177 spin_lock(&ftrace_max_lock);
165 for_each_possible_cpu(i) 178 for_each_possible_cpu(i)
166 tracing_reset(max_tr.data[i]); 179 tracing_reset(max_tr.data[i]);
167 180
168 save_trace = max_tr.data[cpu]->trace; 181 save_trace = max_tr.data[cpu]->trace;
182 save_pages = max_tr.data[cpu]->trace_pages;
169 memcpy(max_tr.data[cpu], data, sizeof(*data)); 183 memcpy(max_tr.data[cpu], data, sizeof(*data));
170 data->trace = save_trace; 184 data->trace = save_trace;
185 data->trace_pages = save_pages;
171 186
172 __update_max_tr(tr, tsk, cpu); 187 __update_max_tr(tr, tsk, cpu);
188 spin_unlock(&ftrace_max_lock);
173} 189}
174 190
175int register_tracer(struct tracer *type) 191int register_tracer(struct tracer *type)
@@ -236,7 +252,8 @@ void unregister_tracer(struct tracer *type)
236void notrace tracing_reset(struct trace_array_cpu *data) 252void notrace tracing_reset(struct trace_array_cpu *data)
237{ 253{
238 data->trace_idx = 0; 254 data->trace_idx = 0;
239 atomic_set(&data->underrun, 0); 255 data->trace_current = data->trace;
256 data->trace_current_idx = 0;
240} 257}
241 258
242#ifdef CONFIG_FTRACE 259#ifdef CONFIG_FTRACE
@@ -367,21 +384,27 @@ tracing_get_trace_entry(struct trace_array *tr,
367{ 384{
368 unsigned long idx, idx_next; 385 unsigned long idx, idx_next;
369 struct trace_entry *entry; 386 struct trace_entry *entry;
387 struct page *page;
388 struct list_head *next;
370 389
371 idx = data->trace_idx; 390 data->trace_idx++;
391 idx = data->trace_current_idx;
372 idx_next = idx + 1; 392 idx_next = idx + 1;
373 393
374 if (unlikely(idx_next >= tr->entries)) { 394 entry = data->trace_current + idx * TRACE_ENTRY_SIZE;
375 atomic_inc(&data->underrun); 395
396 if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
397 page = virt_to_page(data->trace_current);
398 if (unlikely(&page->lru == data->trace_pages.prev))
399 next = data->trace_pages.next;
400 else
401 next = page->lru.next;
402 page = list_entry(next, struct page, lru);
403 data->trace_current = page_address(page);
376 idx_next = 0; 404 idx_next = 0;
377 } 405 }
378 406
379 data->trace_idx = idx_next; 407 data->trace_current_idx = idx_next;
380
381 if (unlikely(idx_next != 0 && atomic_read(&data->underrun)))
382 atomic_inc(&data->underrun);
383
384 entry = data->trace + idx * TRACE_ENTRY_SIZE;
385 408
386 return entry; 409 return entry;
387} 410}
@@ -442,21 +465,38 @@ enum trace_file_type {
442}; 465};
443 466
444static struct trace_entry * 467static struct trace_entry *
445trace_entry_idx(struct trace_array *tr, unsigned long idx, int cpu) 468trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
469 struct trace_iterator *iter, int cpu)
446{ 470{
447 struct trace_entry *array = tr->data[cpu]->trace; 471 struct page *page;
448 unsigned long underrun; 472 struct trace_entry *array;
449 473
450 if (idx >= tr->entries) 474 if (iter->next_idx[cpu] >= tr->entries ||
475 iter->next_idx[cpu] >= data->trace_idx)
451 return NULL; 476 return NULL;
452 477
453 underrun = atomic_read(&tr->data[cpu]->underrun); 478 if (!iter->next_page[cpu]) {
454 if (underrun) 479 /*
455 idx = ((underrun - 1) + idx) % tr->entries; 480 * Initialize. If the count of elements in
456 else if (idx >= tr->data[cpu]->trace_idx) 481 * this buffer is greater than the max entries
457 return NULL; 482 * we had an underrun. Which means we looped around.
483 * We can simply use the current pointer as our
484 * starting point.
485 */
486 if (data->trace_idx >= tr->entries) {
487 page = virt_to_page(data->trace_current);
488 iter->next_page[cpu] = &page->lru;
489 iter->next_page_idx[cpu] = data->trace_current_idx;
490 } else {
491 iter->next_page[cpu] = data->trace_pages.next;
492 iter->next_page_idx[cpu] = 0;
493 }
494 }
458 495
459 return &array[idx]; 496 page = list_entry(iter->next_page[cpu], struct page, lru);
497 array = page_address(page);
498
499 return &array[iter->next_page_idx[cpu]];
460} 500}
461 501
462static struct notrace trace_entry * 502static struct notrace trace_entry *
@@ -470,7 +510,7 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu)
470 for_each_possible_cpu(cpu) { 510 for_each_possible_cpu(cpu) {
471 if (!tr->data[cpu]->trace) 511 if (!tr->data[cpu]->trace)
472 continue; 512 continue;
473 ent = trace_entry_idx(tr, iter->next_idx[cpu], cpu); 513 ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
474 if (ent && 514 if (ent &&
475 (!next || (long)(next->idx - ent->idx) > 0)) { 515 (!next || (long)(next->idx - ent->idx) > 0)) {
476 next = ent; 516 next = ent;
@@ -492,8 +532,19 @@ static void *find_next_entry_inc(struct trace_iterator *iter)
492 next = find_next_entry(iter, &next_cpu); 532 next = find_next_entry(iter, &next_cpu);
493 533
494 if (next) { 534 if (next) {
495 iter->next_idx[next_cpu]++;
496 iter->idx++; 535 iter->idx++;
536 iter->next_idx[next_cpu]++;
537 iter->next_page_idx[next_cpu]++;
538 if (iter->next_page_idx[next_cpu] >= ENTRIES_PER_PAGE) {
539 struct trace_array_cpu *data = iter->tr->data[next_cpu];
540
541 iter->next_page_idx[next_cpu] = 0;
542 iter->next_page[next_cpu] =
543 iter->next_page[next_cpu]->next;
544 if (iter->next_page[next_cpu] == &data->trace_pages)
545 iter->next_page[next_cpu] =
546 data->trace_pages.next;
547 }
497 } 548 }
498 iter->ent = next; 549 iter->ent = next;
499 iter->cpu = next_cpu; 550 iter->cpu = next_cpu;
@@ -554,14 +605,16 @@ static void *s_start(struct seq_file *m, loff_t *pos)
554 iter->cpu = 0; 605 iter->cpu = 0;
555 iter->idx = -1; 606 iter->idx = -1;
556 607
557 for (i = 0; i < NR_CPUS; i++) 608 for_each_possible_cpu(i) {
558 iter->next_idx[i] = 0; 609 iter->next_idx[i] = 0;
610 iter->next_page[i] = NULL;
611 }
559 612
560 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 613 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
561 ; 614 ;
562 615
563 } else { 616 } else {
564 l = *pos; 617 l = *pos - 1;
565 p = s_next(m, p, &l); 618 p = s_next(m, p, &l);
566 } 619 }
567 620
@@ -654,9 +707,8 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
654 struct trace_array *tr = iter->tr; 707 struct trace_array *tr = iter->tr;
655 struct trace_array_cpu *data = tr->data[tr->cpu]; 708 struct trace_array_cpu *data = tr->data[tr->cpu];
656 struct tracer *type = current_trace; 709 struct tracer *type = current_trace;
657 unsigned long underruns = 0; 710 unsigned long total = 0;
658 unsigned long underrun; 711 unsigned long entries = 0;
659 unsigned long entries = 0;
660 int cpu; 712 int cpu;
661 const char *name = "preemption"; 713 const char *name = "preemption";
662 714
@@ -665,11 +717,10 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
665 717
666 for_each_possible_cpu(cpu) { 718 for_each_possible_cpu(cpu) {
667 if (tr->data[cpu]->trace) { 719 if (tr->data[cpu]->trace) {
668 underrun = atomic_read(&tr->data[cpu]->underrun); 720 total += tr->data[cpu]->trace_idx;
669 if (underrun) { 721 if (tr->data[cpu]->trace_idx > tr->entries)
670 underruns += underrun;
671 entries += tr->entries; 722 entries += tr->entries;
672 } else 723 else
673 entries += tr->data[cpu]->trace_idx; 724 entries += tr->data[cpu]->trace_idx;
674 } 725 }
675 } 726 }
@@ -682,7 +733,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
682 " (M:%s VP:%d, KP:%d, SP:%d HP:%d", 733 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
683 data->saved_latency, 734 data->saved_latency,
684 entries, 735 entries,
685 (entries + underruns), 736 total,
686 tr->cpu, 737 tr->cpu,
687#if defined(CONFIG_PREEMPT_NONE) 738#if defined(CONFIG_PREEMPT_NONE)
688 "server", 739 "server",
@@ -882,8 +933,7 @@ static int trace_empty(struct trace_iterator *iter)
882 data = iter->tr->data[cpu]; 933 data = iter->tr->data[cpu];
883 934
884 if (data->trace && 935 if (data->trace &&
885 (data->trace_idx || 936 data->trace_idx)
886 atomic_read(&data->underrun)))
887 return 0; 937 return 0;
888 } 938 }
889 return 1; 939 return 1;
@@ -1464,42 +1514,109 @@ static struct tracer no_tracer __read_mostly =
1464 .name = "none", 1514 .name = "none",
1465}; 1515};
1466 1516
1467static inline notrace int page_order(const unsigned long size) 1517static int trace_alloc_page(void)
1468{ 1518{
1469 const unsigned long nr_pages = DIV_ROUND_UP(size, PAGE_SIZE); 1519 struct trace_array_cpu *data;
1470 return ilog2(roundup_pow_of_two(nr_pages)); 1520 void *array;
1521 struct page *page, *tmp;
1522 LIST_HEAD(pages);
1523 int i;
1524
1525 /* first allocate a page for each CPU */
1526 for_each_possible_cpu(i) {
1527 array = (void *)__get_free_page(GFP_KERNEL);
1528 if (array == NULL) {
1529 printk(KERN_ERR "tracer: failed to allocate page"
1530 "for trace buffer!\n");
1531 goto free_pages;
1532 }
1533
1534 page = virt_to_page(array);
1535 list_add(&page->lru, &pages);
1536
1537/* Only allocate if we are actually using the max trace */
1538#ifdef CONFIG_TRACER_MAX_TRACE
1539 array = (void *)__get_free_page(GFP_KERNEL);
1540 if (array == NULL) {
1541 printk(KERN_ERR "tracer: failed to allocate page"
1542 "for trace buffer!\n");
1543 goto free_pages;
1544 }
1545 page = virt_to_page(array);
1546 list_add(&page->lru, &pages);
1547#endif
1548 }
1549
1550 /* Now that we successfully allocate a page per CPU, add them */
1551 for_each_possible_cpu(i) {
1552 data = global_trace.data[i];
1553 page = list_entry(pages.next, struct page, lru);
1554 list_del(&page->lru);
1555 list_add_tail(&page->lru, &data->trace_pages);
1556 ClearPageLRU(page);
1557
1558#ifdef CONFIG_TRACER_MAX_TRACE
1559 data = max_tr.data[i];
1560 page = list_entry(pages.next, struct page, lru);
1561 list_del(&page->lru);
1562 list_add_tail(&page->lru, &data->trace_pages);
1563 SetPageLRU(page);
1564#endif
1565 }
1566 global_trace.entries += ENTRIES_PER_PAGE;
1567
1568 return 0;
1569
1570 free_pages:
1571 list_for_each_entry_safe(page, tmp, &pages, lru) {
1572 list_del(&page->lru);
1573 __free_page(page);
1574 }
1575 return -ENOMEM;
1471} 1576}
1472 1577
1473__init static int tracer_alloc_buffers(void) 1578__init static int tracer_alloc_buffers(void)
1474{ 1579{
1475 const int order = page_order(trace_nr_entries * TRACE_ENTRY_SIZE); 1580 struct trace_array_cpu *data;
1476 const unsigned long size = (1UL << order) << PAGE_SHIFT; 1581 void *array;
1477 struct trace_entry *array; 1582 struct page *page;
1583 int pages = 0;
1478 int i; 1584 int i;
1479 1585
1586 /* Allocate the first page for all buffers */
1480 for_each_possible_cpu(i) { 1587 for_each_possible_cpu(i) {
1481 global_trace.data[i] = &per_cpu(global_trace_cpu, i); 1588 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
1482 max_tr.data[i] = &per_cpu(max_data, i); 1589 max_tr.data[i] = &per_cpu(max_data, i);
1483 1590
1484 array = (struct trace_entry *) 1591 array = (void *)__get_free_page(GFP_KERNEL);
1485 __get_free_pages(GFP_KERNEL, order);
1486 if (array == NULL) { 1592 if (array == NULL) {
1487 printk(KERN_ERR "tracer: failed to allocate" 1593 printk(KERN_ERR "tracer: failed to allocate page"
1488 " %ld bytes for trace buffer!\n", size); 1594 "for trace buffer!\n");
1489 goto free_buffers; 1595 goto free_buffers;
1490 } 1596 }
1491 global_trace.data[i]->trace = array; 1597 data->trace = array;
1598
1599 /* set the array to the list */
1600 INIT_LIST_HEAD(&data->trace_pages);
1601 page = virt_to_page(array);
1602 list_add(&page->lru, &data->trace_pages);
1603 /* use the LRU flag to differentiate the two buffers */
1604 ClearPageLRU(page);
1492 1605
1493/* Only allocate if we are actually using the max trace */ 1606/* Only allocate if we are actually using the max trace */
1494#ifdef CONFIG_TRACER_MAX_TRACE 1607#ifdef CONFIG_TRACER_MAX_TRACE
1495 array = (struct trace_entry *) 1608 array = (void *)__get_free_page(GFP_KERNEL);
1496 __get_free_pages(GFP_KERNEL, order);
1497 if (array == NULL) { 1609 if (array == NULL) {
1498 printk(KERN_ERR "wakeup tracer: failed to allocate" 1610 printk(KERN_ERR "tracer: failed to allocate page"
1499 " %ld bytes for trace buffer!\n", size); 1611 "for trace buffer!\n");
1500 goto free_buffers; 1612 goto free_buffers;
1501 } 1613 }
1502 max_tr.data[i]->trace = array; 1614 max_tr.data[i]->trace = array;
1615
1616 INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
1617 page = virt_to_page(array);
1618 list_add(&page->lru, &max_tr.data[i]->trace_pages);
1619 SetPageLRU(page);
1503#endif 1620#endif
1504 } 1621 }
1505 1622
@@ -1507,11 +1624,18 @@ __init static int tracer_alloc_buffers(void)
1507 * Since we allocate by orders of pages, we may be able to 1624 * Since we allocate by orders of pages, we may be able to
1508 * round up a bit. 1625 * round up a bit.
1509 */ 1626 */
1510 global_trace.entries = size / TRACE_ENTRY_SIZE; 1627 global_trace.entries = ENTRIES_PER_PAGE;
1511 max_tr.entries = global_trace.entries; 1628 max_tr.entries = global_trace.entries;
1629 pages++;
1630
1631 while (global_trace.entries < trace_nr_entries) {
1632 if (trace_alloc_page())
1633 break;
1634 pages++;
1635 }
1512 1636
1513 pr_info("tracer: %ld bytes allocated for %ld", 1637 pr_info("tracer: %d pages allocated for %ld",
1514 size, trace_nr_entries); 1638 pages, trace_nr_entries);
1515 pr_info(" entries of %ld bytes\n", (long)TRACE_ENTRY_SIZE); 1639 pr_info(" entries of %ld bytes\n", (long)TRACE_ENTRY_SIZE);
1516 pr_info(" actual entries %ld\n", global_trace.entries); 1640 pr_info(" actual entries %ld\n", global_trace.entries);
1517 1641
@@ -1526,17 +1650,26 @@ __init static int tracer_alloc_buffers(void)
1526 1650
1527 free_buffers: 1651 free_buffers:
1528 for (i-- ; i >= 0; i--) { 1652 for (i-- ; i >= 0; i--) {
1653 struct page *page, *tmp;
1529 struct trace_array_cpu *data = global_trace.data[i]; 1654 struct trace_array_cpu *data = global_trace.data[i];
1530 1655
1531 if (data && data->trace) { 1656 if (data && data->trace) {
1532 free_pages((unsigned long)data->trace, order); 1657 list_for_each_entry_safe(page, tmp,
1658 &data->trace_pages, lru) {
1659 list_del(&page->lru);
1660 __free_page(page);
1661 }
1533 data->trace = NULL; 1662 data->trace = NULL;
1534 } 1663 }
1535 1664
1536#ifdef CONFIG_TRACER_MAX_TRACE 1665#ifdef CONFIG_TRACER_MAX_TRACE
1537 data = max_tr.data[i]; 1666 data = max_tr.data[i];
1538 if (data && data->trace) { 1667 if (data && data->trace) {
1539 free_pages((unsigned long)data->trace, order); 1668 list_for_each_entry_safe(page, tmp,
1669 &data->trace_pages, lru) {
1670 list_del(&page->lru);
1671 __free_page(page);
1672 }
1540 data->trace = NULL; 1673 data->trace = NULL;
1541 } 1674 }
1542#endif 1675#endif
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3173a93561d4..83e257e38084 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -54,9 +54,11 @@ struct trace_entry {
54 */ 54 */
55struct trace_array_cpu { 55struct trace_array_cpu {
56 void *trace; 56 void *trace;
57 void *trace_current;
58 unsigned trace_current_idx;
59 struct list_head trace_pages;
57 unsigned long trace_idx; 60 unsigned long trace_idx;
58 atomic_t disabled; 61 atomic_t disabled;
59 atomic_t underrun;
60 unsigned long saved_latency; 62 unsigned long saved_latency;
61 unsigned long critical_start; 63 unsigned long critical_start;
62 unsigned long critical_end; 64 unsigned long critical_end;
@@ -112,8 +114,10 @@ struct trace_iterator {
112 unsigned long iter_flags; 114 unsigned long iter_flags;
113 loff_t pos; 115 loff_t pos;
114 unsigned long next_idx[NR_CPUS]; 116 unsigned long next_idx[NR_CPUS];
117 struct list_head *next_page[NR_CPUS];
118 unsigned next_page_idx[NR_CPUS];
119 long idx;
115 int cpu; 120 int cpu;
116 int idx;
117}; 121};
118 122
119void notrace tracing_reset(struct trace_array_cpu *data); 123void notrace tracing_reset(struct trace_array_cpu *data);