aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace
diff options
context:
space:
mode:
authorRusty Russell <rusty@rustcorp.com.au>2008-12-31 18:42:22 -0500
committerRusty Russell <rusty@rustcorp.com.au>2008-12-31 18:42:22 -0500
commit9e01c1b74c9531e301c900edaa92a99fcb7738f2 (patch)
treef71425ecfecf32046bd6aecb5ca6e504e9258b30 /kernel/trace
parent333af15341b2f6cd813c054e1b441d7b6d8e9318 (diff)
cpumask: convert kernel trace functions
Impact: Reduce future memory usage, use new cpumask API. (Eventually, cpumask_var_t will be allocated based on nr_cpu_ids, not NR_CPUS). Convert kernel trace functions to use struct cpumask API: 1) Use cpumask_copy/cpumask_test_cpu/for_each_cpu. 2) Use cpumask_var_t and alloc_cpumask_var/free_cpumask_var everywhere. 3) Use on_each_cpu instead of playing with current->cpus_allowed. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> Signed-off-by: Mike Travis <travis@sgi.com> Acked-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/ring_buffer.c42
-rw-r--r--kernel/trace/trace.c60
-rw-r--r--kernel/trace/trace_sysprof.c13
3 files changed, 64 insertions, 51 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 1d601a7c4587..a9d9760dc7b6 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -195,7 +195,7 @@ void *ring_buffer_event_data(struct ring_buffer_event *event)
195EXPORT_SYMBOL_GPL(ring_buffer_event_data); 195EXPORT_SYMBOL_GPL(ring_buffer_event_data);
196 196
197#define for_each_buffer_cpu(buffer, cpu) \ 197#define for_each_buffer_cpu(buffer, cpu) \
198 for_each_cpu_mask(cpu, buffer->cpumask) 198 for_each_cpu(cpu, buffer->cpumask)
199 199
200#define TS_SHIFT 27 200#define TS_SHIFT 27
201#define TS_MASK ((1ULL << TS_SHIFT) - 1) 201#define TS_MASK ((1ULL << TS_SHIFT) - 1)
@@ -267,7 +267,7 @@ struct ring_buffer {
267 unsigned pages; 267 unsigned pages;
268 unsigned flags; 268 unsigned flags;
269 int cpus; 269 int cpus;
270 cpumask_t cpumask; 270 cpumask_var_t cpumask;
271 atomic_t record_disabled; 271 atomic_t record_disabled;
272 272
273 struct mutex mutex; 273 struct mutex mutex;
@@ -458,6 +458,9 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
458 if (!buffer) 458 if (!buffer)
459 return NULL; 459 return NULL;
460 460
461 if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
462 goto fail_free_buffer;
463
461 buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 464 buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
462 buffer->flags = flags; 465 buffer->flags = flags;
463 466
@@ -465,14 +468,14 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
465 if (buffer->pages == 1) 468 if (buffer->pages == 1)
466 buffer->pages++; 469 buffer->pages++;
467 470
468 buffer->cpumask = cpu_possible_map; 471 cpumask_copy(buffer->cpumask, cpu_possible_mask);
469 buffer->cpus = nr_cpu_ids; 472 buffer->cpus = nr_cpu_ids;
470 473
471 bsize = sizeof(void *) * nr_cpu_ids; 474 bsize = sizeof(void *) * nr_cpu_ids;
472 buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()), 475 buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
473 GFP_KERNEL); 476 GFP_KERNEL);
474 if (!buffer->buffers) 477 if (!buffer->buffers)
475 goto fail_free_buffer; 478 goto fail_free_cpumask;
476 479
477 for_each_buffer_cpu(buffer, cpu) { 480 for_each_buffer_cpu(buffer, cpu) {
478 buffer->buffers[cpu] = 481 buffer->buffers[cpu] =
@@ -492,6 +495,9 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
492 } 495 }
493 kfree(buffer->buffers); 496 kfree(buffer->buffers);
494 497
498 fail_free_cpumask:
499 free_cpumask_var(buffer->cpumask);
500
495 fail_free_buffer: 501 fail_free_buffer:
496 kfree(buffer); 502 kfree(buffer);
497 return NULL; 503 return NULL;
@@ -510,6 +516,8 @@ ring_buffer_free(struct ring_buffer *buffer)
510 for_each_buffer_cpu(buffer, cpu) 516 for_each_buffer_cpu(buffer, cpu)
511 rb_free_cpu_buffer(buffer->buffers[cpu]); 517 rb_free_cpu_buffer(buffer->buffers[cpu]);
512 518
519 free_cpumask_var(buffer->cpumask);
520
513 kfree(buffer); 521 kfree(buffer);
514} 522}
515EXPORT_SYMBOL_GPL(ring_buffer_free); 523EXPORT_SYMBOL_GPL(ring_buffer_free);
@@ -1283,7 +1291,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1283 1291
1284 cpu = raw_smp_processor_id(); 1292 cpu = raw_smp_processor_id();
1285 1293
1286 if (!cpu_isset(cpu, buffer->cpumask)) 1294 if (!cpumask_test_cpu(cpu, buffer->cpumask))
1287 goto out; 1295 goto out;
1288 1296
1289 cpu_buffer = buffer->buffers[cpu]; 1297 cpu_buffer = buffer->buffers[cpu];
@@ -1396,7 +1404,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1396 1404
1397 cpu = raw_smp_processor_id(); 1405 cpu = raw_smp_processor_id();
1398 1406
1399 if (!cpu_isset(cpu, buffer->cpumask)) 1407 if (!cpumask_test_cpu(cpu, buffer->cpumask))
1400 goto out; 1408 goto out;
1401 1409
1402 cpu_buffer = buffer->buffers[cpu]; 1410 cpu_buffer = buffer->buffers[cpu];
@@ -1478,7 +1486,7 @@ void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
1478{ 1486{
1479 struct ring_buffer_per_cpu *cpu_buffer; 1487 struct ring_buffer_per_cpu *cpu_buffer;
1480 1488
1481 if (!cpu_isset(cpu, buffer->cpumask)) 1489 if (!cpumask_test_cpu(cpu, buffer->cpumask))
1482 return; 1490 return;
1483 1491
1484 cpu_buffer = buffer->buffers[cpu]; 1492 cpu_buffer = buffer->buffers[cpu];
@@ -1498,7 +1506,7 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
1498{ 1506{
1499 struct ring_buffer_per_cpu *cpu_buffer; 1507 struct ring_buffer_per_cpu *cpu_buffer;
1500 1508
1501 if (!cpu_isset(cpu, buffer->cpumask)) 1509 if (!cpumask_test_cpu(cpu, buffer->cpumask))
1502 return; 1510 return;
1503 1511
1504 cpu_buffer = buffer->buffers[cpu]; 1512 cpu_buffer = buffer->buffers[cpu];
@@ -1515,7 +1523,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
1515{ 1523{
1516 struct ring_buffer_per_cpu *cpu_buffer; 1524 struct ring_buffer_per_cpu *cpu_buffer;
1517 1525
1518 if (!cpu_isset(cpu, buffer->cpumask)) 1526 if (!cpumask_test_cpu(cpu, buffer->cpumask))
1519 return 0; 1527 return 0;
1520 1528
1521 cpu_buffer = buffer->buffers[cpu]; 1529 cpu_buffer = buffer->buffers[cpu];
@@ -1532,7 +1540,7 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
1532{ 1540{
1533 struct ring_buffer_per_cpu *cpu_buffer; 1541 struct ring_buffer_per_cpu *cpu_buffer;
1534 1542
1535 if (!cpu_isset(cpu, buffer->cpumask)) 1543 if (!cpumask_test_cpu(cpu, buffer->cpumask))
1536 return 0; 1544 return 0;
1537 1545
1538 cpu_buffer = buffer->buffers[cpu]; 1546 cpu_buffer = buffer->buffers[cpu];
@@ -1850,7 +1858,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1850 struct buffer_page *reader; 1858 struct buffer_page *reader;
1851 int nr_loops = 0; 1859 int nr_loops = 0;
1852 1860
1853 if (!cpu_isset(cpu, buffer->cpumask)) 1861 if (!cpumask_test_cpu(cpu, buffer->cpumask))
1854 return NULL; 1862 return NULL;
1855 1863
1856 cpu_buffer = buffer->buffers[cpu]; 1864 cpu_buffer = buffer->buffers[cpu];
@@ -2025,7 +2033,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2025 struct ring_buffer_event *event; 2033 struct ring_buffer_event *event;
2026 unsigned long flags; 2034 unsigned long flags;
2027 2035
2028 if (!cpu_isset(cpu, buffer->cpumask)) 2036 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2029 return NULL; 2037 return NULL;
2030 2038
2031 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2039 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
@@ -2062,7 +2070,7 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
2062 struct ring_buffer_iter *iter; 2070 struct ring_buffer_iter *iter;
2063 unsigned long flags; 2071 unsigned long flags;
2064 2072
2065 if (!cpu_isset(cpu, buffer->cpumask)) 2073 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2066 return NULL; 2074 return NULL;
2067 2075
2068 iter = kmalloc(sizeof(*iter), GFP_KERNEL); 2076 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
@@ -2172,7 +2180,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
2172 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 2180 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
2173 unsigned long flags; 2181 unsigned long flags;
2174 2182
2175 if (!cpu_isset(cpu, buffer->cpumask)) 2183 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2176 return; 2184 return;
2177 2185
2178 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2186 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
@@ -2228,7 +2236,7 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
2228{ 2236{
2229 struct ring_buffer_per_cpu *cpu_buffer; 2237 struct ring_buffer_per_cpu *cpu_buffer;
2230 2238
2231 if (!cpu_isset(cpu, buffer->cpumask)) 2239 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2232 return 1; 2240 return 1;
2233 2241
2234 cpu_buffer = buffer->buffers[cpu]; 2242 cpu_buffer = buffer->buffers[cpu];
@@ -2252,8 +2260,8 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2252 struct ring_buffer_per_cpu *cpu_buffer_a; 2260 struct ring_buffer_per_cpu *cpu_buffer_a;
2253 struct ring_buffer_per_cpu *cpu_buffer_b; 2261 struct ring_buffer_per_cpu *cpu_buffer_b;
2254 2262
2255 if (!cpu_isset(cpu, buffer_a->cpumask) || 2263 if (!cpumask_test_cpu(cpu, buffer_a->cpumask) ||
2256 !cpu_isset(cpu, buffer_b->cpumask)) 2264 !cpumask_test_cpu(cpu, buffer_b->cpumask))
2257 return -EINVAL; 2265 return -EINVAL;
2258 2266
2259 /* At least make sure the two buffers are somewhat the same */ 2267 /* At least make sure the two buffers are somewhat the same */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0e91f43b6baf..5d04e27f3b40 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -89,10 +89,10 @@ static inline void ftrace_enable_cpu(void)
89 preempt_enable(); 89 preempt_enable();
90} 90}
91 91
92static cpumask_t __read_mostly tracing_buffer_mask; 92static cpumask_var_t __read_mostly tracing_buffer_mask;
93 93
94#define for_each_tracing_cpu(cpu) \ 94#define for_each_tracing_cpu(cpu) \
95 for_each_cpu_mask(cpu, tracing_buffer_mask) 95 for_each_cpu(cpu, tracing_buffer_mask)
96 96
97/* 97/*
98 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops 98 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
@@ -2646,13 +2646,7 @@ static struct file_operations show_traces_fops = {
2646/* 2646/*
2647 * Only trace on a CPU if the bitmask is set: 2647 * Only trace on a CPU if the bitmask is set:
2648 */ 2648 */
2649static cpumask_t tracing_cpumask = CPU_MASK_ALL; 2649static cpumask_var_t tracing_cpumask;
2650
2651/*
2652 * When tracing/tracing_cpu_mask is modified then this holds
2653 * the new bitmask we are about to install:
2654 */
2655static cpumask_t tracing_cpumask_new;
2656 2650
2657/* 2651/*
2658 * The tracer itself will not take this lock, but still we want 2652 * The tracer itself will not take this lock, but still we want
@@ -2674,7 +2668,7 @@ tracing_cpumask_read(struct file *filp, char __user *ubuf,
2674 2668
2675 mutex_lock(&tracing_cpumask_update_lock); 2669 mutex_lock(&tracing_cpumask_update_lock);
2676 2670
2677 len = cpumask_scnprintf(mask_str, count, &tracing_cpumask); 2671 len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
2678 if (count - len < 2) { 2672 if (count - len < 2) {
2679 count = -EINVAL; 2673 count = -EINVAL;
2680 goto out_err; 2674 goto out_err;
@@ -2693,9 +2687,13 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2693 size_t count, loff_t *ppos) 2687 size_t count, loff_t *ppos)
2694{ 2688{
2695 int err, cpu; 2689 int err, cpu;
2690 cpumask_var_t tracing_cpumask_new;
2691
2692 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
2693 return -ENOMEM;
2696 2694
2697 mutex_lock(&tracing_cpumask_update_lock); 2695 mutex_lock(&tracing_cpumask_update_lock);
2698 err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new); 2696 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
2699 if (err) 2697 if (err)
2700 goto err_unlock; 2698 goto err_unlock;
2701 2699
@@ -2706,26 +2704,28 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2706 * Increase/decrease the disabled counter if we are 2704 * Increase/decrease the disabled counter if we are
2707 * about to flip a bit in the cpumask: 2705 * about to flip a bit in the cpumask:
2708 */ 2706 */
2709 if (cpu_isset(cpu, tracing_cpumask) && 2707 if (cpumask_test_cpu(cpu, tracing_cpumask) &&
2710 !cpu_isset(cpu, tracing_cpumask_new)) { 2708 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
2711 atomic_inc(&global_trace.data[cpu]->disabled); 2709 atomic_inc(&global_trace.data[cpu]->disabled);
2712 } 2710 }
2713 if (!cpu_isset(cpu, tracing_cpumask) && 2711 if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
2714 cpu_isset(cpu, tracing_cpumask_new)) { 2712 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
2715 atomic_dec(&global_trace.data[cpu]->disabled); 2713 atomic_dec(&global_trace.data[cpu]->disabled);
2716 } 2714 }
2717 } 2715 }
2718 __raw_spin_unlock(&ftrace_max_lock); 2716 __raw_spin_unlock(&ftrace_max_lock);
2719 local_irq_enable(); 2717 local_irq_enable();
2720 2718
2721 tracing_cpumask = tracing_cpumask_new; 2719 cpumask_copy(tracing_cpumask, tracing_cpumask_new);
2722 2720
2723 mutex_unlock(&tracing_cpumask_update_lock); 2721 mutex_unlock(&tracing_cpumask_update_lock);
2722 free_cpumask_var(tracing_cpumask_new);
2724 2723
2725 return count; 2724 return count;
2726 2725
2727err_unlock: 2726err_unlock:
2728 mutex_unlock(&tracing_cpumask_update_lock); 2727 mutex_unlock(&tracing_cpumask_update_lock);
2728 free_cpumask_var(tracing_cpumask);
2729 2729
2730 return err; 2730 return err;
2731} 2731}
@@ -3752,7 +3752,6 @@ void ftrace_dump(void)
3752 static DEFINE_SPINLOCK(ftrace_dump_lock); 3752 static DEFINE_SPINLOCK(ftrace_dump_lock);
3753 /* use static because iter can be a bit big for the stack */ 3753 /* use static because iter can be a bit big for the stack */
3754 static struct trace_iterator iter; 3754 static struct trace_iterator iter;
3755 static cpumask_t mask;
3756 static int dump_ran; 3755 static int dump_ran;
3757 unsigned long flags; 3756 unsigned long flags;
3758 int cnt = 0, cpu; 3757 int cnt = 0, cpu;
@@ -3786,8 +3785,6 @@ void ftrace_dump(void)
3786 * and then release the locks again. 3785 * and then release the locks again.
3787 */ 3786 */
3788 3787
3789 cpus_clear(mask);
3790
3791 while (!trace_empty(&iter)) { 3788 while (!trace_empty(&iter)) {
3792 3789
3793 if (!cnt) 3790 if (!cnt)
@@ -3823,19 +3820,28 @@ __init static int tracer_alloc_buffers(void)
3823{ 3820{
3824 struct trace_array_cpu *data; 3821 struct trace_array_cpu *data;
3825 int i; 3822 int i;
3823 int ret = -ENOMEM;
3826 3824
3827 /* TODO: make the number of buffers hot pluggable with CPUS */ 3825 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
3828 tracing_buffer_mask = cpu_possible_map; 3826 goto out;
3827
3828 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
3829 goto out_free_buffer_mask;
3829 3830
3831 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
3832 cpumask_copy(tracing_cpumask, cpu_all_mask);
3833
3834 /* TODO: make the number of buffers hot pluggable with CPUS */
3830 global_trace.buffer = ring_buffer_alloc(trace_buf_size, 3835 global_trace.buffer = ring_buffer_alloc(trace_buf_size,
3831 TRACE_BUFFER_FLAGS); 3836 TRACE_BUFFER_FLAGS);
3832 if (!global_trace.buffer) { 3837 if (!global_trace.buffer) {
3833 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); 3838 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
3834 WARN_ON(1); 3839 WARN_ON(1);
3835 return 0; 3840 goto out_free_cpumask;
3836 } 3841 }
3837 global_trace.entries = ring_buffer_size(global_trace.buffer); 3842 global_trace.entries = ring_buffer_size(global_trace.buffer);
3838 3843
3844
3839#ifdef CONFIG_TRACER_MAX_TRACE 3845#ifdef CONFIG_TRACER_MAX_TRACE
3840 max_tr.buffer = ring_buffer_alloc(trace_buf_size, 3846 max_tr.buffer = ring_buffer_alloc(trace_buf_size,
3841 TRACE_BUFFER_FLAGS); 3847 TRACE_BUFFER_FLAGS);
@@ -3843,7 +3849,7 @@ __init static int tracer_alloc_buffers(void)
3843 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n"); 3849 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
3844 WARN_ON(1); 3850 WARN_ON(1);
3845 ring_buffer_free(global_trace.buffer); 3851 ring_buffer_free(global_trace.buffer);
3846 return 0; 3852 goto out_free_cpumask;
3847 } 3853 }
3848 max_tr.entries = ring_buffer_size(max_tr.buffer); 3854 max_tr.entries = ring_buffer_size(max_tr.buffer);
3849 WARN_ON(max_tr.entries != global_trace.entries); 3855 WARN_ON(max_tr.entries != global_trace.entries);
@@ -3873,8 +3879,14 @@ __init static int tracer_alloc_buffers(void)
3873 &trace_panic_notifier); 3879 &trace_panic_notifier);
3874 3880
3875 register_die_notifier(&trace_die_notifier); 3881 register_die_notifier(&trace_die_notifier);
3882 ret = 0;
3876 3883
3877 return 0; 3884out_free_cpumask:
3885 free_cpumask_var(tracing_cpumask);
3886out_free_buffer_mask:
3887 free_cpumask_var(tracing_buffer_mask);
3888out:
3889 return ret;
3878} 3890}
3879early_initcall(tracer_alloc_buffers); 3891early_initcall(tracer_alloc_buffers);
3880fs_initcall(tracer_init_debugfs); 3892fs_initcall(tracer_init_debugfs);
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index a5779bd975db..eaca5ad803ff 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -196,9 +196,9 @@ static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer)
196 return HRTIMER_RESTART; 196 return HRTIMER_RESTART;
197} 197}
198 198
199static void start_stack_timer(int cpu) 199static void start_stack_timer(void *unused)
200{ 200{
201 struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu); 201 struct hrtimer *hrtimer = &__get_cpu_var(stack_trace_hrtimer);
202 202
203 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 203 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
204 hrtimer->function = stack_trace_timer_fn; 204 hrtimer->function = stack_trace_timer_fn;
@@ -208,14 +208,7 @@ static void start_stack_timer(int cpu)
208 208
209static void start_stack_timers(void) 209static void start_stack_timers(void)
210{ 210{
211 cpumask_t saved_mask = current->cpus_allowed; 211 on_each_cpu(start_stack_timer, NULL, 1);
212 int cpu;
213
214 for_each_online_cpu(cpu) {
215 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
216 start_stack_timer(cpu);
217 }
218 set_cpus_allowed_ptr(current, &saved_mask);
219} 212}
220 213
221static void stop_stack_timer(int cpu) 214static void stop_stack_timer(int cpu)
n> unsigned long start, unsigned long end) { unsigned long pfn; start &= PAGE_SECTION_MASK; mminit_validate_memmodel_limits(&start, &end); for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { unsigned long section = pfn_to_section_nr(pfn); struct mem_section *ms; sparse_index_init(section, nid); set_section_nid(section, nid); ms = __nr_to_section(section); if (!ms->section_mem_map) ms->section_mem_map = sparse_encode_early_nid(nid) | SECTION_MARKED_PRESENT; } } /* * Only used by the i386 NUMA architecures, but relatively * generic code. */ unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn, unsigned long end_pfn) { unsigned long pfn; unsigned long nr_pages = 0; mminit_validate_memmodel_limits(&start_pfn, &end_pfn); for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { if (nid != early_pfn_to_nid(pfn)) continue; if (pfn_present(pfn)) nr_pages += PAGES_PER_SECTION; } return nr_pages * sizeof(struct page); } /* * Subtle, we encode the real pfn into the mem_map such that * the identity pfn - section_mem_map will return the actual * physical page frame number. */ static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum) { return (unsigned long)(mem_map - (section_nr_to_pfn(pnum))); } /* * Decode mem_map from the coded memmap */ struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum) { /* mask off the extra low bits of information */ coded_mem_map &= SECTION_MAP_MASK; return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum); } static int __meminit sparse_init_one_section(struct mem_section *ms, unsigned long pnum, struct page *mem_map, unsigned long *pageblock_bitmap) { if (!present_section(ms)) return -EINVAL; ms->section_mem_map &= ~SECTION_MAP_MASK; ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) | SECTION_HAS_MEM_MAP; ms->pageblock_flags = pageblock_bitmap; return 1; } unsigned long usemap_size(void) { unsigned long size_bytes; size_bytes = roundup(SECTION_BLOCKFLAGS_BITS, 8) / 8; size_bytes = roundup(size_bytes, sizeof(unsigned long)); return size_bytes; } #ifdef CONFIG_MEMORY_HOTPLUG static unsigned long *__kmalloc_section_usemap(void) { return kmalloc(usemap_size(), GFP_KERNEL); } #endif /* CONFIG_MEMORY_HOTPLUG */ #ifdef CONFIG_MEMORY_HOTREMOVE static unsigned long * __init sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat) { unsigned long section_nr; /* * A page may contain usemaps for other sections preventing the * page being freed and making a section unremovable while * other sections referencing the usemap retmain active. Similarly, * a pgdat can prevent a section being removed. If section A * contains a pgdat and section B contains the usemap, both * sections become inter-dependent. This allocates usemaps * from the same section as the pgdat where possible to avoid * this problem. */ section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); return alloc_bootmem_section(usemap_size(), section_nr); } static void __init check_usemap_section_nr(int nid, unsigned long *usemap) { unsigned long usemap_snr, pgdat_snr; static unsigned long old_usemap_snr = NR_MEM_SECTIONS; static unsigned long old_pgdat_snr = NR_MEM_SECTIONS; struct pglist_data *pgdat = NODE_DATA(nid); int usemap_nid; usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT); pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); if (usemap_snr == pgdat_snr) return; if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr) /* skip redundant message */ return; old_usemap_snr = usemap_snr; old_pgdat_snr = pgdat_snr; usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr)); if (usemap_nid != nid) { printk(KERN_INFO "node %d must be removed before remove section %ld\n", nid, usemap_snr); return; } /* * There is a circular dependency. * Some platforms allow un-removable section because they will just * gather other removable sections for dynamic partitioning. * Just notify un-removable section's number here. */ printk(KERN_INFO "Section %ld and %ld (node %d)", usemap_snr, pgdat_snr, nid); printk(KERN_CONT " have a circular dependency on usemap and pgdat allocations\n"); } #else static unsigned long * __init sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat) { return NULL; } static void __init check_usemap_section_nr(int nid, unsigned long *usemap) { } #endif /* CONFIG_MEMORY_HOTREMOVE */ static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum) { unsigned long *usemap; struct mem_section *ms = __nr_to_section(pnum); int nid = sparse_early_nid(ms); usemap = sparse_early_usemap_alloc_pgdat_section(NODE_DATA(nid)); if (usemap) return usemap; usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size()); if (usemap) { check_usemap_section_nr(nid, usemap); return usemap; } /* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */ nid = 0; printk(KERN_WARNING "%s: allocation failed\n", __func__); return NULL; } #ifndef CONFIG_SPARSEMEM_VMEMMAP struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid) { struct page *map; map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION); if (map) return map; map = alloc_bootmem_pages_node(NODE_DATA(nid), PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION)); return map; } #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) { struct page *map; struct mem_section *ms = __nr_to_section(pnum); int nid = sparse_early_nid(ms); map = sparse_mem_map_populate(pnum, nid); if (map) return map; printk(KERN_ERR "%s: sparsemem memory map backing failed " "some memory will not be available.\n", __func__); ms->section_mem_map = 0; return NULL; } void __attribute__((weak)) __meminit vmemmap_populate_print_last(void) { } /* * Allocate the accumulated non-linear sections, allocate a mem_map * for each and record the physical to section mapping. */ void __init sparse_init(void) { unsigned long pnum; struct page *map; unsigned long *usemap; unsigned long **usemap_map; int size; /* * map is using big page (aka 2M in x86 64 bit) * usemap is less one page (aka 24 bytes) * so alloc 2M (with 2M align) and 24 bytes in turn will * make next 2M slip to one more 2M later. * then in big system, the memory will have a lot of holes... * here try to allocate 2M pages continously. * * powerpc need to call sparse_init_one_section right after each * sparse_early_mem_map_alloc, so allocate usemap_map at first. */ size = sizeof(unsigned long *) * NR_MEM_SECTIONS; usemap_map = alloc_bootmem(size); if (!usemap_map) panic("can not allocate usemap_map\n"); for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { if (!present_section_nr(pnum)) continue; usemap_map[pnum] = sparse_early_usemap_alloc(pnum); } for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { if (!present_section_nr(pnum)) continue; usemap = usemap_map[pnum]; if (!usemap) continue; map = sparse_early_mem_map_alloc(pnum); if (!map) continue; sparse_init_one_section(__nr_to_section(pnum), pnum, map, usemap); } vmemmap_populate_print_last(); free_bootmem(__pa(usemap_map), size); } #ifdef CONFIG_MEMORY_HOTPLUG #ifdef CONFIG_SPARSEMEM_VMEMMAP static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, unsigned long nr_pages) { /* This will make the necessary allocations eventually. */ return sparse_mem_map_populate(pnum, nid); } static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) { return; /* XXX: Not implemented yet */ } static void free_map_bootmem(struct page *page, unsigned long nr_pages) { } #else static struct page *__kmalloc_section_memmap(unsigned long nr_pages) { struct page *page, *ret; unsigned long memmap_size = sizeof(struct page) * nr_pages; page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size)); if (page) goto got_map_page; ret = vmalloc(memmap_size); if (ret) goto got_map_ptr; return NULL; got_map_page: ret = (struct page *)pfn_to_kaddr(page_to_pfn(page)); got_map_ptr: memset(ret, 0, memmap_size); return ret; } static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, unsigned long nr_pages) { return __kmalloc_section_memmap(nr_pages); } static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) { if (is_vmalloc_addr(memmap)) vfree(memmap); else free_pages((unsigned long)memmap, get_order(sizeof(struct page) * nr_pages)); } static void free_map_bootmem(struct page *page, unsigned long nr_pages) { unsigned long maps_section_nr, removing_section_nr, i; int magic; for (i = 0; i < nr_pages; i++, page++) { magic = atomic_read(&page->_mapcount); BUG_ON(magic == NODE_INFO); maps_section_nr = pfn_to_section_nr(page_to_pfn(page)); removing_section_nr = page->private; /* * When this function is called, the removing section is * logical offlined state. This means all pages are isolated * from page allocator. If removing section's memmap is placed * on the same section, it must not be freed. * If it is freed, page allocator may allocate it which will * be removed physically soon. */ if (maps_section_nr != removing_section_nr) put_page_bootmem(page); } } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ static void free_section_usemap(struct page *memmap, unsigned long *usemap) { struct page *usemap_page; unsigned long nr_pages; if (!usemap) return; usemap_page = virt_to_page(usemap); /* * Check to see if allocation came from hot-plug-add */ if (PageSlab(usemap_page)) { kfree(usemap); if (memmap) __kfree_section_memmap(memmap, PAGES_PER_SECTION); return; } /* * The usemap came from bootmem. This is packed with other usemaps * on the section which has pgdat at boot time. Just keep it as is now. */ if (memmap) { struct page *memmap_page; memmap_page = virt_to_page(memmap); nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page)) >> PAGE_SHIFT; free_map_bootmem(memmap_page, nr_pages); } } /* * returns the number of sections whose mem_maps were properly * set. If this is <=0, then that means that the passed-in * map was not consumed and must be freed. */ int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn, int nr_pages) { unsigned long section_nr = pfn_to_section_nr(start_pfn); struct pglist_data *pgdat = zone->zone_pgdat; struct mem_section *ms; struct page *memmap; unsigned long *usemap; unsigned long flags; int ret; /* * no locking for this, because it does its own * plus, it does a kmalloc */ ret = sparse_index_init(section_nr, pgdat->node_id); if (ret < 0 && ret != -EEXIST) return ret; memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, nr_pages); if (!memmap) return -ENOMEM; usemap = __kmalloc_section_usemap(); if (!usemap) { __kfree_section_memmap(memmap, nr_pages); return -ENOMEM; } pgdat_resize_lock(pgdat, &flags); ms = __pfn_to_section(start_pfn); if (ms->section_mem_map & SECTION_MARKED_PRESENT) { ret = -EEXIST; goto out; } ms->section_mem_map |= SECTION_MARKED_PRESENT; ret = sparse_init_one_section(ms, section_nr, memmap, usemap); out: pgdat_resize_unlock(pgdat, &flags); if (ret <= 0) { kfree(usemap); __kfree_section_memmap(memmap, nr_pages); } return ret; } void sparse_remove_one_section(struct zone *zone, struct mem_section *ms) { struct page *memmap = NULL; unsigned long *usemap = NULL; if (ms->section_mem_map) { usemap = ms->pageblock_flags; memmap = sparse_decode_mem_map(ms->section_mem_map, __section_nr(ms)); ms->section_mem_map = 0; ms->pageblock_flags = NULL; } free_section_usemap(memmap, usemap); } #endif