author     Ingo Molnar <mingo@elte.hu>  2009-09-19 05:27:32 -0400
committer  Ingo Molnar <mingo@elte.hu>  2009-09-19 05:28:41 -0400
commit     929bf0d0156562ce631728b6fa53d68004d456d2
tree       739063990a8077b29ef97e69d73bce94573daae4 /kernel
parent     def0a9b2573e00ab0b486cb5382625203ab4c4a6
parent     202c4675c55ddf6b443c7e057d2dff6b42ef71aa
Merge branch 'linus' into perfcounters/core
Merge reason: Bring in tracing changes we depend on.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
44 files changed, 2009 insertions, 1543 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index b833bd5cc127..3d9c7e27e3f9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -90,7 +90,6 @@ obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o | |||
90 | obj-$(CONFIG_MARKERS) += marker.o | 90 | obj-$(CONFIG_MARKERS) += marker.o |
91 | obj-$(CONFIG_TRACEPOINTS) += tracepoint.o | 91 | obj-$(CONFIG_TRACEPOINTS) += tracepoint.o |
92 | obj-$(CONFIG_LATENCYTOP) += latencytop.o | 92 | obj-$(CONFIG_LATENCYTOP) += latencytop.o |
93 | obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o | ||
94 | obj-$(CONFIG_FUNCTION_TRACER) += trace/ | 93 | obj-$(CONFIG_FUNCTION_TRACER) += trace/ |
95 | obj-$(CONFIG_TRACING) += trace/ | 94 | obj-$(CONFIG_TRACING) += trace/ |
96 | obj-$(CONFIG_X86_DS) += trace/ | 95 | obj-$(CONFIG_X86_DS) += trace/ |
@@ -117,7 +116,7 @@ $(obj)/config_data.gz: .config FORCE | |||
117 | $(call if_changed,gzip) | 116 | $(call if_changed,gzip) |
118 | 117 | ||
119 | quiet_cmd_ikconfiggz = IKCFG $@ | 118 | quiet_cmd_ikconfiggz = IKCFG $@ |
120 | cmd_ikconfiggz = (echo "static const char kernel_config_data[] = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;") > $@ | 119 | cmd_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;") > $@ |
121 | targets += config_data.h | 120 | targets += config_data.h |
122 | $(obj)/config_data.h: $(obj)/config_data.gz FORCE | 121 | $(obj)/config_data.h: $(obj)/config_data.gz FORCE |
123 | $(call if_changed,ikconfiggz) | 122 | $(call if_changed,ikconfiggz) |
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8ce10043e4ac..6ba0f1ecb212 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -401,6 +401,7 @@ int disable_nonboot_cpus(void) | |||
401 | break; | 401 | break; |
402 | } | 402 | } |
403 | } | 403 | } |
404 | |||
404 | if (!error) { | 405 | if (!error) { |
405 | BUG_ON(num_online_cpus() > 1); | 406 | BUG_ON(num_online_cpus() > 1); |
406 | /* Make sure the CPUs won't be enabled by someone else */ | 407 | /* Make sure the CPUs won't be enabled by someone else */ |
@@ -413,6 +414,14 @@ int disable_nonboot_cpus(void) | |||
413 | return error; | 414 | return error; |
414 | } | 415 | } |
415 | 416 | ||
417 | void __weak arch_enable_nonboot_cpus_begin(void) | ||
418 | { | ||
419 | } | ||
420 | |||
421 | void __weak arch_enable_nonboot_cpus_end(void) | ||
422 | { | ||
423 | } | ||
424 | |||
416 | void __ref enable_nonboot_cpus(void) | 425 | void __ref enable_nonboot_cpus(void) |
417 | { | 426 | { |
418 | int cpu, error; | 427 | int cpu, error; |
@@ -424,6 +433,9 @@ void __ref enable_nonboot_cpus(void) | |||
424 | goto out; | 433 | goto out; |
425 | 434 | ||
426 | printk("Enabling non-boot CPUs ...\n"); | 435 | printk("Enabling non-boot CPUs ...\n"); |
436 | |||
437 | arch_enable_nonboot_cpus_begin(); | ||
438 | |||
427 | for_each_cpu(cpu, frozen_cpus) { | 439 | for_each_cpu(cpu, frozen_cpus) { |
428 | error = _cpu_up(cpu, 1); | 440 | error = _cpu_up(cpu, 1); |
429 | if (!error) { | 441 | if (!error) { |
@@ -432,6 +444,9 @@ void __ref enable_nonboot_cpus(void) | |||
432 | } | 444 | } |
433 | printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error); | 445 | printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error); |
434 | } | 446 | } |
447 | |||
448 | arch_enable_nonboot_cpus_end(); | ||
449 | |||
435 | cpumask_clear(frozen_cpus); | 450 | cpumask_clear(frozen_cpus); |
436 | out: | 451 | out: |
437 | cpu_maps_update_done(); | 452 | cpu_maps_update_done(); |
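The two `__weak` stubs added above are override points: they do nothing by default, but an architecture that wants to batch work around the resume-time CPU onlining can provide strong definitions, and `enable_nonboot_cpus()` will call them before and after its `for_each_cpu()` bring-up loop. A minimal sketch of such an override (the function names come from the patch; the body is hypothetical):

```c
/* Hypothetical arch override of the __weak stubs added in kernel/cpu.c.
 * enable_nonboot_cpus() calls these instead of the empty defaults. */
static bool resume_batch_active;

void arch_enable_nonboot_cpus_begin(void)
{
	/* e.g. defer per-CPU state sync until all CPUs are back online */
	resume_batch_active = true;
}

void arch_enable_nonboot_cpus_end(void)
{
	/* flush whatever was deferred while the CPUs were brought up */
	resume_batch_active = false;
}
```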
diff --git a/kernel/cred.c b/kernel/cred.c
index 006fcab009d5..d7f7a01082eb 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -147,7 +147,8 @@ static void put_cred_rcu(struct rcu_head *rcu) | |||
147 | key_put(cred->thread_keyring); | 147 | key_put(cred->thread_keyring); |
148 | key_put(cred->request_key_auth); | 148 | key_put(cred->request_key_auth); |
149 | release_tgcred(cred); | 149 | release_tgcred(cred); |
150 | put_group_info(cred->group_info); | 150 | if (cred->group_info) |
151 | put_group_info(cred->group_info); | ||
151 | free_uid(cred->user); | 152 | free_uid(cred->user); |
152 | kmem_cache_free(cred_jar, cred); | 153 | kmem_cache_free(cred_jar, cred); |
153 | } | 154 | } |
diff --git a/kernel/dma-coherent.c b/kernel/dma-coherent.c
deleted file mode 100644
index 962a3b574f21..000000000000
--- a/kernel/dma-coherent.c
+++ /dev/null
@@ -1,176 +0,0 @@ | |||
1 | /* | ||
2 | * Coherent per-device memory handling. | ||
3 | * Borrowed from i386 | ||
4 | */ | ||
5 | #include <linux/kernel.h> | ||
6 | #include <linux/dma-mapping.h> | ||
7 | |||
8 | struct dma_coherent_mem { | ||
9 | void *virt_base; | ||
10 | u32 device_base; | ||
11 | int size; | ||
12 | int flags; | ||
13 | unsigned long *bitmap; | ||
14 | }; | ||
15 | |||
16 | int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, | ||
17 | dma_addr_t device_addr, size_t size, int flags) | ||
18 | { | ||
19 | void __iomem *mem_base = NULL; | ||
20 | int pages = size >> PAGE_SHIFT; | ||
21 | int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); | ||
22 | |||
23 | if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) | ||
24 | goto out; | ||
25 | if (!size) | ||
26 | goto out; | ||
27 | if (dev->dma_mem) | ||
28 | goto out; | ||
29 | |||
30 | /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ | ||
31 | |||
32 | mem_base = ioremap(bus_addr, size); | ||
33 | if (!mem_base) | ||
34 | goto out; | ||
35 | |||
36 | dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); | ||
37 | if (!dev->dma_mem) | ||
38 | goto out; | ||
39 | dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); | ||
40 | if (!dev->dma_mem->bitmap) | ||
41 | goto free1_out; | ||
42 | |||
43 | dev->dma_mem->virt_base = mem_base; | ||
44 | dev->dma_mem->device_base = device_addr; | ||
45 | dev->dma_mem->size = pages; | ||
46 | dev->dma_mem->flags = flags; | ||
47 | |||
48 | if (flags & DMA_MEMORY_MAP) | ||
49 | return DMA_MEMORY_MAP; | ||
50 | |||
51 | return DMA_MEMORY_IO; | ||
52 | |||
53 | free1_out: | ||
54 | kfree(dev->dma_mem); | ||
55 | out: | ||
56 | if (mem_base) | ||
57 | iounmap(mem_base); | ||
58 | return 0; | ||
59 | } | ||
60 | EXPORT_SYMBOL(dma_declare_coherent_memory); | ||
61 | |||
62 | void dma_release_declared_memory(struct device *dev) | ||
63 | { | ||
64 | struct dma_coherent_mem *mem = dev->dma_mem; | ||
65 | |||
66 | if (!mem) | ||
67 | return; | ||
68 | dev->dma_mem = NULL; | ||
69 | iounmap(mem->virt_base); | ||
70 | kfree(mem->bitmap); | ||
71 | kfree(mem); | ||
72 | } | ||
73 | EXPORT_SYMBOL(dma_release_declared_memory); | ||
74 | |||
75 | void *dma_mark_declared_memory_occupied(struct device *dev, | ||
76 | dma_addr_t device_addr, size_t size) | ||
77 | { | ||
78 | struct dma_coherent_mem *mem = dev->dma_mem; | ||
79 | int pos, err; | ||
80 | |||
81 | size += device_addr & ~PAGE_MASK; | ||
82 | |||
83 | if (!mem) | ||
84 | return ERR_PTR(-EINVAL); | ||
85 | |||
86 | pos = (device_addr - mem->device_base) >> PAGE_SHIFT; | ||
87 | err = bitmap_allocate_region(mem->bitmap, pos, get_order(size)); | ||
88 | if (err != 0) | ||
89 | return ERR_PTR(err); | ||
90 | return mem->virt_base + (pos << PAGE_SHIFT); | ||
91 | } | ||
92 | EXPORT_SYMBOL(dma_mark_declared_memory_occupied); | ||
93 | |||
94 | /** | ||
95 | * dma_alloc_from_coherent() - try to allocate memory from the per-device coherent area | ||
96 | * | ||
97 | * @dev: device from which we allocate memory | ||
98 | * @size: size of requested memory area | ||
99 | * @dma_handle: This will be filled with the correct dma handle | ||
100 | * @ret: This pointer will be filled with the virtual address | ||
101 | * to allocated area. | ||
102 | * | ||
103 | * This function should be only called from per-arch dma_alloc_coherent() | ||
104 | * to support allocation from per-device coherent memory pools. | ||
105 | * | ||
106 | * Returns 0 if dma_alloc_coherent should continue with allocating from | ||
107 | * generic memory areas, or !0 if dma_alloc_coherent should return @ret. | ||
108 | */ | ||
109 | int dma_alloc_from_coherent(struct device *dev, ssize_t size, | ||
110 | dma_addr_t *dma_handle, void **ret) | ||
111 | { | ||
112 | struct dma_coherent_mem *mem; | ||
113 | int order = get_order(size); | ||
114 | int pageno; | ||
115 | |||
116 | if (!dev) | ||
117 | return 0; | ||
118 | mem = dev->dma_mem; | ||
119 | if (!mem) | ||
120 | return 0; | ||
121 | |||
122 | *ret = NULL; | ||
123 | |||
124 | if (unlikely(size > (mem->size << PAGE_SHIFT))) | ||
125 | goto err; | ||
126 | |||
127 | pageno = bitmap_find_free_region(mem->bitmap, mem->size, order); | ||
128 | if (unlikely(pageno < 0)) | ||
129 | goto err; | ||
130 | |||
131 | /* | ||
132 | * Memory was found in the per-device area. | ||
133 | */ | ||
134 | *dma_handle = mem->device_base + (pageno << PAGE_SHIFT); | ||
135 | *ret = mem->virt_base + (pageno << PAGE_SHIFT); | ||
136 | memset(*ret, 0, size); | ||
137 | |||
138 | return 1; | ||
139 | |||
140 | err: | ||
141 | /* | ||
142 | * In the case where the allocation can not be satisfied from the | ||
143 | * per-device area, try to fall back to generic memory if the | ||
144 | * constraints allow it. | ||
145 | */ | ||
146 | return mem->flags & DMA_MEMORY_EXCLUSIVE; | ||
147 | } | ||
148 | EXPORT_SYMBOL(dma_alloc_from_coherent); | ||
149 | |||
150 | /** | ||
151 | * dma_release_from_coherent() - try to free the memory allocated from per-device coherent memory pool | ||
152 | * @dev: device from which the memory was allocated | ||
153 | * @order: the order of pages allocated | ||
154 | * @vaddr: virtual address of allocated pages | ||
155 | * | ||
156 | * This checks whether the memory was allocated from the per-device | ||
157 | * coherent memory pool and if so, releases that memory. | ||
158 | * | ||
159 | * Returns 1 if we correctly released the memory, or 0 if | ||
160 | * dma_release_coherent() should proceed with releasing memory from | ||
161 | * generic pools. | ||
162 | */ | ||
163 | int dma_release_from_coherent(struct device *dev, int order, void *vaddr) | ||
164 | { | ||
165 | struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; | ||
166 | |||
167 | if (mem && vaddr >= mem->virt_base && vaddr < | ||
168 | (mem->virt_base + (mem->size << PAGE_SHIFT))) { | ||
169 | int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; | ||
170 | |||
171 | bitmap_release_region(mem->bitmap, page, order); | ||
172 | return 1; | ||
173 | } | ||
174 | return 0; | ||
175 | } | ||
176 | EXPORT_SYMBOL(dma_release_from_coherent); | ||
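For reference, the kernel-doc in the removed file describes how `dma_alloc_from_coherent()` is meant to be driven: a per-arch `dma_alloc_coherent()` consults the device's private pool first and only falls back to the generic allocator when the helper returns 0. A hedged sketch of that calling pattern (simplified, not taken from this patch or any particular architecture; real implementations also handle cache attributes and error paths):

```c
#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <asm/io.h>

/* Illustrative only: the calling convention documented above. */
void *example_dma_alloc_coherent(struct device *dev, size_t size,
				 dma_addr_t *dma_handle, gfp_t gfp)
{
	void *ret;

	/* Non-zero means the per-device pool handled the request
	 * (ret may still be NULL if the pool is exclusive and full). */
	if (dma_alloc_from_coherent(dev, size, dma_handle, &ret))
		return ret;

	/* Otherwise fall back to a generic allocation. */
	ret = (void *)__get_free_pages(gfp, get_order(size));
	if (ret)
		*dma_handle = virt_to_phys(ret);
	return ret;
}
```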
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index 22e9dcfaa3d3..654efd09f6a9 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -34,7 +34,7 @@ config GCOV_KERNEL | |||
34 | config GCOV_PROFILE_ALL | 34 | config GCOV_PROFILE_ALL |
35 | bool "Profile entire Kernel" | 35 | bool "Profile entire Kernel" |
36 | depends on GCOV_KERNEL | 36 | depends on GCOV_KERNEL |
37 | depends on S390 || X86 | 37 | depends on S390 || X86 || (PPC && EXPERIMENTAL) |
38 | default n | 38 | default n |
39 | ---help--- | 39 | ---help--- |
40 | This options activates profiling for the entire kernel. | 40 | This options activates profiling for the entire kernel. |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 49da79ab8486..05071bf6a37b 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -485,6 +485,7 @@ void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id, | |||
485 | debug_object_init_on_stack(timer, &hrtimer_debug_descr); | 485 | debug_object_init_on_stack(timer, &hrtimer_debug_descr); |
486 | __hrtimer_init(timer, clock_id, mode); | 486 | __hrtimer_init(timer, clock_id, mode); |
487 | } | 487 | } |
488 | EXPORT_SYMBOL_GPL(hrtimer_init_on_stack); | ||
488 | 489 | ||
489 | void destroy_hrtimer_on_stack(struct hrtimer *timer) | 490 | void destroy_hrtimer_on_stack(struct hrtimer *timer) |
490 | { | 491 | { |
@@ -1477,6 +1478,7 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) | |||
1477 | sl->timer.function = hrtimer_wakeup; | 1478 | sl->timer.function = hrtimer_wakeup; |
1478 | sl->task = task; | 1479 | sl->task = task; |
1479 | } | 1480 | } |
1481 | EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); | ||
1480 | 1482 | ||
1481 | static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) | 1483 | static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) |
1482 | { | 1484 | { |
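The two new `EXPORT_SYMBOL_GPL()` lines make `hrtimer_init_on_stack()` and `hrtimer_init_sleeper()` usable from GPL modules. A hedged sketch of the pattern they enable, modelled on `do_nanosleep()` in this same file (the calling function is hypothetical, not part of the patch):

```c
#include <linux/hrtimer.h>
#include <linux/sched.h>

/* Hypothetical module code: sleep for @delta using an on-stack hrtimer. */
static void example_hrtimer_sleep(ktime_t delta)
{
	struct hrtimer_sleeper t;

	hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer_init_sleeper(&t, current);	/* sets hrtimer_wakeup() as callback */

	set_current_state(TASK_UNINTERRUPTIBLE);
	hrtimer_start(&t.timer, delta, HRTIMER_MODE_REL);

	if (t.task)			/* not yet woken by hrtimer_wakeup() */
		schedule();
	__set_current_state(TASK_RUNNING);

	hrtimer_cancel(&t.timer);
	destroy_hrtimer_on_stack(&t.timer);
}
```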
diff --git a/kernel/module.c b/kernel/module.c
index 46580edff0cb..05ce49ced8f6 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -369,7 +369,7 @@ EXPORT_SYMBOL_GPL(find_module); | |||
369 | 369 | ||
370 | #ifdef CONFIG_SMP | 370 | #ifdef CONFIG_SMP |
371 | 371 | ||
372 | #ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA | 372 | #ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA |
373 | 373 | ||
374 | static void *percpu_modalloc(unsigned long size, unsigned long align, | 374 | static void *percpu_modalloc(unsigned long size, unsigned long align, |
375 | const char *name) | 375 | const char *name) |
@@ -394,7 +394,7 @@ static void percpu_modfree(void *freeme) | |||
394 | free_percpu(freeme); | 394 | free_percpu(freeme); |
395 | } | 395 | } |
396 | 396 | ||
397 | #else /* ... !CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ | 397 | #else /* ... CONFIG_HAVE_LEGACY_PER_CPU_AREA */ |
398 | 398 | ||
399 | /* Number of blocks used and allocated. */ | 399 | /* Number of blocks used and allocated. */ |
400 | static unsigned int pcpu_num_used, pcpu_num_allocated; | 400 | static unsigned int pcpu_num_used, pcpu_num_allocated; |
@@ -540,7 +540,7 @@ static int percpu_modinit(void) | |||
540 | } | 540 | } |
541 | __initcall(percpu_modinit); | 541 | __initcall(percpu_modinit); |
542 | 542 | ||
543 | #endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ | 543 | #endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ |
544 | 544 | ||
545 | static unsigned int find_pcpusec(Elf_Ehdr *hdr, | 545 | static unsigned int find_pcpusec(Elf_Ehdr *hdr, |
546 | Elf_Shdr *sechdrs, | 546 | Elf_Shdr *sechdrs, |
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 06d233a06da5..d013f4e89e9c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -106,16 +106,16 @@ hw_perf_group_sched_in(struct perf_counter *group_leader, | |||
106 | 106 | ||
107 | void __weak perf_counter_print_debug(void) { } | 107 | void __weak perf_counter_print_debug(void) { } |
108 | 108 | ||
109 | static DEFINE_PER_CPU(int, disable_count); | 109 | static DEFINE_PER_CPU(int, perf_disable_count); |
110 | 110 | ||
111 | void __perf_disable(void) | 111 | void __perf_disable(void) |
112 | { | 112 | { |
113 | __get_cpu_var(disable_count)++; | 113 | __get_cpu_var(perf_disable_count)++; |
114 | } | 114 | } |
115 | 115 | ||
116 | bool __perf_enable(void) | 116 | bool __perf_enable(void) |
117 | { | 117 | { |
118 | return !--__get_cpu_var(disable_count); | 118 | return !--__get_cpu_var(perf_disable_count); |
119 | } | 119 | } |
120 | 120 | ||
121 | void perf_disable(void) | 121 | void perf_disable(void) |
@@ -4246,6 +4246,7 @@ static int perf_copy_attr(struct perf_counter_attr __user *uattr, | |||
4246 | if (val) | 4246 | if (val) |
4247 | goto err_size; | 4247 | goto err_size; |
4248 | } | 4248 | } |
4249 | size = sizeof(*attr); | ||
4249 | } | 4250 | } |
4250 | 4251 | ||
4251 | ret = copy_from_user(attr, uattr, size); | 4252 | ret = copy_from_user(attr, uattr, size); |
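Two independent fixes here: the per-CPU variable gains a `perf_` prefix to keep the global namespace clean, and `perf_copy_attr()` now clamps `size` back to `sizeof(*attr)` once it has verified that a larger userspace struct only carries zero padding; without the clamp, the following `copy_from_user()` would write past the kernel-side buffer. A hedged sketch of that ABI size-handshake in isolation (hypothetical names, not the kernel function itself):

```c
#include <linux/uaccess.h>
#include <linux/string.h>
#include <linux/errno.h>

/* Hypothetical helper: kbuf is kernel_size bytes long. */
static int copy_abi_struct(void *kbuf, size_t kernel_size,
			   const void __user *ubuf, size_t user_size)
{
	if (user_size > kernel_size) {
		/* Newer userspace: acceptable only if the tail it knows
		 * about (and we do not) is all zeroes; that check is
		 * omitted here, the real code walks it with get_user(). */
		user_size = kernel_size;	/* the fix: clamp before copying */
	} else if (user_size < kernel_size) {
		/* Older userspace: zero the part it did not supply. */
		memset(kbuf + user_size, 0, kernel_size - user_size);
	}

	return copy_from_user(kbuf, ubuf, user_size) ? -EFAULT : 0;
}
```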
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 72067cbdb37f..91e09d3b2eb2 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -208,3 +208,17 @@ config APM_EMULATION | |||
208 | random kernel OOPSes or reboots that don't seem to be related to | 208 | random kernel OOPSes or reboots that don't seem to be related to |
209 | anything, try disabling/enabling this option (or disabling/enabling | 209 | anything, try disabling/enabling this option (or disabling/enabling |
210 | APM in your BIOS). | 210 | APM in your BIOS). |
211 | |||
212 | config PM_RUNTIME | ||
213 | bool "Run-time PM core functionality" | ||
214 | depends on PM | ||
215 | ---help--- | ||
216 | Enable functionality allowing I/O devices to be put into energy-saving | ||
217 | (low power) states at run time (or autosuspended) after a specified | ||
218 | period of inactivity and woken up in response to a hardware-generated | ||
219 | wake-up event or a driver's request. | ||
220 | |||
221 | Hardware support is generally required for this functionality to work | ||
222 | and the bus type drivers of the buses the devices are on are | ||
223 | responsible for the actual handling of the autosuspend requests and | ||
224 | wake-up events. | ||
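The new PM_RUNTIME option gates the runtime PM core being introduced elsewhere in this merge window. Purely as orientation, and none of it from this patch (the callback names reflect the runtime PM framework as it was being added, so treat them as assumptions): a bus type or driver opts in by filling the runtime callbacks of its `dev_pm_ops`, which the PM core then invokes to autosuspend and resume the device.

```c
#include <linux/pm.h>
#include <linux/device.h>

/* Assumed shape of the runtime PM hooks this option enables (not from
 * this patch): the driver/bus supplies the callbacks, the PM core
 * decides when to call them. */
static int example_runtime_suspend(struct device *dev)
{
	/* put the device into a low-power state */
	return 0;
}

static int example_runtime_resume(struct device *dev)
{
	/* bring the device back to full power */
	return 0;
}

static const struct dev_pm_ops example_pm_ops = {
	.runtime_suspend = example_runtime_suspend,
	.runtime_resume  = example_runtime_resume,
};
```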
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 81d2e7464893..04b3a83d686f 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -298,8 +298,8 @@ int hibernation_snapshot(int platform_mode) | |||
298 | if (error) | 298 | if (error) |
299 | return error; | 299 | return error; |
300 | 300 | ||
301 | /* Free memory before shutting down devices. */ | 301 | /* Preallocate image memory before shutting down devices. */ |
302 | error = swsusp_shrink_memory(); | 302 | error = hibernate_preallocate_memory(); |
303 | if (error) | 303 | if (error) |
304 | goto Close; | 304 | goto Close; |
305 | 305 | ||
@@ -315,6 +315,10 @@ int hibernation_snapshot(int platform_mode) | |||
315 | /* Control returns here after successful restore */ | 315 | /* Control returns here after successful restore */ |
316 | 316 | ||
317 | Resume_devices: | 317 | Resume_devices: |
318 | /* We may need to release the preallocated image pages here. */ | ||
319 | if (error || !in_suspend) | ||
320 | swsusp_free(); | ||
321 | |||
318 | dpm_resume_end(in_suspend ? | 322 | dpm_resume_end(in_suspend ? |
319 | (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); | 323 | (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); |
320 | resume_console(); | 324 | resume_console(); |
@@ -460,11 +464,11 @@ int hibernation_platform_enter(void) | |||
460 | 464 | ||
461 | error = hibernation_ops->prepare(); | 465 | error = hibernation_ops->prepare(); |
462 | if (error) | 466 | if (error) |
463 | goto Platofrm_finish; | 467 | goto Platform_finish; |
464 | 468 | ||
465 | error = disable_nonboot_cpus(); | 469 | error = disable_nonboot_cpus(); |
466 | if (error) | 470 | if (error) |
467 | goto Platofrm_finish; | 471 | goto Platform_finish; |
468 | 472 | ||
469 | local_irq_disable(); | 473 | local_irq_disable(); |
470 | sysdev_suspend(PMSG_HIBERNATE); | 474 | sysdev_suspend(PMSG_HIBERNATE); |
@@ -476,7 +480,7 @@ int hibernation_platform_enter(void) | |||
476 | * We don't need to reenable the nonboot CPUs or resume consoles, since | 480 | * We don't need to reenable the nonboot CPUs or resume consoles, since |
477 | * the system is going to be halted anyway. | 481 | * the system is going to be halted anyway. |
478 | */ | 482 | */ |
479 | Platofrm_finish: | 483 | Platform_finish: |
480 | hibernation_ops->finish(); | 484 | hibernation_ops->finish(); |
481 | 485 | ||
482 | dpm_suspend_noirq(PMSG_RESTORE); | 486 | dpm_suspend_noirq(PMSG_RESTORE); |
@@ -578,7 +582,10 @@ int hibernate(void) | |||
578 | goto Thaw; | 582 | goto Thaw; |
579 | 583 | ||
580 | error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); | 584 | error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); |
581 | if (in_suspend && !error) { | 585 | if (error) |
586 | goto Thaw; | ||
587 | |||
588 | if (in_suspend) { | ||
582 | unsigned int flags = 0; | 589 | unsigned int flags = 0; |
583 | 590 | ||
584 | if (hibernation_mode == HIBERNATION_PLATFORM) | 591 | if (hibernation_mode == HIBERNATION_PLATFORM) |
@@ -590,8 +597,8 @@ int hibernate(void) | |||
590 | power_down(); | 597 | power_down(); |
591 | } else { | 598 | } else { |
592 | pr_debug("PM: Image restored successfully.\n"); | 599 | pr_debug("PM: Image restored successfully.\n"); |
593 | swsusp_free(); | ||
594 | } | 600 | } |
601 | |||
595 | Thaw: | 602 | Thaw: |
596 | thaw_processes(); | 603 | thaw_processes(); |
597 | Finish: | 604 | Finish: |
diff --git a/kernel/power/main.c b/kernel/power/main.c
index f710e36930cc..347d2cc88cd0 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/kobject.h> | 11 | #include <linux/kobject.h> |
12 | #include <linux/string.h> | 12 | #include <linux/string.h> |
13 | #include <linux/resume-trace.h> | 13 | #include <linux/resume-trace.h> |
14 | #include <linux/workqueue.h> | ||
14 | 15 | ||
15 | #include "power.h" | 16 | #include "power.h" |
16 | 17 | ||
@@ -217,8 +218,24 @@ static struct attribute_group attr_group = { | |||
217 | .attrs = g, | 218 | .attrs = g, |
218 | }; | 219 | }; |
219 | 220 | ||
221 | #ifdef CONFIG_PM_RUNTIME | ||
222 | struct workqueue_struct *pm_wq; | ||
223 | |||
224 | static int __init pm_start_workqueue(void) | ||
225 | { | ||
226 | pm_wq = create_freezeable_workqueue("pm"); | ||
227 | |||
228 | return pm_wq ? 0 : -ENOMEM; | ||
229 | } | ||
230 | #else | ||
231 | static inline int pm_start_workqueue(void) { return 0; } | ||
232 | #endif | ||
233 | |||
220 | static int __init pm_init(void) | 234 | static int __init pm_init(void) |
221 | { | 235 | { |
236 | int error = pm_start_workqueue(); | ||
237 | if (error) | ||
238 | return error; | ||
222 | power_kobj = kobject_create_and_add("power", NULL); | 239 | power_kobj = kobject_create_and_add("power", NULL); |
223 | if (!power_kobj) | 240 | if (!power_kobj) |
224 | return -ENOMEM; | 241 | return -ENOMEM; |
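The new `pm_wq` is a freezeable workqueue: its worker is frozen together with user space during suspend/hibernation, which is what the runtime PM core wants for work that must not race with a system sleep transition. A minimal sketch of queueing work on it (illustrative; assumes `pm_wq` is visible to the caller through the PM headers):

```c
#include <linux/workqueue.h>

/* Hypothetical user of pm_wq: the work item runs on the freezeable "pm"
 * workqueue created by pm_start_workqueue() above. */
static void example_pm_work_fn(struct work_struct *work)
{
	/* runtime-PM style housekeeping goes here */
}

static DECLARE_WORK(example_pm_work, example_pm_work_fn);

static void example_kick_pm_work(void)
{
	queue_work(pm_wq, &example_pm_work);
}
```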
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 26d5a26f82e3..46c5a26630a3 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -74,7 +74,7 @@ extern asmlinkage int swsusp_arch_resume(void); | |||
74 | 74 | ||
75 | extern int create_basic_memory_bitmaps(void); | 75 | extern int create_basic_memory_bitmaps(void); |
76 | extern void free_basic_memory_bitmaps(void); | 76 | extern void free_basic_memory_bitmaps(void); |
77 | extern int swsusp_shrink_memory(void); | 77 | extern int hibernate_preallocate_memory(void); |
78 | 78 | ||
79 | /** | 79 | /** |
80 | * Auxiliary structure used for reading the snapshot image data and | 80 | * Auxiliary structure used for reading the snapshot image data and |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 523a451b45d3..97955b0e44f4 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -233,7 +233,7 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) | |||
233 | 233 | ||
234 | #define BM_END_OF_MAP (~0UL) | 234 | #define BM_END_OF_MAP (~0UL) |
235 | 235 | ||
236 | #define BM_BITS_PER_BLOCK (PAGE_SIZE << 3) | 236 | #define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE) |
237 | 237 | ||
238 | struct bm_block { | 238 | struct bm_block { |
239 | struct list_head hook; /* hook into a list of bitmap blocks */ | 239 | struct list_head hook; /* hook into a list of bitmap blocks */ |
@@ -275,7 +275,7 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); | |||
275 | 275 | ||
276 | /** | 276 | /** |
277 | * create_bm_block_list - create a list of block bitmap objects | 277 | * create_bm_block_list - create a list of block bitmap objects |
278 | * @nr_blocks - number of blocks to allocate | 278 | * @pages - number of pages to track |
279 | * @list - list to put the allocated blocks into | 279 | * @list - list to put the allocated blocks into |
280 | * @ca - chain allocator to be used for allocating memory | 280 | * @ca - chain allocator to be used for allocating memory |
281 | */ | 281 | */ |
@@ -853,7 +853,7 @@ static unsigned int count_highmem_pages(void) | |||
853 | struct zone *zone; | 853 | struct zone *zone; |
854 | unsigned int n = 0; | 854 | unsigned int n = 0; |
855 | 855 | ||
856 | for_each_zone(zone) { | 856 | for_each_populated_zone(zone) { |
857 | unsigned long pfn, max_zone_pfn; | 857 | unsigned long pfn, max_zone_pfn; |
858 | 858 | ||
859 | if (!is_highmem(zone)) | 859 | if (!is_highmem(zone)) |
@@ -916,7 +916,7 @@ static unsigned int count_data_pages(void) | |||
916 | unsigned long pfn, max_zone_pfn; | 916 | unsigned long pfn, max_zone_pfn; |
917 | unsigned int n = 0; | 917 | unsigned int n = 0; |
918 | 918 | ||
919 | for_each_zone(zone) { | 919 | for_each_populated_zone(zone) { |
920 | if (is_highmem(zone)) | 920 | if (is_highmem(zone)) |
921 | continue; | 921 | continue; |
922 | 922 | ||
@@ -1010,7 +1010,7 @@ copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) | |||
1010 | struct zone *zone; | 1010 | struct zone *zone; |
1011 | unsigned long pfn; | 1011 | unsigned long pfn; |
1012 | 1012 | ||
1013 | for_each_zone(zone) { | 1013 | for_each_populated_zone(zone) { |
1014 | unsigned long max_zone_pfn; | 1014 | unsigned long max_zone_pfn; |
1015 | 1015 | ||
1016 | mark_free_pages(zone); | 1016 | mark_free_pages(zone); |
@@ -1033,6 +1033,25 @@ copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) | |||
1033 | static unsigned int nr_copy_pages; | 1033 | static unsigned int nr_copy_pages; |
1034 | /* Number of pages needed for saving the original pfns of the image pages */ | 1034 | /* Number of pages needed for saving the original pfns of the image pages */ |
1035 | static unsigned int nr_meta_pages; | 1035 | static unsigned int nr_meta_pages; |
1036 | /* | ||
1037 | * Numbers of normal and highmem page frames allocated for hibernation image | ||
1038 | * before suspending devices. | ||
1039 | */ | ||
1040 | unsigned int alloc_normal, alloc_highmem; | ||
1041 | /* | ||
1042 | * Memory bitmap used for marking saveable pages (during hibernation) or | ||
1043 | * hibernation image pages (during restore) | ||
1044 | */ | ||
1045 | static struct memory_bitmap orig_bm; | ||
1046 | /* | ||
1047 | * Memory bitmap used during hibernation for marking allocated page frames that | ||
1048 | * will contain copies of saveable pages. During restore it is initially used | ||
1049 | * for marking hibernation image pages, but then the set bits from it are | ||
1050 | * duplicated in @orig_bm and it is released. On highmem systems it is next | ||
1051 | * used for marking "safe" highmem pages, but it has to be reinitialized for | ||
1052 | * this purpose. | ||
1053 | */ | ||
1054 | static struct memory_bitmap copy_bm; | ||
1036 | 1055 | ||
1037 | /** | 1056 | /** |
1038 | * swsusp_free - free pages allocated for the suspend. | 1057 | * swsusp_free - free pages allocated for the suspend. |
@@ -1046,7 +1065,7 @@ void swsusp_free(void) | |||
1046 | struct zone *zone; | 1065 | struct zone *zone; |
1047 | unsigned long pfn, max_zone_pfn; | 1066 | unsigned long pfn, max_zone_pfn; |
1048 | 1067 | ||
1049 | for_each_zone(zone) { | 1068 | for_each_populated_zone(zone) { |
1050 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; | 1069 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; |
1051 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) | 1070 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) |
1052 | if (pfn_valid(pfn)) { | 1071 | if (pfn_valid(pfn)) { |
@@ -1064,74 +1083,286 @@ void swsusp_free(void) | |||
1064 | nr_meta_pages = 0; | 1083 | nr_meta_pages = 0; |
1065 | restore_pblist = NULL; | 1084 | restore_pblist = NULL; |
1066 | buffer = NULL; | 1085 | buffer = NULL; |
1086 | alloc_normal = 0; | ||
1087 | alloc_highmem = 0; | ||
1067 | } | 1088 | } |
1068 | 1089 | ||
1090 | /* Helper functions used for the shrinking of memory. */ | ||
1091 | |||
1092 | #define GFP_IMAGE (GFP_KERNEL | __GFP_NOWARN) | ||
1093 | |||
1069 | /** | 1094 | /** |
1070 | * swsusp_shrink_memory - Try to free as much memory as needed | 1095 | * preallocate_image_pages - Allocate a number of pages for hibernation image |
1071 | * | 1096 | * @nr_pages: Number of page frames to allocate. |
1072 | * ... but do not OOM-kill anyone | 1097 | * @mask: GFP flags to use for the allocation. |
1073 | * | 1098 | * |
1074 | * Notice: all userland should be stopped before it is called, or | 1099 | * Return value: Number of page frames actually allocated |
1075 | * livelock is possible. | 1100 | */ |
1101 | static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask) | ||
1102 | { | ||
1103 | unsigned long nr_alloc = 0; | ||
1104 | |||
1105 | while (nr_pages > 0) { | ||
1106 | struct page *page; | ||
1107 | |||
1108 | page = alloc_image_page(mask); | ||
1109 | if (!page) | ||
1110 | break; | ||
1111 | memory_bm_set_bit(©_bm, page_to_pfn(page)); | ||
1112 | if (PageHighMem(page)) | ||
1113 | alloc_highmem++; | ||
1114 | else | ||
1115 | alloc_normal++; | ||
1116 | nr_pages--; | ||
1117 | nr_alloc++; | ||
1118 | } | ||
1119 | |||
1120 | return nr_alloc; | ||
1121 | } | ||
1122 | |||
1123 | static unsigned long preallocate_image_memory(unsigned long nr_pages) | ||
1124 | { | ||
1125 | return preallocate_image_pages(nr_pages, GFP_IMAGE); | ||
1126 | } | ||
1127 | |||
1128 | #ifdef CONFIG_HIGHMEM | ||
1129 | static unsigned long preallocate_image_highmem(unsigned long nr_pages) | ||
1130 | { | ||
1131 | return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM); | ||
1132 | } | ||
1133 | |||
1134 | /** | ||
1135 | * __fraction - Compute (an approximation of) x * (multiplier / base) | ||
1076 | */ | 1136 | */ |
1137 | static unsigned long __fraction(u64 x, u64 multiplier, u64 base) | ||
1138 | { | ||
1139 | x *= multiplier; | ||
1140 | do_div(x, base); | ||
1141 | return (unsigned long)x; | ||
1142 | } | ||
1143 | |||
1144 | static unsigned long preallocate_highmem_fraction(unsigned long nr_pages, | ||
1145 | unsigned long highmem, | ||
1146 | unsigned long total) | ||
1147 | { | ||
1148 | unsigned long alloc = __fraction(nr_pages, highmem, total); | ||
1077 | 1149 | ||
1078 | #define SHRINK_BITE 10000 | 1150 | return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM); |
1079 | static inline unsigned long __shrink_memory(long tmp) | 1151 | } |
1152 | #else /* CONFIG_HIGHMEM */ | ||
1153 | static inline unsigned long preallocate_image_highmem(unsigned long nr_pages) | ||
1080 | { | 1154 | { |
1081 | if (tmp > SHRINK_BITE) | 1155 | return 0; |
1082 | tmp = SHRINK_BITE; | ||
1083 | return shrink_all_memory(tmp); | ||
1084 | } | 1156 | } |
1085 | 1157 | ||
1086 | int swsusp_shrink_memory(void) | 1158 | static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages, |
1159 | unsigned long highmem, | ||
1160 | unsigned long total) | ||
1161 | { | ||
1162 | return 0; | ||
1163 | } | ||
1164 | #endif /* CONFIG_HIGHMEM */ | ||
1165 | |||
1166 | /** | ||
1167 | * free_unnecessary_pages - Release preallocated pages not needed for the image | ||
1168 | */ | ||
1169 | static void free_unnecessary_pages(void) | ||
1170 | { | ||
1171 | unsigned long save_highmem, to_free_normal, to_free_highmem; | ||
1172 | |||
1173 | to_free_normal = alloc_normal - count_data_pages(); | ||
1174 | save_highmem = count_highmem_pages(); | ||
1175 | if (alloc_highmem > save_highmem) { | ||
1176 | to_free_highmem = alloc_highmem - save_highmem; | ||
1177 | } else { | ||
1178 | to_free_highmem = 0; | ||
1179 | to_free_normal -= save_highmem - alloc_highmem; | ||
1180 | } | ||
1181 | |||
1182 | memory_bm_position_reset(©_bm); | ||
1183 | |||
1184 | while (to_free_normal > 0 && to_free_highmem > 0) { | ||
1185 | unsigned long pfn = memory_bm_next_pfn(©_bm); | ||
1186 | struct page *page = pfn_to_page(pfn); | ||
1187 | |||
1188 | if (PageHighMem(page)) { | ||
1189 | if (!to_free_highmem) | ||
1190 | continue; | ||
1191 | to_free_highmem--; | ||
1192 | alloc_highmem--; | ||
1193 | } else { | ||
1194 | if (!to_free_normal) | ||
1195 | continue; | ||
1196 | to_free_normal--; | ||
1197 | alloc_normal--; | ||
1198 | } | ||
1199 | memory_bm_clear_bit(©_bm, pfn); | ||
1200 | swsusp_unset_page_forbidden(page); | ||
1201 | swsusp_unset_page_free(page); | ||
1202 | __free_page(page); | ||
1203 | } | ||
1204 | } | ||
1205 | |||
1206 | /** | ||
1207 | * minimum_image_size - Estimate the minimum acceptable size of an image | ||
1208 | * @saveable: Number of saveable pages in the system. | ||
1209 | * | ||
1210 | * We want to avoid attempting to free too much memory too hard, so estimate the | ||
1211 | * minimum acceptable size of a hibernation image to use as the lower limit for | ||
1212 | * preallocating memory. | ||
1213 | * | ||
1214 | * We assume that the minimum image size should be proportional to | ||
1215 | * | ||
1216 | * [number of saveable pages] - [number of pages that can be freed in theory] | ||
1217 | * | ||
1218 | * where the second term is the sum of (1) reclaimable slab pages, (2) active | ||
1219 | * and (3) inactive anonymouns pages, (4) active and (5) inactive file pages, | ||
1220 | * minus mapped file pages. | ||
1221 | */ | ||
1222 | static unsigned long minimum_image_size(unsigned long saveable) | ||
1223 | { | ||
1224 | unsigned long size; | ||
1225 | |||
1226 | size = global_page_state(NR_SLAB_RECLAIMABLE) | ||
1227 | + global_page_state(NR_ACTIVE_ANON) | ||
1228 | + global_page_state(NR_INACTIVE_ANON) | ||
1229 | + global_page_state(NR_ACTIVE_FILE) | ||
1230 | + global_page_state(NR_INACTIVE_FILE) | ||
1231 | - global_page_state(NR_FILE_MAPPED); | ||
1232 | |||
1233 | return saveable <= size ? 0 : saveable - size; | ||
1234 | } | ||
1235 | |||
1236 | /** | ||
1237 | * hibernate_preallocate_memory - Preallocate memory for hibernation image | ||
1238 | * | ||
1239 | * To create a hibernation image it is necessary to make a copy of every page | ||
1240 | * frame in use. We also need a number of page frames to be free during | ||
1241 | * hibernation for allocations made while saving the image and for device | ||
1242 | * drivers, in case they need to allocate memory from their hibernation | ||
1243 | * callbacks (these two numbers are given by PAGES_FOR_IO and SPARE_PAGES, | ||
1244 | * respectively, both of which are rough estimates). To make this happen, we | ||
1245 | * compute the total number of available page frames and allocate at least | ||
1246 | * | ||
1247 | * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 + 2 * SPARE_PAGES | ||
1248 | * | ||
1249 | * of them, which corresponds to the maximum size of a hibernation image. | ||
1250 | * | ||
1251 | * If image_size is set below the number following from the above formula, | ||
1252 | * the preallocation of memory is continued until the total number of saveable | ||
1253 | * pages in the system is below the requested image size or the minimum | ||
1254 | * acceptable image size returned by minimum_image_size(), whichever is greater. | ||
1255 | */ | ||
1256 | int hibernate_preallocate_memory(void) | ||
1087 | { | 1257 | { |
1088 | long tmp; | ||
1089 | struct zone *zone; | 1258 | struct zone *zone; |
1090 | unsigned long pages = 0; | 1259 | unsigned long saveable, size, max_size, count, highmem, pages = 0; |
1091 | unsigned int i = 0; | 1260 | unsigned long alloc, save_highmem, pages_highmem; |
1092 | char *p = "-\\|/"; | ||
1093 | struct timeval start, stop; | 1261 | struct timeval start, stop; |
1262 | int error; | ||
1094 | 1263 | ||
1095 | printk(KERN_INFO "PM: Shrinking memory... "); | 1264 | printk(KERN_INFO "PM: Preallocating image memory... "); |
1096 | do_gettimeofday(&start); | 1265 | do_gettimeofday(&start); |
1097 | do { | ||
1098 | long size, highmem_size; | ||
1099 | |||
1100 | highmem_size = count_highmem_pages(); | ||
1101 | size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES; | ||
1102 | tmp = size; | ||
1103 | size += highmem_size; | ||
1104 | for_each_populated_zone(zone) { | ||
1105 | tmp += snapshot_additional_pages(zone); | ||
1106 | if (is_highmem(zone)) { | ||
1107 | highmem_size -= | ||
1108 | zone_page_state(zone, NR_FREE_PAGES); | ||
1109 | } else { | ||
1110 | tmp -= zone_page_state(zone, NR_FREE_PAGES); | ||
1111 | tmp += zone->lowmem_reserve[ZONE_NORMAL]; | ||
1112 | } | ||
1113 | } | ||
1114 | 1266 | ||
1115 | if (highmem_size < 0) | 1267 | error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY); |
1116 | highmem_size = 0; | 1268 | if (error) |
1269 | goto err_out; | ||
1117 | 1270 | ||
1118 | tmp += highmem_size; | 1271 | error = memory_bm_create(©_bm, GFP_IMAGE, PG_ANY); |
1119 | if (tmp > 0) { | 1272 | if (error) |
1120 | tmp = __shrink_memory(tmp); | 1273 | goto err_out; |
1121 | if (!tmp) | 1274 | |
1122 | return -ENOMEM; | 1275 | alloc_normal = 0; |
1123 | pages += tmp; | 1276 | alloc_highmem = 0; |
1124 | } else if (size > image_size / PAGE_SIZE) { | 1277 | |
1125 | tmp = __shrink_memory(size - (image_size / PAGE_SIZE)); | 1278 | /* Count the number of saveable data pages. */ |
1126 | pages += tmp; | 1279 | save_highmem = count_highmem_pages(); |
1127 | } | 1280 | saveable = count_data_pages(); |
1128 | printk("\b%c", p[i++%4]); | 1281 | |
1129 | } while (tmp > 0); | 1282 | /* |
1283 | * Compute the total number of page frames we can use (count) and the | ||
1284 | * number of pages needed for image metadata (size). | ||
1285 | */ | ||
1286 | count = saveable; | ||
1287 | saveable += save_highmem; | ||
1288 | highmem = save_highmem; | ||
1289 | size = 0; | ||
1290 | for_each_populated_zone(zone) { | ||
1291 | size += snapshot_additional_pages(zone); | ||
1292 | if (is_highmem(zone)) | ||
1293 | highmem += zone_page_state(zone, NR_FREE_PAGES); | ||
1294 | else | ||
1295 | count += zone_page_state(zone, NR_FREE_PAGES); | ||
1296 | } | ||
1297 | count += highmem; | ||
1298 | count -= totalreserve_pages; | ||
1299 | |||
1300 | /* Compute the maximum number of saveable pages to leave in memory. */ | ||
1301 | max_size = (count - (size + PAGES_FOR_IO)) / 2 - 2 * SPARE_PAGES; | ||
1302 | size = DIV_ROUND_UP(image_size, PAGE_SIZE); | ||
1303 | if (size > max_size) | ||
1304 | size = max_size; | ||
1305 | /* | ||
1306 | * If the maximum is not less than the current number of saveable pages | ||
1307 | * in memory, allocate page frames for the image and we're done. | ||
1308 | */ | ||
1309 | if (size >= saveable) { | ||
1310 | pages = preallocate_image_highmem(save_highmem); | ||
1311 | pages += preallocate_image_memory(saveable - pages); | ||
1312 | goto out; | ||
1313 | } | ||
1314 | |||
1315 | /* Estimate the minimum size of the image. */ | ||
1316 | pages = minimum_image_size(saveable); | ||
1317 | if (size < pages) | ||
1318 | size = min_t(unsigned long, pages, max_size); | ||
1319 | |||
1320 | /* | ||
1321 | * Let the memory management subsystem know that we're going to need a | ||
1322 | * large number of page frames to allocate and make it free some memory. | ||
1323 | * NOTE: If this is not done, performance will be hurt badly in some | ||
1324 | * test cases. | ||
1325 | */ | ||
1326 | shrink_all_memory(saveable - size); | ||
1327 | |||
1328 | /* | ||
1329 | * The number of saveable pages in memory was too high, so apply some | ||
1330 | * pressure to decrease it. First, make room for the largest possible | ||
1331 | * image and fail if that doesn't work. Next, try to decrease the size | ||
1332 | * of the image as much as indicated by 'size' using allocations from | ||
1333 | * highmem and non-highmem zones separately. | ||
1334 | */ | ||
1335 | pages_highmem = preallocate_image_highmem(highmem / 2); | ||
1336 | alloc = (count - max_size) - pages_highmem; | ||
1337 | pages = preallocate_image_memory(alloc); | ||
1338 | if (pages < alloc) | ||
1339 | goto err_out; | ||
1340 | size = max_size - size; | ||
1341 | alloc = size; | ||
1342 | size = preallocate_highmem_fraction(size, highmem, count); | ||
1343 | pages_highmem += size; | ||
1344 | alloc -= size; | ||
1345 | pages += preallocate_image_memory(alloc); | ||
1346 | pages += pages_highmem; | ||
1347 | |||
1348 | /* | ||
1349 | * We only need as many page frames for the image as there are saveable | ||
1350 | * pages in memory, but we have allocated more. Release the excessive | ||
1351 | * ones now. | ||
1352 | */ | ||
1353 | free_unnecessary_pages(); | ||
1354 | |||
1355 | out: | ||
1130 | do_gettimeofday(&stop); | 1356 | do_gettimeofday(&stop); |
1131 | printk("\bdone (%lu pages freed)\n", pages); | 1357 | printk(KERN_CONT "done (allocated %lu pages)\n", pages); |
1132 | swsusp_show_speed(&start, &stop, pages, "Freed"); | 1358 | swsusp_show_speed(&start, &stop, pages, "Allocated"); |
1133 | 1359 | ||
1134 | return 0; | 1360 | return 0; |
1361 | |||
1362 | err_out: | ||
1363 | printk(KERN_CONT "\n"); | ||
1364 | swsusp_free(); | ||
1365 | return -ENOMEM; | ||
1135 | } | 1366 | } |
1136 | 1367 | ||
1137 | #ifdef CONFIG_HIGHMEM | 1368 | #ifdef CONFIG_HIGHMEM |
@@ -1142,7 +1373,7 @@ int swsusp_shrink_memory(void) | |||
1142 | 1373 | ||
1143 | static unsigned int count_pages_for_highmem(unsigned int nr_highmem) | 1374 | static unsigned int count_pages_for_highmem(unsigned int nr_highmem) |
1144 | { | 1375 | { |
1145 | unsigned int free_highmem = count_free_highmem_pages(); | 1376 | unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem; |
1146 | 1377 | ||
1147 | if (free_highmem >= nr_highmem) | 1378 | if (free_highmem >= nr_highmem) |
1148 | nr_highmem = 0; | 1379 | nr_highmem = 0; |
@@ -1164,19 +1395,17 @@ count_pages_for_highmem(unsigned int nr_highmem) { return 0; } | |||
1164 | static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) | 1395 | static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) |
1165 | { | 1396 | { |
1166 | struct zone *zone; | 1397 | struct zone *zone; |
1167 | unsigned int free = 0, meta = 0; | 1398 | unsigned int free = alloc_normal; |
1168 | 1399 | ||
1169 | for_each_zone(zone) { | 1400 | for_each_populated_zone(zone) |
1170 | meta += snapshot_additional_pages(zone); | ||
1171 | if (!is_highmem(zone)) | 1401 | if (!is_highmem(zone)) |
1172 | free += zone_page_state(zone, NR_FREE_PAGES); | 1402 | free += zone_page_state(zone, NR_FREE_PAGES); |
1173 | } | ||
1174 | 1403 | ||
1175 | nr_pages += count_pages_for_highmem(nr_highmem); | 1404 | nr_pages += count_pages_for_highmem(nr_highmem); |
1176 | pr_debug("PM: Normal pages needed: %u + %u + %u, available pages: %u\n", | 1405 | pr_debug("PM: Normal pages needed: %u + %u, available pages: %u\n", |
1177 | nr_pages, PAGES_FOR_IO, meta, free); | 1406 | nr_pages, PAGES_FOR_IO, free); |
1178 | 1407 | ||
1179 | return free > nr_pages + PAGES_FOR_IO + meta; | 1408 | return free > nr_pages + PAGES_FOR_IO; |
1180 | } | 1409 | } |
1181 | 1410 | ||
1182 | #ifdef CONFIG_HIGHMEM | 1411 | #ifdef CONFIG_HIGHMEM |
@@ -1198,7 +1427,7 @@ static inline int get_highmem_buffer(int safe_needed) | |||
1198 | */ | 1427 | */ |
1199 | 1428 | ||
1200 | static inline unsigned int | 1429 | static inline unsigned int |
1201 | alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem) | 1430 | alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem) |
1202 | { | 1431 | { |
1203 | unsigned int to_alloc = count_free_highmem_pages(); | 1432 | unsigned int to_alloc = count_free_highmem_pages(); |
1204 | 1433 | ||
@@ -1218,7 +1447,7 @@ alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem) | |||
1218 | static inline int get_highmem_buffer(int safe_needed) { return 0; } | 1447 | static inline int get_highmem_buffer(int safe_needed) { return 0; } |
1219 | 1448 | ||
1220 | static inline unsigned int | 1449 | static inline unsigned int |
1221 | alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int n) { return 0; } | 1450 | alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n) { return 0; } |
1222 | #endif /* CONFIG_HIGHMEM */ | 1451 | #endif /* CONFIG_HIGHMEM */ |
1223 | 1452 | ||
1224 | /** | 1453 | /** |
@@ -1237,51 +1466,36 @@ static int | |||
1237 | swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, | 1466 | swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, |
1238 | unsigned int nr_pages, unsigned int nr_highmem) | 1467 | unsigned int nr_pages, unsigned int nr_highmem) |
1239 | { | 1468 | { |
1240 | int error; | 1469 | int error = 0; |
1241 | |||
1242 | error = memory_bm_create(orig_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY); | ||
1243 | if (error) | ||
1244 | goto Free; | ||
1245 | |||
1246 | error = memory_bm_create(copy_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY); | ||
1247 | if (error) | ||
1248 | goto Free; | ||
1249 | 1470 | ||
1250 | if (nr_highmem > 0) { | 1471 | if (nr_highmem > 0) { |
1251 | error = get_highmem_buffer(PG_ANY); | 1472 | error = get_highmem_buffer(PG_ANY); |
1252 | if (error) | 1473 | if (error) |
1253 | goto Free; | 1474 | goto err_out; |
1254 | 1475 | if (nr_highmem > alloc_highmem) { | |
1255 | nr_pages += alloc_highmem_image_pages(copy_bm, nr_highmem); | 1476 | nr_highmem -= alloc_highmem; |
1477 | nr_pages += alloc_highmem_pages(copy_bm, nr_highmem); | ||
1478 | } | ||
1256 | } | 1479 | } |
1257 | while (nr_pages-- > 0) { | 1480 | if (nr_pages > alloc_normal) { |
1258 | struct page *page = alloc_image_page(GFP_ATOMIC | __GFP_COLD); | 1481 | nr_pages -= alloc_normal; |
1259 | 1482 | while (nr_pages-- > 0) { | |
1260 | if (!page) | 1483 | struct page *page; |
1261 | goto Free; | ||
1262 | 1484 | ||
1263 | memory_bm_set_bit(copy_bm, page_to_pfn(page)); | 1485 | page = alloc_image_page(GFP_ATOMIC | __GFP_COLD); |
1486 | if (!page) | ||
1487 | goto err_out; | ||
1488 | memory_bm_set_bit(copy_bm, page_to_pfn(page)); | ||
1489 | } | ||
1264 | } | 1490 | } |
1491 | |||
1265 | return 0; | 1492 | return 0; |
1266 | 1493 | ||
1267 | Free: | 1494 | err_out: |
1268 | swsusp_free(); | 1495 | swsusp_free(); |
1269 | return -ENOMEM; | 1496 | return error; |
1270 | } | 1497 | } |
1271 | 1498 | ||
1272 | /* Memory bitmap used for marking saveable pages (during suspend) or the | ||
1273 | * suspend image pages (during resume) | ||
1274 | */ | ||
1275 | static struct memory_bitmap orig_bm; | ||
1276 | /* Memory bitmap used on suspend for marking allocated pages that will contain | ||
1277 | * the copies of saveable pages. During resume it is initially used for | ||
1278 | * marking the suspend image pages, but then its set bits are duplicated in | ||
1279 | * @orig_bm and it is released. Next, on systems with high memory, it may be | ||
1280 | * used for marking "safe" highmem pages, but it has to be reinitialized for | ||
1281 | * this purpose. | ||
1282 | */ | ||
1283 | static struct memory_bitmap copy_bm; | ||
1284 | |||
1285 | asmlinkage int swsusp_save(void) | 1499 | asmlinkage int swsusp_save(void) |
1286 | { | 1500 | { |
1287 | unsigned int nr_pages, nr_highmem; | 1501 | unsigned int nr_pages, nr_highmem; |
@@ -1474,7 +1688,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm) | |||
1474 | unsigned long pfn, max_zone_pfn; | 1688 | unsigned long pfn, max_zone_pfn; |
1475 | 1689 | ||
1476 | /* Clear page flags */ | 1690 | /* Clear page flags */ |
1477 | for_each_zone(zone) { | 1691 | for_each_populated_zone(zone) { |
1478 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; | 1692 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; |
1479 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) | 1693 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) |
1480 | if (pfn_valid(pfn)) | 1694 | if (pfn_valid(pfn)) |
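To make the sizing arithmetic in the new `hibernate_preallocate_memory()` easier to follow, here is the same computation pulled out into a standalone user-space C sketch. Only the structure of the formula (the `max_size` computation and the clamp of `image_size`) is taken from the patch; the page counts and the stand-in constants are invented, and the `minimum_image_size()` clamp is left out for brevity.

```c
#include <stdio.h>

/* Stand-ins for the kernel constants; the real values differ. */
#define PAGES_FOR_IO	1024
#define SPARE_PAGES	256

int main(void)
{
	/* Hypothetical machine: all numbers invented for illustration. */
	unsigned long saveable   = 180000; /* saveable lowmem + highmem pages  */
	unsigned long count      = 250000; /* usable page frames less reserves */
	unsigned long meta       = 600;    /* snapshot_additional_pages() total */
	unsigned long image_goal = 120000; /* image_size / PAGE_SIZE            */

	/* Largest admissible image: leave room for a full copy plus I/O. */
	unsigned long max_size = (count - (meta + PAGES_FOR_IO)) / 2
					- 2 * SPARE_PAGES;

	/* Requested image size, clamped to what can actually fit. */
	unsigned long size = image_goal;
	if (size > max_size)
		size = max_size;

	printf("max_size = %lu pages, target size = %lu pages\n",
	       max_size, size);
	printf("preallocate roughly %lu pages\n",
	       size >= saveable ? saveable : count - max_size);

	return 0;
}
```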
diff --git a/kernel/printk.c b/kernel/printk.c
index e10d193a833a..602033acd6c7 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1075,12 +1075,6 @@ void __sched console_conditional_schedule(void) | |||
1075 | } | 1075 | } |
1076 | EXPORT_SYMBOL(console_conditional_schedule); | 1076 | EXPORT_SYMBOL(console_conditional_schedule); |
1077 | 1077 | ||
1078 | void console_print(const char *s) | ||
1079 | { | ||
1080 | printk(KERN_EMERG "%s", s); | ||
1081 | } | ||
1082 | EXPORT_SYMBOL(console_print); | ||
1083 | |||
1084 | void console_unblank(void) | 1078 | void console_unblank(void) |
1085 | { | 1079 | { |
1086 | struct console *c; | 1080 | struct console *c; |
diff --git a/kernel/sched.c b/kernel/sched.c
index e27a53685ed9..faf4d463bbff 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -119,8 +119,6 @@ | |||
119 | */ | 119 | */ |
120 | #define RUNTIME_INF ((u64)~0ULL) | 120 | #define RUNTIME_INF ((u64)~0ULL) |
121 | 121 | ||
122 | static void double_rq_lock(struct rq *rq1, struct rq *rq2); | ||
123 | |||
124 | static inline int rt_policy(int policy) | 122 | static inline int rt_policy(int policy) |
125 | { | 123 | { |
126 | if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) | 124 | if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) |
@@ -295,12 +293,12 @@ struct task_group root_task_group; | |||
295 | /* Default task group's sched entity on each cpu */ | 293 | /* Default task group's sched entity on each cpu */ |
296 | static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); | 294 | static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); |
297 | /* Default task group's cfs_rq on each cpu */ | 295 | /* Default task group's cfs_rq on each cpu */ |
298 | static DEFINE_PER_CPU(struct cfs_rq, init_tg_cfs_rq) ____cacheline_aligned_in_smp; | 296 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct cfs_rq, init_tg_cfs_rq); |
299 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 297 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
300 | 298 | ||
301 | #ifdef CONFIG_RT_GROUP_SCHED | 299 | #ifdef CONFIG_RT_GROUP_SCHED |
302 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); | 300 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); |
303 | static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; | 301 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq); |
304 | #endif /* CONFIG_RT_GROUP_SCHED */ | 302 | #endif /* CONFIG_RT_GROUP_SCHED */ |
305 | #else /* !CONFIG_USER_SCHED */ | 303 | #else /* !CONFIG_USER_SCHED */ |
306 | #define root_task_group init_task_group | 304 | #define root_task_group init_task_group |
@@ -378,13 +376,6 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) | |||
378 | 376 | ||
379 | #else | 377 | #else |
380 | 378 | ||
381 | #ifdef CONFIG_SMP | ||
382 | static int root_task_group_empty(void) | ||
383 | { | ||
384 | return 1; | ||
385 | } | ||
386 | #endif | ||
387 | |||
388 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } | 379 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } |
389 | static inline struct task_group *task_group(struct task_struct *p) | 380 | static inline struct task_group *task_group(struct task_struct *p) |
390 | { | 381 | { |
@@ -514,14 +505,6 @@ struct root_domain { | |||
514 | #ifdef CONFIG_SMP | 505 | #ifdef CONFIG_SMP |
515 | struct cpupri cpupri; | 506 | struct cpupri cpupri; |
516 | #endif | 507 | #endif |
517 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) | ||
518 | /* | ||
519 | * Preferred wake up cpu nominated by sched_mc balance that will be | ||
520 | * used when most cpus are idle in the system indicating overall very | ||
521 | * low system utilisation. Triggered at POWERSAVINGS_BALANCE_WAKEUP(2) | ||
522 | */ | ||
523 | unsigned int sched_mc_preferred_wakeup_cpu; | ||
524 | #endif | ||
525 | }; | 508 | }; |
526 | 509 | ||
527 | /* | 510 | /* |
@@ -646,9 +629,10 @@ struct rq { | |||
646 | 629 | ||
647 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); | 630 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); |
648 | 631 | ||
649 | static inline void check_preempt_curr(struct rq *rq, struct task_struct *p, int sync) | 632 | static inline |
633 | void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | ||
650 | { | 634 | { |
651 | rq->curr->sched_class->check_preempt_curr(rq, p, sync); | 635 | rq->curr->sched_class->check_preempt_curr(rq, p, flags); |
652 | } | 636 | } |
653 | 637 | ||
654 | static inline int cpu_of(struct rq *rq) | 638 | static inline int cpu_of(struct rq *rq) |
@@ -1509,8 +1493,65 @@ static int tg_nop(struct task_group *tg, void *data) | |||
1509 | #endif | 1493 | #endif |
1510 | 1494 | ||
1511 | #ifdef CONFIG_SMP | 1495 | #ifdef CONFIG_SMP |
1512 | static unsigned long source_load(int cpu, int type); | 1496 | /* Used instead of source_load when we know the type == 0 */ |
1513 | static unsigned long target_load(int cpu, int type); | 1497 | static unsigned long weighted_cpuload(const int cpu) |
1498 | { | ||
1499 | return cpu_rq(cpu)->load.weight; | ||
1500 | } | ||
1501 | |||
1502 | /* | ||
1503 | * Return a low guess at the load of a migration-source cpu weighted | ||
1504 | * according to the scheduling class and "nice" value. | ||
1505 | * | ||
1506 | * We want to under-estimate the load of migration sources, to | ||
1507 | * balance conservatively. | ||
1508 | */ | ||
1509 | static unsigned long source_load(int cpu, int type) | ||
1510 | { | ||
1511 | struct rq *rq = cpu_rq(cpu); | ||
1512 | unsigned long total = weighted_cpuload(cpu); | ||
1513 | |||
1514 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
1515 | return total; | ||
1516 | |||
1517 | return min(rq->cpu_load[type-1], total); | ||
1518 | } | ||
1519 | |||
1520 | /* | ||
1521 | * Return a high guess at the load of a migration-target cpu weighted | ||
1522 | * according to the scheduling class and "nice" value. | ||
1523 | */ | ||
1524 | static unsigned long target_load(int cpu, int type) | ||
1525 | { | ||
1526 | struct rq *rq = cpu_rq(cpu); | ||
1527 | unsigned long total = weighted_cpuload(cpu); | ||
1528 | |||
1529 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
1530 | return total; | ||
1531 | |||
1532 | return max(rq->cpu_load[type-1], total); | ||
1533 | } | ||
1534 | |||
1535 | static struct sched_group *group_of(int cpu) | ||
1536 | { | ||
1537 | struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd); | ||
1538 | |||
1539 | if (!sd) | ||
1540 | return NULL; | ||
1541 | |||
1542 | return sd->groups; | ||
1543 | } | ||
1544 | |||
1545 | static unsigned long power_of(int cpu) | ||
1546 | { | ||
1547 | struct sched_group *group = group_of(cpu); | ||
1548 | |||
1549 | if (!group) | ||
1550 | return SCHED_LOAD_SCALE; | ||
1551 | |||
1552 | return group->cpu_power; | ||
1553 | } | ||
1554 | |||
1514 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); | 1555 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); |
1515 | 1556 | ||
1516 | static unsigned long cpu_avg_load_per_task(int cpu) | 1557 | static unsigned long cpu_avg_load_per_task(int cpu) |
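The comments moved into this hunk explain the deliberate asymmetry: `source_load()` under-estimates a migration source (the minimum of the decayed `cpu_load[]` figure and the instantaneous weighted load) while `target_load()` over-estimates a migration target (the maximum of the two), so load balancing errs on the side of not moving tasks. A toy illustration with invented numbers (user-space C, not kernel code):

```c
#include <stdio.h>

static unsigned long min_ul(unsigned long a, unsigned long b) { return a < b ? a : b; }
static unsigned long max_ul(unsigned long a, unsigned long b) { return a > b ? a : b; }

int main(void)
{
	unsigned long cpu_load_hist = 1800;	/* decayed rq->cpu_load[type-1] */
	unsigned long weighted_now  = 2048;	/* weighted_cpuload(cpu)        */

	printf("source_load (low guess):  %lu\n", min_ul(cpu_load_hist, weighted_now));
	printf("target_load (high guess): %lu\n", max_ul(cpu_load_hist, weighted_now));
	return 0;
}
```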
@@ -1695,6 +1736,8 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd) | |||
1695 | 1736 | ||
1696 | #ifdef CONFIG_PREEMPT | 1737 | #ifdef CONFIG_PREEMPT |
1697 | 1738 | ||
1739 | static void double_rq_lock(struct rq *rq1, struct rq *rq2); | ||
1740 | |||
1698 | /* | 1741 | /* |
1699 | * fair double_lock_balance: Safely acquires both rq->locks in a fair | 1742 | * fair double_lock_balance: Safely acquires both rq->locks in a fair |
1700 | * way at the expense of forcing extra atomic operations in all | 1743 | * way at the expense of forcing extra atomic operations in all |
@@ -1959,13 +2002,6 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, | |||
1959 | } | 2002 | } |
1960 | 2003 | ||
1961 | #ifdef CONFIG_SMP | 2004 | #ifdef CONFIG_SMP |
1962 | |||
1963 | /* Used instead of source_load when we know the type == 0 */ | ||
1964 | static unsigned long weighted_cpuload(const int cpu) | ||
1965 | { | ||
1966 | return cpu_rq(cpu)->load.weight; | ||
1967 | } | ||
1968 | |||
1969 | /* | 2005 | /* |
1970 | * Is this task likely cache-hot: | 2006 | * Is this task likely cache-hot: |
1971 | */ | 2007 | */ |
@@ -2239,185 +2275,6 @@ void kick_process(struct task_struct *p) | |||
2239 | preempt_enable(); | 2275 | preempt_enable(); |
2240 | } | 2276 | } |
2241 | EXPORT_SYMBOL_GPL(kick_process); | 2277 | EXPORT_SYMBOL_GPL(kick_process); |
2242 | |||
2243 | /* | ||
2244 | * Return a low guess at the load of a migration-source cpu weighted | ||
2245 | * according to the scheduling class and "nice" value. | ||
2246 | * | ||
2247 | * We want to under-estimate the load of migration sources, to | ||
2248 | * balance conservatively. | ||
2249 | */ | ||
2250 | static unsigned long source_load(int cpu, int type) | ||
2251 | { | ||
2252 | struct rq *rq = cpu_rq(cpu); | ||
2253 | unsigned long total = weighted_cpuload(cpu); | ||
2254 | |||
2255 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
2256 | return total; | ||
2257 | |||
2258 | return min(rq->cpu_load[type-1], total); | ||
2259 | } | ||
2260 | |||
2261 | /* | ||
2262 | * Return a high guess at the load of a migration-target cpu weighted | ||
2263 | * according to the scheduling class and "nice" value. | ||
2264 | */ | ||
2265 | static unsigned long target_load(int cpu, int type) | ||
2266 | { | ||
2267 | struct rq *rq = cpu_rq(cpu); | ||
2268 | unsigned long total = weighted_cpuload(cpu); | ||
2269 | |||
2270 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
2271 | return total; | ||
2272 | |||
2273 | return max(rq->cpu_load[type-1], total); | ||
2274 | } | ||
2275 | |||
2276 | /* | ||
2277 | * find_idlest_group finds and returns the least busy CPU group within the | ||
2278 | * domain. | ||
2279 | */ | ||
2280 | static struct sched_group * | ||
2281 | find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) | ||
2282 | { | ||
2283 | struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups; | ||
2284 | unsigned long min_load = ULONG_MAX, this_load = 0; | ||
2285 | int load_idx = sd->forkexec_idx; | ||
2286 | int imbalance = 100 + (sd->imbalance_pct-100)/2; | ||
2287 | |||
2288 | do { | ||
2289 | unsigned long load, avg_load; | ||
2290 | int local_group; | ||
2291 | int i; | ||
2292 | |||
2293 | /* Skip over this group if it has no CPUs allowed */ | ||
2294 | if (!cpumask_intersects(sched_group_cpus(group), | ||
2295 | &p->cpus_allowed)) | ||
2296 | continue; | ||
2297 | |||
2298 | local_group = cpumask_test_cpu(this_cpu, | ||
2299 | sched_group_cpus(group)); | ||
2300 | |||
2301 | /* Tally up the load of all CPUs in the group */ | ||
2302 | avg_load = 0; | ||
2303 | |||
2304 | for_each_cpu(i, sched_group_cpus(group)) { | ||
2305 | /* Bias balancing toward cpus of our domain */ | ||
2306 | if (local_group) | ||
2307 | load = source_load(i, load_idx); | ||
2308 | else | ||
2309 | load = target_load(i, load_idx); | ||
2310 | |||
2311 | avg_load += load; | ||
2312 | } | ||
2313 | |||
2314 | /* Adjust by relative CPU power of the group */ | ||
2315 | avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power; | ||
2316 | |||
2317 | if (local_group) { | ||
2318 | this_load = avg_load; | ||
2319 | this = group; | ||
2320 | } else if (avg_load < min_load) { | ||
2321 | min_load = avg_load; | ||
2322 | idlest = group; | ||
2323 | } | ||
2324 | } while (group = group->next, group != sd->groups); | ||
2325 | |||
2326 | if (!idlest || 100*this_load < imbalance*min_load) | ||
2327 | return NULL; | ||
2328 | return idlest; | ||
2329 | } | ||
2330 | |||
2331 | /* | ||
2332 | * find_idlest_cpu - find the idlest cpu among the cpus in group. | ||
2333 | */ | ||
2334 | static int | ||
2335 | find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) | ||
2336 | { | ||
2337 | unsigned long load, min_load = ULONG_MAX; | ||
2338 | int idlest = -1; | ||
2339 | int i; | ||
2340 | |||
2341 | /* Traverse only the allowed CPUs */ | ||
2342 | for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) { | ||
2343 | load = weighted_cpuload(i); | ||
2344 | |||
2345 | if (load < min_load || (load == min_load && i == this_cpu)) { | ||
2346 | min_load = load; | ||
2347 | idlest = i; | ||
2348 | } | ||
2349 | } | ||
2350 | |||
2351 | return idlest; | ||
2352 | } | ||
2353 | |||
2354 | /* | ||
2355 | * sched_balance_self: balance the current task (running on cpu) in domains | ||
2356 | * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and | ||
2357 | * SD_BALANCE_EXEC. | ||
2358 | * | ||
2359 | * Balance, ie. select the least loaded group. | ||
2360 | * | ||
2361 | * Returns the target CPU number, or the same CPU if no balancing is needed. | ||
2362 | * | ||
2363 | * preempt must be disabled. | ||
2364 | */ | ||
2365 | static int sched_balance_self(int cpu, int flag) | ||
2366 | { | ||
2367 | struct task_struct *t = current; | ||
2368 | struct sched_domain *tmp, *sd = NULL; | ||
2369 | |||
2370 | for_each_domain(cpu, tmp) { | ||
2371 | /* | ||
2372 | * If power savings logic is enabled for a domain, stop there. | ||
2373 | */ | ||
2374 | if (tmp->flags & SD_POWERSAVINGS_BALANCE) | ||
2375 | break; | ||
2376 | if (tmp->flags & flag) | ||
2377 | sd = tmp; | ||
2378 | } | ||
2379 | |||
2380 | if (sd) | ||
2381 | update_shares(sd); | ||
2382 | |||
2383 | while (sd) { | ||
2384 | struct sched_group *group; | ||
2385 | int new_cpu, weight; | ||
2386 | |||
2387 | if (!(sd->flags & flag)) { | ||
2388 | sd = sd->child; | ||
2389 | continue; | ||
2390 | } | ||
2391 | |||
2392 | group = find_idlest_group(sd, t, cpu); | ||
2393 | if (!group) { | ||
2394 | sd = sd->child; | ||
2395 | continue; | ||
2396 | } | ||
2397 | |||
2398 | new_cpu = find_idlest_cpu(group, t, cpu); | ||
2399 | if (new_cpu == -1 || new_cpu == cpu) { | ||
2400 | /* Now try balancing at a lower domain level of cpu */ | ||
2401 | sd = sd->child; | ||
2402 | continue; | ||
2403 | } | ||
2404 | |||
2405 | /* Now try balancing at a lower domain level of new_cpu */ | ||
2406 | cpu = new_cpu; | ||
2407 | weight = cpumask_weight(sched_domain_span(sd)); | ||
2408 | sd = NULL; | ||
2409 | for_each_domain(cpu, tmp) { | ||
2410 | if (weight <= cpumask_weight(sched_domain_span(tmp))) | ||
2411 | break; | ||
2412 | if (tmp->flags & flag) | ||
2413 | sd = tmp; | ||
2414 | } | ||
2415 | /* while loop will break here if sd == NULL */ | ||
2416 | } | ||
2417 | |||
2418 | return cpu; | ||
2419 | } | ||
2420 | |||
2421 | #endif /* CONFIG_SMP */ | 2278 | #endif /* CONFIG_SMP */ |
2422 | 2279 | ||
2423 | /** | 2280 | /** |
@@ -2455,37 +2312,22 @@ void task_oncpu_function_call(struct task_struct *p, | |||
2455 | * | 2312 | * |
2456 | * returns failure only if the task is already active. | 2313 | * returns failure only if the task is already active. |
2457 | */ | 2314 | */ |
2458 | static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | 2315 | static int try_to_wake_up(struct task_struct *p, unsigned int state, |
2316 | int wake_flags) | ||
2459 | { | 2317 | { |
2460 | int cpu, orig_cpu, this_cpu, success = 0; | 2318 | int cpu, orig_cpu, this_cpu, success = 0; |
2461 | unsigned long flags; | 2319 | unsigned long flags; |
2462 | long old_state; | ||
2463 | struct rq *rq; | 2320 | struct rq *rq; |
2464 | 2321 | ||
2465 | if (!sched_feat(SYNC_WAKEUPS)) | 2322 | if (!sched_feat(SYNC_WAKEUPS)) |
2466 | sync = 0; | 2323 | wake_flags &= ~WF_SYNC; |
2467 | |||
2468 | #ifdef CONFIG_SMP | ||
2469 | if (sched_feat(LB_WAKEUP_UPDATE) && !root_task_group_empty()) { | ||
2470 | struct sched_domain *sd; | ||
2471 | 2324 | ||
2472 | this_cpu = raw_smp_processor_id(); | 2325 | this_cpu = get_cpu(); |
2473 | cpu = task_cpu(p); | ||
2474 | |||
2475 | for_each_domain(this_cpu, sd) { | ||
2476 | if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { | ||
2477 | update_shares(sd); | ||
2478 | break; | ||
2479 | } | ||
2480 | } | ||
2481 | } | ||
2482 | #endif | ||
2483 | 2326 | ||
2484 | smp_wmb(); | 2327 | smp_wmb(); |
2485 | rq = task_rq_lock(p, &flags); | 2328 | rq = task_rq_lock(p, &flags); |
2486 | update_rq_clock(rq); | 2329 | update_rq_clock(rq); |
2487 | old_state = p->state; | 2330 | if (!(p->state & state)) |
2488 | if (!(old_state & state)) | ||
2489 | goto out; | 2331 | goto out; |
2490 | 2332 | ||
2491 | if (p->se.on_rq) | 2333 | if (p->se.on_rq) |
@@ -2493,27 +2335,29 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
2493 | 2335 | ||
2494 | cpu = task_cpu(p); | 2336 | cpu = task_cpu(p); |
2495 | orig_cpu = cpu; | 2337 | orig_cpu = cpu; |
2496 | this_cpu = smp_processor_id(); | ||
2497 | 2338 | ||
2498 | #ifdef CONFIG_SMP | 2339 | #ifdef CONFIG_SMP |
2499 | if (unlikely(task_running(rq, p))) | 2340 | if (unlikely(task_running(rq, p))) |
2500 | goto out_activate; | 2341 | goto out_activate; |
2501 | 2342 | ||
2502 | cpu = p->sched_class->select_task_rq(p, sync); | 2343 | /* |
2503 | if (cpu != orig_cpu) { | 2344 | * In order to handle concurrent wakeups and release the rq->lock |
2345 | * we put the task in TASK_WAKING state. | ||
2346 | * | ||
2347 | * First fix up the nr_uninterruptible count: | ||
2348 | */ | ||
2349 | if (task_contributes_to_load(p)) | ||
2350 | rq->nr_uninterruptible--; | ||
2351 | p->state = TASK_WAKING; | ||
2352 | task_rq_unlock(rq, &flags); | ||
2353 | |||
2354 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags); | ||
2355 | if (cpu != orig_cpu) | ||
2504 | set_task_cpu(p, cpu); | 2356 | set_task_cpu(p, cpu); |
2505 | task_rq_unlock(rq, &flags); | ||
2506 | /* might preempt at this point */ | ||
2507 | rq = task_rq_lock(p, &flags); | ||
2508 | old_state = p->state; | ||
2509 | if (!(old_state & state)) | ||
2510 | goto out; | ||
2511 | if (p->se.on_rq) | ||
2512 | goto out_running; | ||
2513 | 2357 | ||
2514 | this_cpu = smp_processor_id(); | 2358 | rq = task_rq_lock(p, &flags); |
2515 | cpu = task_cpu(p); | 2359 | WARN_ON(p->state != TASK_WAKING); |
2516 | } | 2360 | cpu = task_cpu(p); |
2517 | 2361 | ||
2518 | #ifdef CONFIG_SCHEDSTATS | 2362 | #ifdef CONFIG_SCHEDSTATS |
2519 | schedstat_inc(rq, ttwu_count); | 2363 | schedstat_inc(rq, ttwu_count); |
@@ -2533,7 +2377,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
2533 | out_activate: | 2377 | out_activate: |
2534 | #endif /* CONFIG_SMP */ | 2378 | #endif /* CONFIG_SMP */ |
2535 | schedstat_inc(p, se.nr_wakeups); | 2379 | schedstat_inc(p, se.nr_wakeups); |
2536 | if (sync) | 2380 | if (wake_flags & WF_SYNC) |
2537 | schedstat_inc(p, se.nr_wakeups_sync); | 2381 | schedstat_inc(p, se.nr_wakeups_sync); |
2538 | if (orig_cpu != cpu) | 2382 | if (orig_cpu != cpu) |
2539 | schedstat_inc(p, se.nr_wakeups_migrate); | 2383 | schedstat_inc(p, se.nr_wakeups_migrate); |
@@ -2562,7 +2406,7 @@ out_activate: | |||
2562 | 2406 | ||
2563 | out_running: | 2407 | out_running: |
2564 | trace_sched_wakeup(rq, p, success); | 2408 | trace_sched_wakeup(rq, p, success); |
2565 | check_preempt_curr(rq, p, sync); | 2409 | check_preempt_curr(rq, p, wake_flags); |
2566 | 2410 | ||
2567 | p->state = TASK_RUNNING; | 2411 | p->state = TASK_RUNNING; |
2568 | #ifdef CONFIG_SMP | 2412 | #ifdef CONFIG_SMP |
@@ -2571,6 +2415,7 @@ out_running: | |||
2571 | #endif | 2415 | #endif |
2572 | out: | 2416 | out: |
2573 | task_rq_unlock(rq, &flags); | 2417 | task_rq_unlock(rq, &flags); |
2418 | put_cpu(); | ||
2574 | 2419 | ||
2575 | return success; | 2420 | return success; |
2576 | } | 2421 | } |
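With this hunk try_to_wake_up() takes a wake_flags bitmask in place of the old boolean sync argument: WF_SYNC carries the old hint and WF_FORK is added so check_preempt_curr() can tell fork wakeups apart. A hedged sketch of how such a mask is consumed (the numeric flag values below are illustrative assumptions; the real definitions live in the sched headers):

#include <stdio.h>

#define WF_SYNC 0x01   /* waker intends to sleep after the wakeup (illustrative value) */
#define WF_FORK 0x02   /* wakeup of a freshly forked child (illustrative value) */

static int sync_wakeups_enabled;   /* mimics sched_feat(SYNC_WAKEUPS) */

static void wake(unsigned int wake_flags)
{
	if (!sync_wakeups_enabled)
		wake_flags &= ~WF_SYNC;    /* feature off: drop the hint, keep the rest */

	printf("sync=%d fork=%d\n",
	       !!(wake_flags & WF_SYNC), !!(wake_flags & WF_FORK));
}

int main(void)
{
	wake(WF_SYNC);                     /* sync bit stripped, feature is off */
	sync_wakeups_enabled = 1;
	wake(WF_SYNC | WF_FORK);           /* both hints preserved */
	return 0;
}

Passing a mask instead of a bool is what lets check_preempt_wakeup() later distinguish WF_FORK from ordinary wakeups without another parameter.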
@@ -2613,6 +2458,7 @@ static void __sched_fork(struct task_struct *p) | |||
2613 | p->se.avg_overlap = 0; | 2458 | p->se.avg_overlap = 0; |
2614 | p->se.start_runtime = 0; | 2459 | p->se.start_runtime = 0; |
2615 | p->se.avg_wakeup = sysctl_sched_wakeup_granularity; | 2460 | p->se.avg_wakeup = sysctl_sched_wakeup_granularity; |
2461 | p->se.avg_running = 0; | ||
2616 | 2462 | ||
2617 | #ifdef CONFIG_SCHEDSTATS | 2463 | #ifdef CONFIG_SCHEDSTATS |
2618 | p->se.wait_start = 0; | 2464 | p->se.wait_start = 0; |
@@ -2674,11 +2520,6 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2674 | 2520 | ||
2675 | __sched_fork(p); | 2521 | __sched_fork(p); |
2676 | 2522 | ||
2677 | #ifdef CONFIG_SMP | ||
2678 | cpu = sched_balance_self(cpu, SD_BALANCE_FORK); | ||
2679 | #endif | ||
2680 | set_task_cpu(p, cpu); | ||
2681 | |||
2682 | /* | 2523 | /* |
2683 | * Make sure we do not leak PI boosting priority to the child. | 2524 | * Make sure we do not leak PI boosting priority to the child. |
2684 | */ | 2525 | */ |
@@ -2709,6 +2550,11 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2709 | if (!rt_prio(p->prio)) | 2550 | if (!rt_prio(p->prio)) |
2710 | p->sched_class = &fair_sched_class; | 2551 | p->sched_class = &fair_sched_class; |
2711 | 2552 | ||
2553 | #ifdef CONFIG_SMP | ||
2554 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0); | ||
2555 | #endif | ||
2556 | set_task_cpu(p, cpu); | ||
2557 | |||
2712 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 2558 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
2713 | if (likely(sched_info_on())) | 2559 | if (likely(sched_info_on())) |
2714 | memset(&p->sched_info, 0, sizeof(p->sched_info)); | 2560 | memset(&p->sched_info, 0, sizeof(p->sched_info)); |
@@ -2754,7 +2600,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
2754 | inc_nr_running(rq); | 2600 | inc_nr_running(rq); |
2755 | } | 2601 | } |
2756 | trace_sched_wakeup_new(rq, p, 1); | 2602 | trace_sched_wakeup_new(rq, p, 1); |
2757 | check_preempt_curr(rq, p, 0); | 2603 | check_preempt_curr(rq, p, WF_FORK); |
2758 | #ifdef CONFIG_SMP | 2604 | #ifdef CONFIG_SMP |
2759 | if (p->sched_class->task_wake_up) | 2605 | if (p->sched_class->task_wake_up) |
2760 | p->sched_class->task_wake_up(rq, p); | 2606 | p->sched_class->task_wake_up(rq, p); |
@@ -3263,7 +3109,7 @@ out: | |||
3263 | void sched_exec(void) | 3109 | void sched_exec(void) |
3264 | { | 3110 | { |
3265 | int new_cpu, this_cpu = get_cpu(); | 3111 | int new_cpu, this_cpu = get_cpu(); |
3266 | new_cpu = sched_balance_self(this_cpu, SD_BALANCE_EXEC); | 3112 | new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0); |
3267 | put_cpu(); | 3113 | put_cpu(); |
3268 | if (new_cpu != this_cpu) | 3114 | if (new_cpu != this_cpu) |
3269 | sched_migrate_task(current, new_cpu); | 3115 | sched_migrate_task(current, new_cpu); |
@@ -3683,11 +3529,6 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds, | |||
3683 | *imbalance = sds->min_load_per_task; | 3529 | *imbalance = sds->min_load_per_task; |
3684 | sds->busiest = sds->group_min; | 3530 | sds->busiest = sds->group_min; |
3685 | 3531 | ||
3686 | if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) { | ||
3687 | cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu = | ||
3688 | group_first_cpu(sds->group_leader); | ||
3689 | } | ||
3690 | |||
3691 | return 1; | 3532 | return 1; |
3692 | 3533 | ||
3693 | } | 3534 | } |
@@ -3711,7 +3552,18 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds, | |||
3711 | } | 3552 | } |
3712 | #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ | 3553 | #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ |
3713 | 3554 | ||
3714 | unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu) | 3555 | |
3556 | unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu) | ||
3557 | { | ||
3558 | return SCHED_LOAD_SCALE; | ||
3559 | } | ||
3560 | |||
3561 | unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu) | ||
3562 | { | ||
3563 | return default_scale_freq_power(sd, cpu); | ||
3564 | } | ||
3565 | |||
3566 | unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu) | ||
3715 | { | 3567 | { |
3716 | unsigned long weight = cpumask_weight(sched_domain_span(sd)); | 3568 | unsigned long weight = cpumask_weight(sched_domain_span(sd)); |
3717 | unsigned long smt_gain = sd->smt_gain; | 3569 | unsigned long smt_gain = sd->smt_gain; |
@@ -3721,6 +3573,11 @@ unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu) | |||
3721 | return smt_gain; | 3573 | return smt_gain; |
3722 | } | 3574 | } |
3723 | 3575 | ||
3576 | unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu) | ||
3577 | { | ||
3578 | return default_scale_smt_power(sd, cpu); | ||
3579 | } | ||
3580 | |||
3724 | unsigned long scale_rt_power(int cpu) | 3581 | unsigned long scale_rt_power(int cpu) |
3725 | { | 3582 | { |
3726 | struct rq *rq = cpu_rq(cpu); | 3583 | struct rq *rq = cpu_rq(cpu); |
@@ -3745,10 +3602,19 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) | |||
3745 | unsigned long power = SCHED_LOAD_SCALE; | 3602 | unsigned long power = SCHED_LOAD_SCALE; |
3746 | struct sched_group *sdg = sd->groups; | 3603 | struct sched_group *sdg = sd->groups; |
3747 | 3604 | ||
3748 | /* here we could scale based on cpufreq */ | 3605 | if (sched_feat(ARCH_POWER)) |
3606 | power *= arch_scale_freq_power(sd, cpu); | ||
3607 | else | ||
3608 | power *= default_scale_freq_power(sd, cpu); | ||
3609 | |||
3610 | power >>= SCHED_LOAD_SHIFT; | ||
3749 | 3611 | ||
3750 | if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { | 3612 | if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { |
3751 | power *= arch_scale_smt_power(sd, cpu); | 3613 | if (sched_feat(ARCH_POWER)) |
3614 | power *= arch_scale_smt_power(sd, cpu); | ||
3615 | else | ||
3616 | power *= default_scale_smt_power(sd, cpu); | ||
3617 | |||
3752 | power >>= SCHED_LOAD_SHIFT; | 3618 | power >>= SCHED_LOAD_SHIFT; |
3753 | } | 3619 | } |
3754 | 3620 | ||
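update_cpu_power() above now composes two fixed-point factors, an arch/frequency factor and, for SMT siblings, an SMT factor, each applied as a multiply followed by a shift of SCHED_LOAD_SHIFT bits. A standalone worked example of that arithmetic with the usual SCHED_LOAD_SCALE of 1024 (the smt_gain and thread-count values are illustrative assumptions):

#include <stdio.h>

#define SCHED_LOAD_SHIFT 10
#define SCHED_LOAD_SCALE (1UL << SCHED_LOAD_SHIFT)   /* 1024 */

int main(void)
{
	unsigned long power = SCHED_LOAD_SCALE;

	/* arch/default frequency scaling: here "no scaling", factor 1024/1024 */
	unsigned long freq_scale = SCHED_LOAD_SCALE;
	power = (power * freq_scale) >> SCHED_LOAD_SHIFT;

	/* SMT scaling: smt_gain spread over the hardware threads of the core */
	unsigned long smt_gain = 1178;   /* illustrative default */
	unsigned long weight = 2;        /* two siblings */
	unsigned long smt_scale = smt_gain / weight;
	power = (power * smt_scale) >> SCHED_LOAD_SHIFT;

	printf("resulting cpu_power = %lu (out of %lu)\n", power, SCHED_LOAD_SCALE);
	return 0;
}

With these numbers each hardware thread reports about 589 of the nominal 1024, so a two-thread core ends up slightly above one full cpu_power in total.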
@@ -4161,26 +4027,6 @@ ret: | |||
4161 | return NULL; | 4027 | return NULL; |
4162 | } | 4028 | } |
4163 | 4029 | ||
4164 | static struct sched_group *group_of(int cpu) | ||
4165 | { | ||
4166 | struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd); | ||
4167 | |||
4168 | if (!sd) | ||
4169 | return NULL; | ||
4170 | |||
4171 | return sd->groups; | ||
4172 | } | ||
4173 | |||
4174 | static unsigned long power_of(int cpu) | ||
4175 | { | ||
4176 | struct sched_group *group = group_of(cpu); | ||
4177 | |||
4178 | if (!group) | ||
4179 | return SCHED_LOAD_SCALE; | ||
4180 | |||
4181 | return group->cpu_power; | ||
4182 | } | ||
4183 | |||
4184 | /* | 4030 | /* |
4185 | * find_busiest_queue - find the busiest runqueue among the cpus in group. | 4031 | * find_busiest_queue - find the busiest runqueue among the cpus in group. |
4186 | */ | 4032 | */ |
@@ -5465,14 +5311,13 @@ static inline void schedule_debug(struct task_struct *prev) | |||
5465 | #endif | 5311 | #endif |
5466 | } | 5312 | } |
5467 | 5313 | ||
5468 | static void put_prev_task(struct rq *rq, struct task_struct *prev) | 5314 | static void put_prev_task(struct rq *rq, struct task_struct *p) |
5469 | { | 5315 | { |
5470 | if (prev->state == TASK_RUNNING) { | 5316 | u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime; |
5471 | u64 runtime = prev->se.sum_exec_runtime; | ||
5472 | 5317 | ||
5473 | runtime -= prev->se.prev_sum_exec_runtime; | 5318 | update_avg(&p->se.avg_running, runtime); |
5474 | runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); | ||
5475 | 5319 | ||
5320 | if (p->state == TASK_RUNNING) { | ||
5476 | /* | 5321 | /* |
5477 | * In order to avoid avg_overlap growing stale when we are | 5322 | * In order to avoid avg_overlap growing stale when we are |
5478 | * indeed overlapping and hence not getting put to sleep, grow | 5323 | * indeed overlapping and hence not getting put to sleep, grow |
@@ -5482,9 +5327,12 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev) | |||
5482 | * correlates to the amount of cache footprint a task can | 5327 | * correlates to the amount of cache footprint a task can |
5483 | * build up. | 5328 | * build up. |
5484 | */ | 5329 | */ |
5485 | update_avg(&prev->se.avg_overlap, runtime); | 5330 | runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); |
5331 | update_avg(&p->se.avg_overlap, runtime); | ||
5332 | } else { | ||
5333 | update_avg(&p->se.avg_running, 0); | ||
5486 | } | 5334 | } |
5487 | prev->sched_class->put_prev_task(rq, prev); | 5335 | p->sched_class->put_prev_task(rq, p); |
5488 | } | 5336 | } |
5489 | 5337 | ||
5490 | /* | 5338 | /* |
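The reworked put_prev_task() above now records every descheduling into the new se.avg_running average, which the WAKEUP_RUNNING feature later compares at wakeup time. The kernel's update_avg() helper is a shift-based exponential moving average; a standalone sketch of that idea follows (the 1/8 weighting mirrors update_avg() in sched.c, but treat the exact constant as an assumption):

#include <stdio.h>

typedef unsigned long long u64;
typedef long long s64;

/* exponential moving average: new = old + (sample - old) / 8 */
static void update_avg(u64 *avg, u64 sample)
{
	s64 diff = (s64)(sample - *avg);
	*avg += diff >> 3;
}

int main(void)
{
	u64 avg_running = 0;
	u64 runtimes[] = { 2000000, 1500000, 300000, 0, 0 };   /* ns per slice */

	for (int i = 0; i < 5; i++) {
		update_avg(&avg_running, runtimes[i]);
		printf("slice %d: sample=%llu ns  avg_running=%llu ns\n",
		       i, runtimes[i], avg_running);
	}
	return 0;
}

Note how the zero samples (the "else" branch, task not TASK_RUNNING when descheduled) pull the average back down, so avg_running tracks how long the task tends to stay on the CPU.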
@@ -5716,10 +5564,10 @@ asmlinkage void __sched preempt_schedule_irq(void) | |||
5716 | 5564 | ||
5717 | #endif /* CONFIG_PREEMPT */ | 5565 | #endif /* CONFIG_PREEMPT */ |
5718 | 5566 | ||
5719 | int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, | 5567 | int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags, |
5720 | void *key) | 5568 | void *key) |
5721 | { | 5569 | { |
5722 | return try_to_wake_up(curr->private, mode, sync); | 5570 | return try_to_wake_up(curr->private, mode, wake_flags); |
5723 | } | 5571 | } |
5724 | EXPORT_SYMBOL(default_wake_function); | 5572 | EXPORT_SYMBOL(default_wake_function); |
5725 | 5573 | ||
@@ -5733,14 +5581,14 @@ EXPORT_SYMBOL(default_wake_function); | |||
5733 | * zero in this (rare) case, and we handle it by continuing to scan the queue. | 5581 | * zero in this (rare) case, and we handle it by continuing to scan the queue. |
5734 | */ | 5582 | */ |
5735 | static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, | 5583 | static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, |
5736 | int nr_exclusive, int sync, void *key) | 5584 | int nr_exclusive, int wake_flags, void *key) |
5737 | { | 5585 | { |
5738 | wait_queue_t *curr, *next; | 5586 | wait_queue_t *curr, *next; |
5739 | 5587 | ||
5740 | list_for_each_entry_safe(curr, next, &q->task_list, task_list) { | 5588 | list_for_each_entry_safe(curr, next, &q->task_list, task_list) { |
5741 | unsigned flags = curr->flags; | 5589 | unsigned flags = curr->flags; |
5742 | 5590 | ||
5743 | if (curr->func(curr, mode, sync, key) && | 5591 | if (curr->func(curr, mode, wake_flags, key) && |
5744 | (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) | 5592 | (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) |
5745 | break; | 5593 | break; |
5746 | } | 5594 | } |
@@ -5801,16 +5649,16 @@ void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, | |||
5801 | int nr_exclusive, void *key) | 5649 | int nr_exclusive, void *key) |
5802 | { | 5650 | { |
5803 | unsigned long flags; | 5651 | unsigned long flags; |
5804 | int sync = 1; | 5652 | int wake_flags = WF_SYNC; |
5805 | 5653 | ||
5806 | if (unlikely(!q)) | 5654 | if (unlikely(!q)) |
5807 | return; | 5655 | return; |
5808 | 5656 | ||
5809 | if (unlikely(!nr_exclusive)) | 5657 | if (unlikely(!nr_exclusive)) |
5810 | sync = 0; | 5658 | wake_flags = 0; |
5811 | 5659 | ||
5812 | spin_lock_irqsave(&q->lock, flags); | 5660 | spin_lock_irqsave(&q->lock, flags); |
5813 | __wake_up_common(q, mode, nr_exclusive, sync, key); | 5661 | __wake_up_common(q, mode, nr_exclusive, wake_flags, key); |
5814 | spin_unlock_irqrestore(&q->lock, flags); | 5662 | spin_unlock_irqrestore(&q->lock, flags); |
5815 | } | 5663 | } |
5816 | EXPORT_SYMBOL_GPL(__wake_up_sync_key); | 5664 | EXPORT_SYMBOL_GPL(__wake_up_sync_key); |
@@ -8000,9 +7848,7 @@ static int sd_degenerate(struct sched_domain *sd) | |||
8000 | } | 7848 | } |
8001 | 7849 | ||
8002 | /* Following flags don't use groups */ | 7850 | /* Following flags don't use groups */ |
8003 | if (sd->flags & (SD_WAKE_IDLE | | 7851 | if (sd->flags & (SD_WAKE_AFFINE)) |
8004 | SD_WAKE_AFFINE | | ||
8005 | SD_WAKE_BALANCE)) | ||
8006 | return 0; | 7852 | return 0; |
8007 | 7853 | ||
8008 | return 1; | 7854 | return 1; |
@@ -8019,10 +7865,6 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) | |||
8019 | if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent))) | 7865 | if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent))) |
8020 | return 0; | 7866 | return 0; |
8021 | 7867 | ||
8022 | /* Does parent contain flags not in child? */ | ||
8023 | /* WAKE_BALANCE is a subset of WAKE_AFFINE */ | ||
8024 | if (cflags & SD_WAKE_AFFINE) | ||
8025 | pflags &= ~SD_WAKE_BALANCE; | ||
8026 | /* Flags needing groups don't count if only 1 group in parent */ | 7868 | /* Flags needing groups don't count if only 1 group in parent */ |
8027 | if (parent->groups == parent->groups->next) { | 7869 | if (parent->groups == parent->groups->next) { |
8028 | pflags &= ~(SD_LOAD_BALANCE | | 7870 | pflags &= ~(SD_LOAD_BALANCE | |
@@ -8708,10 +8550,10 @@ static void set_domain_attribute(struct sched_domain *sd, | |||
8708 | request = attr->relax_domain_level; | 8550 | request = attr->relax_domain_level; |
8709 | if (request < sd->level) { | 8551 | if (request < sd->level) { |
8710 | /* turn off idle balance on this domain */ | 8552 | /* turn off idle balance on this domain */ |
8711 | sd->flags &= ~(SD_WAKE_IDLE|SD_BALANCE_NEWIDLE); | 8553 | sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); |
8712 | } else { | 8554 | } else { |
8713 | /* turn on idle balance on this domain */ | 8555 | /* turn on idle balance on this domain */ |
8714 | sd->flags |= (SD_WAKE_IDLE_FAR|SD_BALANCE_NEWIDLE); | 8556 | sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); |
8715 | } | 8557 | } |
8716 | } | 8558 | } |
8717 | 8559 | ||
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 5ddbd0891267..efb84409bc43 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
@@ -395,6 +395,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) | |||
395 | PN(se.sum_exec_runtime); | 395 | PN(se.sum_exec_runtime); |
396 | PN(se.avg_overlap); | 396 | PN(se.avg_overlap); |
397 | PN(se.avg_wakeup); | 397 | PN(se.avg_wakeup); |
398 | PN(se.avg_running); | ||
398 | 399 | ||
399 | nr_switches = p->nvcsw + p->nivcsw; | 400 | nr_switches = p->nvcsw + p->nivcsw; |
400 | 401 | ||
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index a097e909e80f..990b188803ce 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -712,7 +712,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
712 | 712 | ||
713 | if (!initial) { | 713 | if (!initial) { |
714 | /* sleeps upto a single latency don't count. */ | 714 | /* sleeps upto a single latency don't count. */ |
715 | if (sched_feat(NEW_FAIR_SLEEPERS)) { | 715 | if (sched_feat(FAIR_SLEEPERS)) { |
716 | unsigned long thresh = sysctl_sched_latency; | 716 | unsigned long thresh = sysctl_sched_latency; |
717 | 717 | ||
718 | /* | 718 | /* |
@@ -726,6 +726,13 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
726 | task_of(se)->policy != SCHED_IDLE)) | 726 | task_of(se)->policy != SCHED_IDLE)) |
727 | thresh = calc_delta_fair(thresh, se); | 727 | thresh = calc_delta_fair(thresh, se); |
728 | 728 | ||
729 | /* | ||
730 | * Halve their sleep time's effect, to allow | ||
731 | * for a gentler effect of sleepers: | ||
732 | */ | ||
733 | if (sched_feat(GENTLE_FAIR_SLEEPERS)) | ||
734 | thresh >>= 1; | ||
735 | |||
729 | vruntime -= thresh; | 736 | vruntime -= thresh; |
730 | } | 737 | } |
731 | } | 738 | } |
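GENTLE_FAIR_SLEEPERS halves the sleeper credit before place_entity() subtracts it from vruntime, so a waking task still gets a head start but cannot rip the spread apart as hard. A small numeric sketch of that placement (the latency value is just an example, and the calc_delta_fair() nice-level reweighting is ignored for clarity):

#include <stdio.h>

int main(void)
{
	unsigned long long sched_latency_ns = 20000000ULL;  /* example: 20 ms */
	unsigned long long min_vruntime     = 100000000ULL; /* current cfs_rq floor */
	int gentle = 1;                                     /* GENTLE_FAIR_SLEEPERS */

	unsigned long long thresh = sched_latency_ns;
	if (gentle)
		thresh >>= 1;              /* 10 ms of credit instead of 20 ms */

	unsigned long long vruntime = min_vruntime - thresh;
	printf("sleeper placed %llu ns before min_vruntime (at %llu)\n",
	       thresh, vruntime);
	return 0;
}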
@@ -758,10 +765,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) | |||
758 | 765 | ||
759 | static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) | 766 | static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) |
760 | { | 767 | { |
761 | if (cfs_rq->last == se) | 768 | if (!se || cfs_rq->last == se) |
762 | cfs_rq->last = NULL; | 769 | cfs_rq->last = NULL; |
763 | 770 | ||
764 | if (cfs_rq->next == se) | 771 | if (!se || cfs_rq->next == se) |
765 | cfs_rq->next = NULL; | 772 | cfs_rq->next = NULL; |
766 | } | 773 | } |
767 | 774 | ||
@@ -1063,83 +1070,6 @@ static void yield_task_fair(struct rq *rq) | |||
1063 | se->vruntime = rightmost->vruntime + 1; | 1070 | se->vruntime = rightmost->vruntime + 1; |
1064 | } | 1071 | } |
1065 | 1072 | ||
1066 | /* | ||
1067 | * wake_idle() will wake a task on an idle cpu if task->cpu is | ||
1068 | * not idle and an idle cpu is available. The span of cpus to | ||
1069 | * search starts with cpus closest then further out as needed, | ||
1070 | * so we always favor a closer, idle cpu. | ||
1071 | * Domains may include CPUs that are not usable for migration, | ||
1072 | * hence we need to mask them out (rq->rd->online) | ||
1073 | * | ||
1074 | * Returns the CPU we should wake onto. | ||
1075 | */ | ||
1076 | #if defined(ARCH_HAS_SCHED_WAKE_IDLE) | ||
1077 | |||
1078 | #define cpu_rd_active(cpu, rq) cpumask_test_cpu(cpu, rq->rd->online) | ||
1079 | |||
1080 | static int wake_idle(int cpu, struct task_struct *p) | ||
1081 | { | ||
1082 | struct sched_domain *sd; | ||
1083 | int i; | ||
1084 | unsigned int chosen_wakeup_cpu; | ||
1085 | int this_cpu; | ||
1086 | struct rq *task_rq = task_rq(p); | ||
1087 | |||
1088 | /* | ||
1089 | * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu | ||
1090 | * are idle and this is not a kernel thread and this task's affinity | ||
1091 | * allows it to be moved to preferred cpu, then just move! | ||
1092 | */ | ||
1093 | |||
1094 | this_cpu = smp_processor_id(); | ||
1095 | chosen_wakeup_cpu = | ||
1096 | cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu; | ||
1097 | |||
1098 | if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP && | ||
1099 | idle_cpu(cpu) && idle_cpu(this_cpu) && | ||
1100 | p->mm && !(p->flags & PF_KTHREAD) && | ||
1101 | cpu_isset(chosen_wakeup_cpu, p->cpus_allowed)) | ||
1102 | return chosen_wakeup_cpu; | ||
1103 | |||
1104 | /* | ||
1105 | * If it is idle, then it is the best cpu to run this task. | ||
1106 | * | ||
1107 | * This cpu is also the best, if it has more than one task already. | ||
1108 | * Siblings must be also busy(in most cases) as they didn't already | ||
1109 | * pickup the extra load from this cpu and hence we need not check | ||
1110 | * sibling runqueue info. This will avoid the checks and cache miss | ||
1111 | * penalities associated with that. | ||
1112 | */ | ||
1113 | if (idle_cpu(cpu) || cpu_rq(cpu)->cfs.nr_running > 1) | ||
1114 | return cpu; | ||
1115 | |||
1116 | for_each_domain(cpu, sd) { | ||
1117 | if ((sd->flags & SD_WAKE_IDLE) | ||
1118 | || ((sd->flags & SD_WAKE_IDLE_FAR) | ||
1119 | && !task_hot(p, task_rq->clock, sd))) { | ||
1120 | for_each_cpu_and(i, sched_domain_span(sd), | ||
1121 | &p->cpus_allowed) { | ||
1122 | if (cpu_rd_active(i, task_rq) && idle_cpu(i)) { | ||
1123 | if (i != task_cpu(p)) { | ||
1124 | schedstat_inc(p, | ||
1125 | se.nr_wakeups_idle); | ||
1126 | } | ||
1127 | return i; | ||
1128 | } | ||
1129 | } | ||
1130 | } else { | ||
1131 | break; | ||
1132 | } | ||
1133 | } | ||
1134 | return cpu; | ||
1135 | } | ||
1136 | #else /* !ARCH_HAS_SCHED_WAKE_IDLE*/ | ||
1137 | static inline int wake_idle(int cpu, struct task_struct *p) | ||
1138 | { | ||
1139 | return cpu; | ||
1140 | } | ||
1141 | #endif | ||
1142 | |||
1143 | #ifdef CONFIG_SMP | 1073 | #ifdef CONFIG_SMP |
1144 | 1074 | ||
1145 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1075 | #ifdef CONFIG_FAIR_GROUP_SCHED |
@@ -1226,25 +1156,34 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu, | |||
1226 | 1156 | ||
1227 | #endif | 1157 | #endif |
1228 | 1158 | ||
1229 | static int | 1159 | static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) |
1230 | wake_affine(struct sched_domain *this_sd, struct rq *this_rq, | ||
1231 | struct task_struct *p, int prev_cpu, int this_cpu, int sync, | ||
1232 | int idx, unsigned long load, unsigned long this_load, | ||
1233 | unsigned int imbalance) | ||
1234 | { | 1160 | { |
1235 | struct task_struct *curr = this_rq->curr; | 1161 | struct task_struct *curr = current; |
1236 | struct task_group *tg; | 1162 | unsigned long this_load, load; |
1237 | unsigned long tl = this_load; | 1163 | int idx, this_cpu, prev_cpu; |
1238 | unsigned long tl_per_task; | 1164 | unsigned long tl_per_task; |
1165 | unsigned int imbalance; | ||
1166 | struct task_group *tg; | ||
1239 | unsigned long weight; | 1167 | unsigned long weight; |
1240 | int balanced; | 1168 | int balanced; |
1241 | 1169 | ||
1242 | if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) | 1170 | idx = sd->wake_idx; |
1243 | return 0; | 1171 | this_cpu = smp_processor_id(); |
1172 | prev_cpu = task_cpu(p); | ||
1173 | load = source_load(prev_cpu, idx); | ||
1174 | this_load = target_load(this_cpu, idx); | ||
1244 | 1175 | ||
1245 | if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost || | 1176 | if (sync) { |
1246 | p->se.avg_overlap > sysctl_sched_migration_cost)) | 1177 | if (sched_feat(SYNC_LESS) && |
1247 | sync = 0; | 1178 | (curr->se.avg_overlap > sysctl_sched_migration_cost || |
1179 | p->se.avg_overlap > sysctl_sched_migration_cost)) | ||
1180 | sync = 0; | ||
1181 | } else { | ||
1182 | if (sched_feat(SYNC_MORE) && | ||
1183 | (curr->se.avg_overlap < sysctl_sched_migration_cost && | ||
1184 | p->se.avg_overlap < sysctl_sched_migration_cost)) | ||
1185 | sync = 1; | ||
1186 | } | ||
1248 | 1187 | ||
1249 | /* | 1188 | /* |
1250 | * If sync wakeup then subtract the (maximum possible) | 1189 | * If sync wakeup then subtract the (maximum possible) |
@@ -1255,24 +1194,26 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, | |||
1255 | tg = task_group(current); | 1194 | tg = task_group(current); |
1256 | weight = current->se.load.weight; | 1195 | weight = current->se.load.weight; |
1257 | 1196 | ||
1258 | tl += effective_load(tg, this_cpu, -weight, -weight); | 1197 | this_load += effective_load(tg, this_cpu, -weight, -weight); |
1259 | load += effective_load(tg, prev_cpu, 0, -weight); | 1198 | load += effective_load(tg, prev_cpu, 0, -weight); |
1260 | } | 1199 | } |
1261 | 1200 | ||
1262 | tg = task_group(p); | 1201 | tg = task_group(p); |
1263 | weight = p->se.load.weight; | 1202 | weight = p->se.load.weight; |
1264 | 1203 | ||
1204 | imbalance = 100 + (sd->imbalance_pct - 100) / 2; | ||
1205 | |||
1265 | /* | 1206 | /* |
1266 | * In low-load situations, where prev_cpu is idle and this_cpu is idle | 1207 | * In low-load situations, where prev_cpu is idle and this_cpu is idle |
1267 | * due to the sync cause above having dropped tl to 0, we'll always have | 1208 | * due to the sync cause above having dropped this_load to 0, we'll |
1268 | * an imbalance, but there's really nothing you can do about that, so | 1209 | * always have an imbalance, but there's really nothing you can do |
1269 | * that's good too. | 1210 | * about that, so that's good too. |
1270 | * | 1211 | * |
1271 | * Otherwise check if either cpus are near enough in load to allow this | 1212 | * Otherwise check if either cpus are near enough in load to allow this |
1272 | * task to be woken on this_cpu. | 1213 | * task to be woken on this_cpu. |
1273 | */ | 1214 | */ |
1274 | balanced = !tl || | 1215 | balanced = !this_load || |
1275 | 100*(tl + effective_load(tg, this_cpu, weight, weight)) <= | 1216 | 100*(this_load + effective_load(tg, this_cpu, weight, weight)) <= |
1276 | imbalance*(load + effective_load(tg, prev_cpu, 0, weight)); | 1217 | imbalance*(load + effective_load(tg, prev_cpu, 0, weight)); |
1277 | 1218 | ||
1278 | /* | 1219 | /* |
@@ -1286,14 +1227,15 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, | |||
1286 | schedstat_inc(p, se.nr_wakeups_affine_attempts); | 1227 | schedstat_inc(p, se.nr_wakeups_affine_attempts); |
1287 | tl_per_task = cpu_avg_load_per_task(this_cpu); | 1228 | tl_per_task = cpu_avg_load_per_task(this_cpu); |
1288 | 1229 | ||
1289 | if (balanced || (tl <= load && tl + target_load(prev_cpu, idx) <= | 1230 | if (balanced || |
1290 | tl_per_task)) { | 1231 | (this_load <= load && |
1232 | this_load + target_load(prev_cpu, idx) <= tl_per_task)) { | ||
1291 | /* | 1233 | /* |
1292 | * This domain has SD_WAKE_AFFINE and | 1234 | * This domain has SD_WAKE_AFFINE and |
1293 | * p is cache cold in this domain, and | 1235 | * p is cache cold in this domain, and |
1294 | * there is no bad imbalance. | 1236 | * there is no bad imbalance. |
1295 | */ | 1237 | */ |
1296 | schedstat_inc(this_sd, ttwu_move_affine); | 1238 | schedstat_inc(sd, ttwu_move_affine); |
1297 | schedstat_inc(p, se.nr_wakeups_affine); | 1239 | schedstat_inc(p, se.nr_wakeups_affine); |
1298 | 1240 | ||
1299 | return 1; | 1241 | return 1; |
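wake_affine() now derives its own load index, loads and imbalance factor instead of receiving them, but the test itself is unchanged: with an imbalance_pct of, say, 125 the effective factor becomes 112, and the wakeup is pulled toward this_cpu when 100 times the projected local load stays within 112 times the projected previous-cpu load. A small arithmetic sketch with made-up load figures (the group-scheduling effective_load() correction is left out):

#include <stdio.h>

int main(void)
{
	unsigned int imbalance_pct = 125;                          /* typical domain value */
	unsigned int imbalance = 100 + (imbalance_pct - 100) / 2;  /* = 112 */

	unsigned long this_load = 900;   /* target_load(this_cpu, idx), made up */
	unsigned long prev_load = 1400;  /* source_load(prev_cpu, idx), made up */
	unsigned long task_w    = 1024;  /* p->se.load.weight for a nice-0 task */

	int balanced = this_load == 0 ||
		       100 * (this_load + task_w) <= imbalance * (prev_load + task_w);

	printf("imbalance=%u -> %s affine wakeup\n",
	       imbalance, balanced ? "allow" : "reject");
	return 0;
}

Here 100 * 1924 = 192400 against 112 * 2424 = 271488, so the affine wakeup is allowed.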
@@ -1301,65 +1243,215 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, | |||
1301 | return 0; | 1243 | return 0; |
1302 | } | 1244 | } |
1303 | 1245 | ||
1304 | static int select_task_rq_fair(struct task_struct *p, int sync) | 1246 | /* |
1247 | * find_idlest_group finds and returns the least busy CPU group within the | ||
1248 | * domain. | ||
1249 | */ | ||
1250 | static struct sched_group * | ||
1251 | find_idlest_group(struct sched_domain *sd, struct task_struct *p, | ||
1252 | int this_cpu, int load_idx) | ||
1305 | { | 1253 | { |
1306 | struct sched_domain *sd, *this_sd = NULL; | 1254 | struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups; |
1307 | int prev_cpu, this_cpu, new_cpu; | 1255 | unsigned long min_load = ULONG_MAX, this_load = 0; |
1308 | unsigned long load, this_load; | 1256 | int imbalance = 100 + (sd->imbalance_pct-100)/2; |
1309 | struct rq *this_rq; | ||
1310 | unsigned int imbalance; | ||
1311 | int idx; | ||
1312 | 1257 | ||
1313 | prev_cpu = task_cpu(p); | 1258 | do { |
1314 | this_cpu = smp_processor_id(); | 1259 | unsigned long load, avg_load; |
1315 | this_rq = cpu_rq(this_cpu); | 1260 | int local_group; |
1316 | new_cpu = prev_cpu; | 1261 | int i; |
1317 | 1262 | ||
1318 | /* | 1263 | /* Skip over this group if it has no CPUs allowed */ |
1319 | * 'this_sd' is the first domain that both | 1264 | if (!cpumask_intersects(sched_group_cpus(group), |
1320 | * this_cpu and prev_cpu are present in: | 1265 | &p->cpus_allowed)) |
1321 | */ | 1266 | continue; |
1322 | for_each_domain(this_cpu, sd) { | 1267 | |
1323 | if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) { | 1268 | local_group = cpumask_test_cpu(this_cpu, |
1324 | this_sd = sd; | 1269 | sched_group_cpus(group)); |
1325 | break; | 1270 | |
1271 | /* Tally up the load of all CPUs in the group */ | ||
1272 | avg_load = 0; | ||
1273 | |||
1274 | for_each_cpu(i, sched_group_cpus(group)) { | ||
1275 | /* Bias balancing toward cpus of our domain */ | ||
1276 | if (local_group) | ||
1277 | load = source_load(i, load_idx); | ||
1278 | else | ||
1279 | load = target_load(i, load_idx); | ||
1280 | |||
1281 | avg_load += load; | ||
1282 | } | ||
1283 | |||
1284 | /* Adjust by relative CPU power of the group */ | ||
1285 | avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power; | ||
1286 | |||
1287 | if (local_group) { | ||
1288 | this_load = avg_load; | ||
1289 | this = group; | ||
1290 | } else if (avg_load < min_load) { | ||
1291 | min_load = avg_load; | ||
1292 | idlest = group; | ||
1293 | } | ||
1294 | } while (group = group->next, group != sd->groups); | ||
1295 | |||
1296 | if (!idlest || 100*this_load < imbalance*min_load) | ||
1297 | return NULL; | ||
1298 | return idlest; | ||
1299 | } | ||
1300 | |||
1301 | /* | ||
1302 | * find_idlest_cpu - find the idlest cpu among the cpus in group. | ||
1303 | */ | ||
1304 | static int | ||
1305 | find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) | ||
1306 | { | ||
1307 | unsigned long load, min_load = ULONG_MAX; | ||
1308 | int idlest = -1; | ||
1309 | int i; | ||
1310 | |||
1311 | /* Traverse only the allowed CPUs */ | ||
1312 | for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) { | ||
1313 | load = weighted_cpuload(i); | ||
1314 | |||
1315 | if (load < min_load || (load == min_load && i == this_cpu)) { | ||
1316 | min_load = load; | ||
1317 | idlest = i; | ||
1326 | } | 1318 | } |
1327 | } | 1319 | } |
1328 | 1320 | ||
1329 | if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed))) | 1321 | return idlest; |
1330 | goto out; | 1322 | } |
1331 | 1323 | ||
1332 | /* | 1324 | /* |
1333 | * Check for affine wakeup and passive balancing possibilities. | 1325 | * sched_balance_self: balance the current task (running on cpu) in domains |
1334 | */ | 1326 | * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and |
1335 | if (!this_sd) | 1327 | * SD_BALANCE_EXEC. |
1328 | * | ||
1329 | * Balance, ie. select the least loaded group. | ||
1330 | * | ||
1331 | * Returns the target CPU number, or the same CPU if no balancing is needed. | ||
1332 | * | ||
1333 | * preempt must be disabled. | ||
1334 | */ | ||
1335 | static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) | ||
1336 | { | ||
1337 | struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL; | ||
1338 | int cpu = smp_processor_id(); | ||
1339 | int prev_cpu = task_cpu(p); | ||
1340 | int new_cpu = cpu; | ||
1341 | int want_affine = 0; | ||
1342 | int want_sd = 1; | ||
1343 | int sync = wake_flags & WF_SYNC; | ||
1344 | |||
1345 | if (sd_flag & SD_BALANCE_WAKE) { | ||
1346 | if (sched_feat(AFFINE_WAKEUPS)) | ||
1347 | want_affine = 1; | ||
1348 | new_cpu = prev_cpu; | ||
1349 | } | ||
1350 | |||
1351 | rcu_read_lock(); | ||
1352 | for_each_domain(cpu, tmp) { | ||
1353 | /* | ||
1354 | * If power savings logic is enabled for a domain, see if we | ||
1355 | * are not overloaded, if so, don't balance wider. | ||
1356 | */ | ||
1357 | if (tmp->flags & (SD_POWERSAVINGS_BALANCE|SD_PREFER_LOCAL)) { | ||
1358 | unsigned long power = 0; | ||
1359 | unsigned long nr_running = 0; | ||
1360 | unsigned long capacity; | ||
1361 | int i; | ||
1362 | |||
1363 | for_each_cpu(i, sched_domain_span(tmp)) { | ||
1364 | power += power_of(i); | ||
1365 | nr_running += cpu_rq(i)->cfs.nr_running; | ||
1366 | } | ||
1367 | |||
1368 | capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE); | ||
1369 | |||
1370 | if (tmp->flags & SD_POWERSAVINGS_BALANCE) | ||
1371 | nr_running /= 2; | ||
1372 | |||
1373 | if (nr_running < capacity) | ||
1374 | want_sd = 0; | ||
1375 | } | ||
1376 | |||
1377 | if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && | ||
1378 | cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) { | ||
1379 | |||
1380 | affine_sd = tmp; | ||
1381 | want_affine = 0; | ||
1382 | } | ||
1383 | |||
1384 | if (!want_sd && !want_affine) | ||
1385 | break; | ||
1386 | |||
1387 | if (!(tmp->flags & sd_flag)) | ||
1388 | continue; | ||
1389 | |||
1390 | if (want_sd) | ||
1391 | sd = tmp; | ||
1392 | } | ||
1393 | |||
1394 | if (sched_feat(LB_SHARES_UPDATE)) { | ||
1395 | /* | ||
1396 | * Pick the largest domain to update shares over | ||
1397 | */ | ||
1398 | tmp = sd; | ||
1399 | if (affine_sd && (!tmp || | ||
1400 | cpumask_weight(sched_domain_span(affine_sd)) > | ||
1401 | cpumask_weight(sched_domain_span(sd)))) | ||
1402 | tmp = affine_sd; | ||
1403 | |||
1404 | if (tmp) | ||
1405 | update_shares(tmp); | ||
1406 | } | ||
1407 | |||
1408 | if (affine_sd && wake_affine(affine_sd, p, sync)) { | ||
1409 | new_cpu = cpu; | ||
1336 | goto out; | 1410 | goto out; |
1411 | } | ||
1337 | 1412 | ||
1338 | idx = this_sd->wake_idx; | 1413 | while (sd) { |
1414 | int load_idx = sd->forkexec_idx; | ||
1415 | struct sched_group *group; | ||
1416 | int weight; | ||
1339 | 1417 | ||
1340 | imbalance = 100 + (this_sd->imbalance_pct - 100) / 2; | 1418 | if (!(sd->flags & sd_flag)) { |
1419 | sd = sd->child; | ||
1420 | continue; | ||
1421 | } | ||
1341 | 1422 | ||
1342 | load = source_load(prev_cpu, idx); | 1423 | if (sd_flag & SD_BALANCE_WAKE) |
1343 | this_load = target_load(this_cpu, idx); | 1424 | load_idx = sd->wake_idx; |
1344 | 1425 | ||
1345 | if (wake_affine(this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx, | 1426 | group = find_idlest_group(sd, p, cpu, load_idx); |
1346 | load, this_load, imbalance)) | 1427 | if (!group) { |
1347 | return this_cpu; | 1428 | sd = sd->child; |
1429 | continue; | ||
1430 | } | ||
1348 | 1431 | ||
1349 | /* | 1432 | new_cpu = find_idlest_cpu(group, p, cpu); |
1350 | * Start passive balancing when half the imbalance_pct | 1433 | if (new_cpu == -1 || new_cpu == cpu) { |
1351 | * limit is reached. | 1434 | /* Now try balancing at a lower domain level of cpu */ |
1352 | */ | 1435 | sd = sd->child; |
1353 | if (this_sd->flags & SD_WAKE_BALANCE) { | 1436 | continue; |
1354 | if (imbalance*this_load <= 100*load) { | ||
1355 | schedstat_inc(this_sd, ttwu_move_balance); | ||
1356 | schedstat_inc(p, se.nr_wakeups_passive); | ||
1357 | return this_cpu; | ||
1358 | } | 1437 | } |
1438 | |||
1439 | /* Now try balancing at a lower domain level of new_cpu */ | ||
1440 | cpu = new_cpu; | ||
1441 | weight = cpumask_weight(sched_domain_span(sd)); | ||
1442 | sd = NULL; | ||
1443 | for_each_domain(cpu, tmp) { | ||
1444 | if (weight <= cpumask_weight(sched_domain_span(tmp))) | ||
1445 | break; | ||
1446 | if (tmp->flags & sd_flag) | ||
1447 | sd = tmp; | ||
1448 | } | ||
1449 | /* while loop will break here if sd == NULL */ | ||
1359 | } | 1450 | } |
1360 | 1451 | ||
1361 | out: | 1452 | out: |
1362 | return wake_idle(new_cpu, p); | 1453 | rcu_read_unlock(); |
1454 | return new_cpu; | ||
1363 | } | 1455 | } |
1364 | #endif /* CONFIG_SMP */ | 1456 | #endif /* CONFIG_SMP */ |
1365 | 1457 | ||
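select_task_rq_fair() now owns the whole placement walk that used to live in sched_balance_self(): climb the domain hierarchy once to pick the widest domain carrying the requested SD_BALANCE_* flag (noting an affine domain on the way), then descend again through find_idlest_group()/find_idlest_cpu() until no better CPU turns up. A heavily simplified sketch of that two-phase walk over a toy domain chain (the structures and the fixed best_cpu answers are stand-ins, not the kernel's; the real code also re-resolves domains for the newly chosen cpu on each descent step):

#include <stdio.h>
#include <stddef.h>

struct toy_domain {
	const char *name;
	int flags;
	int best_cpu;                 /* pretend result of find_idlest_cpu() */
	struct toy_domain *parent;    /* wider domain */
	struct toy_domain *child;     /* narrower domain */
};

#define SD_BALANCE_FORK 0x1

int main(void)
{
	struct toy_domain smt  = { "SMT",  SD_BALANCE_FORK, 1, NULL, NULL };
	struct toy_domain node = { "NODE", SD_BALANCE_FORK, 3, NULL, &smt };
	smt.parent = &node;

	/* phase 1: climb to the widest domain that carries the wanted flag */
	struct toy_domain *sd = NULL;
	for (struct toy_domain *tmp = &smt; tmp; tmp = tmp->parent)
		if (tmp->flags & SD_BALANCE_FORK)
			sd = tmp;

	/* phase 2: descend, letting each level pick its idlest cpu */
	int cpu = 0;
	while (sd) {
		if (sd->best_cpu == cpu) {   /* no improvement: try a narrower level */
			sd = sd->child;
			continue;
		}
		cpu = sd->best_cpu;
		printf("domain %s picked cpu %d\n", sd->name, cpu);
		sd = sd->child;
	}
	printf("final cpu: %d\n", cpu);
	return 0;
}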
@@ -1472,11 +1564,12 @@ static void set_next_buddy(struct sched_entity *se) | |||
1472 | /* | 1564 | /* |
1473 | * Preempt the current task with a newly woken task if needed: | 1565 | * Preempt the current task with a newly woken task if needed: |
1474 | */ | 1566 | */ |
1475 | static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) | 1567 | static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) |
1476 | { | 1568 | { |
1477 | struct task_struct *curr = rq->curr; | 1569 | struct task_struct *curr = rq->curr; |
1478 | struct sched_entity *se = &curr->se, *pse = &p->se; | 1570 | struct sched_entity *se = &curr->se, *pse = &p->se; |
1479 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); | 1571 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); |
1572 | int sync = wake_flags & WF_SYNC; | ||
1480 | 1573 | ||
1481 | update_curr(cfs_rq); | 1574 | update_curr(cfs_rq); |
1482 | 1575 | ||
@@ -1502,7 +1595,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) | |||
1502 | */ | 1595 | */ |
1503 | if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle)) | 1596 | if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle)) |
1504 | set_last_buddy(se); | 1597 | set_last_buddy(se); |
1505 | set_next_buddy(pse); | 1598 | if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK)) |
1599 | set_next_buddy(pse); | ||
1506 | 1600 | ||
1507 | /* | 1601 | /* |
1508 | * We can come here with TIF_NEED_RESCHED already set from new task | 1602 | * We can come here with TIF_NEED_RESCHED already set from new task |
@@ -1524,16 +1618,25 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) | |||
1524 | return; | 1618 | return; |
1525 | } | 1619 | } |
1526 | 1620 | ||
1527 | if (!sched_feat(WAKEUP_PREEMPT)) | 1621 | if ((sched_feat(WAKEUP_SYNC) && sync) || |
1528 | return; | 1622 | (sched_feat(WAKEUP_OVERLAP) && |
1529 | 1623 | (se->avg_overlap < sysctl_sched_migration_cost && | |
1530 | if (sched_feat(WAKEUP_OVERLAP) && (sync || | 1624 | pse->avg_overlap < sysctl_sched_migration_cost))) { |
1531 | (se->avg_overlap < sysctl_sched_migration_cost && | ||
1532 | pse->avg_overlap < sysctl_sched_migration_cost))) { | ||
1533 | resched_task(curr); | 1625 | resched_task(curr); |
1534 | return; | 1626 | return; |
1535 | } | 1627 | } |
1536 | 1628 | ||
1629 | if (sched_feat(WAKEUP_RUNNING)) { | ||
1630 | if (pse->avg_running < se->avg_running) { | ||
1631 | set_next_buddy(pse); | ||
1632 | resched_task(curr); | ||
1633 | return; | ||
1634 | } | ||
1635 | } | ||
1636 | |||
1637 | if (!sched_feat(WAKEUP_PREEMPT)) | ||
1638 | return; | ||
1639 | |||
1537 | find_matching_se(&se, &pse); | 1640 | find_matching_se(&se, &pse); |
1538 | 1641 | ||
1539 | BUG_ON(!pse); | 1642 | BUG_ON(!pse); |
@@ -1556,8 +1659,13 @@ static struct task_struct *pick_next_task_fair(struct rq *rq) | |||
1556 | /* | 1659 | /* |
1557 | * If se was a buddy, clear it so that it will have to earn | 1660 | * If se was a buddy, clear it so that it will have to earn |
1558 | * the favour again. | 1661 | * the favour again. |
1662 | * | ||
1663 | * If se was not a buddy, clear the buddies because neither | ||
1663 | * was eligible to run, let them earn it again. | ||
1665 | * | ||
1666 | * IOW. unconditionally clear buddies. | ||
1559 | */ | 1667 | */ |
1560 | __clear_buddies(cfs_rq, se); | 1668 | __clear_buddies(cfs_rq, NULL); |
1561 | set_next_entity(cfs_rq, se); | 1669 | set_next_entity(cfs_rq, se); |
1562 | cfs_rq = group_cfs_rq(se); | 1670 | cfs_rq = group_cfs_rq(se); |
1563 | } while (cfs_rq); | 1671 | } while (cfs_rq); |
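pick_next_task_fair() now passes NULL to __clear_buddies(); together with the earlier hunk, a NULL entity means clear both buddy pointers unconditionally, while a specific entity only clears the pointers that currently reference it. A tiny sketch of that NULL-means-all convention:

#include <stdio.h>
#include <stddef.h>

struct entity { const char *name; };

static struct entity *next_buddy, *last_buddy;

/* NULL se: drop every buddy; otherwise drop only the ones pointing at se */
static void clear_buddies(struct entity *se)
{
	if (!se || last_buddy == se)
		last_buddy = NULL;
	if (!se || next_buddy == se)
		next_buddy = NULL;
}

int main(void)
{
	struct entity a = { "a" }, b = { "b" };

	next_buddy = &a;
	last_buddy = &b;

	clear_buddies(&a);              /* selective: only the 'next' buddy goes away */
	printf("after clear(&a): next=%s last=%s\n",
	       next_buddy ? next_buddy->name : "-",
	       last_buddy ? last_buddy->name : "-");

	next_buddy = &a;
	clear_buddies(NULL);            /* unconditional: both go away */
	printf("after clear(NULL): next=%s last=%s\n",
	       next_buddy ? next_buddy->name : "-",
	       last_buddy ? last_buddy->name : "-");
	return 0;
}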
diff --git a/kernel/sched_features.h b/kernel/sched_features.h index e2dc63a5815d..0d94083582c7 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h | |||
@@ -1,17 +1,123 @@ | |||
1 | SCHED_FEAT(NEW_FAIR_SLEEPERS, 0) | 1 | /* |
2 | * Disregards a certain amount of sleep time (sched_latency_ns) and | ||
3 | * considers the task to be running during that period. This gives it | ||
4 | * a service deficit on wakeup, allowing it to run sooner. | ||
5 | */ | ||
6 | SCHED_FEAT(FAIR_SLEEPERS, 1) | ||
7 | |||
8 | /* | ||
9 | * Only give sleepers 50% of their service deficit. This allows | ||
10 | * them to run sooner, but does not allow tons of sleepers to | ||
11 | * rip the spread apart. | ||
12 | */ | ||
13 | SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 1) | ||
14 | |||
15 | /* | ||
16 | * By not normalizing the sleep time, heavy tasks get an effectively | ||
17 | * longer period, and lighter tasks an effectively shorter period, during | ||
18 | * which they are considered running. | ||
19 | */ | ||
2 | SCHED_FEAT(NORMALIZED_SLEEPER, 0) | 20 | SCHED_FEAT(NORMALIZED_SLEEPER, 0) |
3 | SCHED_FEAT(ADAPTIVE_GRAN, 1) | 21 | |
4 | SCHED_FEAT(WAKEUP_PREEMPT, 1) | 22 | /* |
23 | * Place new tasks ahead so that they do not starve already running | ||
24 | * tasks | ||
25 | */ | ||
5 | SCHED_FEAT(START_DEBIT, 1) | 26 | SCHED_FEAT(START_DEBIT, 1) |
27 | |||
28 | /* | ||
29 | * Should wakeups try to preempt running tasks. | ||
30 | */ | ||
31 | SCHED_FEAT(WAKEUP_PREEMPT, 1) | ||
32 | |||
33 | /* | ||
34 | * Compute wakeup_gran based on task behaviour, clipped to | ||
35 | * [0, sched_wakeup_gran_ns] | ||
36 | */ | ||
37 | SCHED_FEAT(ADAPTIVE_GRAN, 1) | ||
38 | |||
39 | /* | ||
40 | * When converting the wakeup granularity to virtual time, do it such | ||
41 | * that heavier tasks preempting a lighter task have an edge. | ||
42 | */ | ||
43 | SCHED_FEAT(ASYM_GRAN, 1) | ||
44 | |||
45 | /* | ||
46 | * Always wakeup-preempt SYNC wakeups, see SYNC_WAKEUPS. | ||
47 | */ | ||
48 | SCHED_FEAT(WAKEUP_SYNC, 0) | ||
49 | |||
50 | /* | ||
51 | * Wakeup preempt based on task behaviour. Tasks that do not overlap | ||
52 | * don't get preempted. | ||
53 | */ | ||
54 | SCHED_FEAT(WAKEUP_OVERLAP, 0) | ||
55 | |||
56 | /* | ||
57 | * Wakeup preemption towards tasks that run short | ||
58 | */ | ||
59 | SCHED_FEAT(WAKEUP_RUNNING, 0) | ||
60 | |||
61 | /* | ||
62 | * Use the SYNC wakeup hint, pipes and the likes use this to indicate | ||
63 | * the remote end is likely to consume the data we just wrote, and | ||
64 | * therefore has cache benefit from being placed on the same cpu, see | ||
65 | * also AFFINE_WAKEUPS. | ||
66 | */ | ||
67 | SCHED_FEAT(SYNC_WAKEUPS, 1) | ||
68 | |||
69 | /* | ||
70 | * Based on load and program behaviour, see if it makes sense to place | ||
71 | * a newly woken task on the same cpu as the task that woke it -- | ||
72 | * improve cache locality. Typically used with SYNC wakeups as | ||
73 | * generated by pipes and the like, see also SYNC_WAKEUPS. | ||
74 | */ | ||
6 | SCHED_FEAT(AFFINE_WAKEUPS, 1) | 75 | SCHED_FEAT(AFFINE_WAKEUPS, 1) |
76 | |||
77 | /* | ||
78 | * Weaken SYNC hint based on overlap | ||
79 | */ | ||
80 | SCHED_FEAT(SYNC_LESS, 1) | ||
81 | |||
82 | /* | ||
83 | * Add SYNC hint based on overlap | ||
84 | */ | ||
85 | SCHED_FEAT(SYNC_MORE, 0) | ||
86 | |||
87 | /* | ||
88 | * Prefer to schedule the task we woke last (assuming it failed | ||
89 | * wakeup-preemption), since it's likely going to consume data we | ||
90 | * touched, increases cache locality. | ||
91 | */ | ||
92 | SCHED_FEAT(NEXT_BUDDY, 0) | ||
93 | |||
94 | /* | ||
95 | * Prefer to schedule the task that ran last (when we did | ||
96 | * wake-preempt) as that likely will touch the same data, increases | ||
97 | * cache locality. | ||
98 | */ | ||
99 | SCHED_FEAT(LAST_BUDDY, 1) | ||
100 | |||
101 | /* | ||
102 | * Consider buddies to be cache hot, decreases the likelihood of a | ||
103 | * cache buddy being migrated away, increases cache locality. | ||
104 | */ | ||
7 | SCHED_FEAT(CACHE_HOT_BUDDY, 1) | 105 | SCHED_FEAT(CACHE_HOT_BUDDY, 1) |
8 | SCHED_FEAT(SYNC_WAKEUPS, 1) | 106 | |
107 | /* | ||
108 | * Use arch dependent cpu power functions | ||
109 | */ | ||
110 | SCHED_FEAT(ARCH_POWER, 0) | ||
111 | |||
9 | SCHED_FEAT(HRTICK, 0) | 112 | SCHED_FEAT(HRTICK, 0) |
10 | SCHED_FEAT(DOUBLE_TICK, 0) | 113 | SCHED_FEAT(DOUBLE_TICK, 0) |
11 | SCHED_FEAT(ASYM_GRAN, 1) | ||
12 | SCHED_FEAT(LB_BIAS, 1) | 114 | SCHED_FEAT(LB_BIAS, 1) |
13 | SCHED_FEAT(LB_WAKEUP_UPDATE, 1) | 115 | SCHED_FEAT(LB_SHARES_UPDATE, 1) |
14 | SCHED_FEAT(ASYM_EFF_LOAD, 1) | 116 | SCHED_FEAT(ASYM_EFF_LOAD, 1) |
15 | SCHED_FEAT(WAKEUP_OVERLAP, 0) | 117 | |
16 | SCHED_FEAT(LAST_BUDDY, 1) | 118 | /* |
119 | * Spin-wait on mutex acquisition when the mutex owner is running on | ||
120 | * another cpu -- assumes that when the owner is running, it will soon | ||
121 | * release the lock. Decreases scheduling overhead. | ||
122 | */ | ||
17 | SCHED_FEAT(OWNER_SPIN, 1) | 123 | SCHED_FEAT(OWNER_SPIN, 1) |
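The sched_features.h rewrite adds documentation and new bits, but the mechanics stay the same: sched.c includes this header several times with different SCHED_FEAT() definitions to build the bit enum, the default mask and the name table, and with CONFIG_SCHED_DEBUG the bits can usually be flipped at runtime through /sys/kernel/debug/sched_features (for example "echo NO_NEXT_BUDDY > /sys/kernel/debug/sched_features"). A condensed, self-contained sketch of that multi-include trick using a toy feature list, not the real one:

#include <stdio.h>

/* a stand-in features list; the real one is kernel/sched_features.h */
#define TOY_FEATURES(F)       \
	F(FAIR_SLEEPERS, 1)   \
	F(NEXT_BUDDY,    0)   \
	F(LAST_BUDDY,    1)

/* pass 1: enum of bit positions */
#define F_ENUM(name, enabled) __F_##name,
enum { TOY_FEATURES(F_ENUM) __F_NR };

/* pass 2: default mask built from the per-feature defaults */
#define F_MASK(name, enabled) ((enabled) << __F_##name) |
static const unsigned int toy_feat_default = TOY_FEATURES(F_MASK) 0;

/* pass 3: printable names */
#define F_NAME(name, enabled) #name,
static const char *toy_feat_names[] = { TOY_FEATURES(F_NAME) };

#define toy_feat(x) (toy_feat_default & (1u << __F_##x))

int main(void)
{
	for (int i = 0; i < __F_NR; i++)
		printf("%s%s\n",
		       (toy_feat_default & (1u << i)) ? "" : "NO_",
		       toy_feat_names[i]);

	if (toy_feat(LAST_BUDDY))
		printf("LAST_BUDDY is on by default\n");
	return 0;
}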
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 499672c10cbd..a8b448af004b 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c | |||
@@ -6,7 +6,7 @@ | |||
6 | */ | 6 | */ |
7 | 7 | ||
8 | #ifdef CONFIG_SMP | 8 | #ifdef CONFIG_SMP |
9 | static int select_task_rq_idle(struct task_struct *p, int sync) | 9 | static int select_task_rq_idle(struct task_struct *p, int sd_flag, int flags) |
10 | { | 10 | { |
11 | return task_cpu(p); /* IDLE tasks as never migrated */ | 11 | return task_cpu(p); /* IDLE tasks as never migrated */ |
12 | } | 12 | } |
@@ -14,7 +14,7 @@ static int select_task_rq_idle(struct task_struct *p, int sync) | |||
14 | /* | 14 | /* |
15 | * Idle tasks are unconditionally rescheduled: | 15 | * Idle tasks are unconditionally rescheduled: |
16 | */ | 16 | */ |
17 | static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sync) | 17 | static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int flags) |
18 | { | 18 | { |
19 | resched_task(rq->idle); | 19 | resched_task(rq->idle); |
20 | } | 20 | } |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 2eb4bd6a526c..13de7126a6ab 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -938,10 +938,13 @@ static void yield_task_rt(struct rq *rq) | |||
938 | #ifdef CONFIG_SMP | 938 | #ifdef CONFIG_SMP |
939 | static int find_lowest_rq(struct task_struct *task); | 939 | static int find_lowest_rq(struct task_struct *task); |
940 | 940 | ||
941 | static int select_task_rq_rt(struct task_struct *p, int sync) | 941 | static int select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) |
942 | { | 942 | { |
943 | struct rq *rq = task_rq(p); | 943 | struct rq *rq = task_rq(p); |
944 | 944 | ||
945 | if (sd_flag != SD_BALANCE_WAKE) | ||
946 | return smp_processor_id(); | ||
947 | |||
945 | /* | 948 | /* |
946 | * If the current task is an RT task, then | 949 | * If the current task is an RT task, then |
947 | * try to see if we can wake this RT task up on another | 950 | * try to see if we can wake this RT task up on another |
@@ -999,7 +1002,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) | |||
999 | /* | 1002 | /* |
1000 | * Preempt the current task with a newly woken task if needed: | 1003 | * Preempt the current task with a newly woken task if needed: |
1001 | */ | 1004 | */ |
1002 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int sync) | 1005 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) |
1003 | { | 1006 | { |
1004 | if (p->prio < rq->curr->prio) { | 1007 | if (p->prio < rq->curr->prio) { |
1005 | resched_task(rq->curr); | 1008 | resched_task(rq->curr); |
diff --git a/kernel/smp.c b/kernel/smp.c index 94188b8ecc33..8e218500ab14 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -177,6 +177,11 @@ void generic_smp_call_function_interrupt(void) | |||
177 | int cpu = get_cpu(); | 177 | int cpu = get_cpu(); |
178 | 178 | ||
179 | /* | 179 | /* |
180 | * Shouldn't receive this interrupt on a cpu that is not yet online. | ||
181 | */ | ||
182 | WARN_ON_ONCE(!cpu_online(cpu)); | ||
183 | |||
184 | /* | ||
180 | * Ensure entry is visible on call_function_queue after we have | 185 | * Ensure entry is visible on call_function_queue after we have |
181 | * entered the IPI. See comment in smp_call_function_many. | 186 | * entered the IPI. See comment in smp_call_function_many. |
182 | * If we don't have this, then we may miss an entry on the list | 187 | * If we don't have this, then we may miss an entry on the list |
@@ -230,6 +235,11 @@ void generic_smp_call_function_single_interrupt(void) | |||
230 | unsigned int data_flags; | 235 | unsigned int data_flags; |
231 | LIST_HEAD(list); | 236 | LIST_HEAD(list); |
232 | 237 | ||
238 | /* | ||
239 | * Shouldn't receive this interrupt on a cpu that is not yet online. | ||
240 | */ | ||
241 | WARN_ON_ONCE(!cpu_online(smp_processor_id())); | ||
242 | |||
233 | spin_lock(&q->lock); | 243 | spin_lock(&q->lock); |
234 | list_replace_init(&q->list, &list); | 244 | list_replace_init(&q->list, &list); |
235 | spin_unlock(&q->lock); | 245 | spin_unlock(&q->lock); |
@@ -285,8 +295,14 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, | |||
285 | */ | 295 | */ |
286 | this_cpu = get_cpu(); | 296 | this_cpu = get_cpu(); |
287 | 297 | ||
288 | /* Can deadlock when called with interrupts disabled */ | 298 | /* |
289 | WARN_ON_ONCE(irqs_disabled() && !oops_in_progress); | 299 | * Can deadlock when called with interrupts disabled. |
300 | * We allow cpus that are not yet online though, as no one else can | ||
301 | * send an smp call function interrupt to this cpu and as such deadlocks | ||
302 | * can't happen. | ||
303 | */ | ||
304 | WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() | ||
305 | && !oops_in_progress); | ||
290 | 306 | ||
291 | if (cpu == this_cpu) { | 307 | if (cpu == this_cpu) { |
292 | local_irq_save(flags); | 308 | local_irq_save(flags); |
@@ -329,8 +345,14 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, | |||
329 | { | 345 | { |
330 | csd_lock(data); | 346 | csd_lock(data); |
331 | 347 | ||
332 | /* Can deadlock when called with interrupts disabled */ | 348 | /* |
333 | WARN_ON_ONCE(wait && irqs_disabled() && !oops_in_progress); | 349 | * Can deadlock when called with interrupts disabled. |
350 | * We allow cpus that are not yet online though, as no one else can | ||
351 | * send an smp call function interrupt to this cpu and as such deadlocks | ||
352 | * can't happen. | ||
353 | */ | ||
354 | WARN_ON_ONCE(cpu_online(smp_processor_id()) && wait && irqs_disabled() | ||
355 | && !oops_in_progress); | ||
334 | 356 | ||
335 | generic_exec_single(cpu, data, wait); | 357 | generic_exec_single(cpu, data, wait); |
336 | } | 358 | } |
@@ -365,8 +387,14 @@ void smp_call_function_many(const struct cpumask *mask, | |||
365 | unsigned long flags; | 387 | unsigned long flags; |
366 | int cpu, next_cpu, this_cpu = smp_processor_id(); | 388 | int cpu, next_cpu, this_cpu = smp_processor_id(); |
367 | 389 | ||
368 | /* Can deadlock when called with interrupts disabled */ | 390 | /* |
369 | WARN_ON_ONCE(irqs_disabled() && !oops_in_progress); | 391 | * Can deadlock when called with interrupts disabled. |
392 | * We allow cpus that are not yet online though, as no one else can | ||
393 | * send an smp call function interrupt to this cpu and as such deadlocks | ||
394 | * can't happen. | ||
395 | */ | ||
396 | WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() | ||
397 | && !oops_in_progress); | ||
370 | 398 | ||
371 | /* So, what's a CPU they want? Ignoring this one. */ | 399 | /* So, what's a CPU they want? Ignoring this one. */ |
372 | cpu = cpumask_first_and(mask, cpu_online_mask); | 400 | cpu = cpumask_first_and(mask, cpu_online_mask); |
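The online/irqs-disabled deadlock guard introduced above is repeated verbatim in smp_call_function_single(), __smp_call_function_single() and smp_call_function_many(). Purely as an illustration of the design (not part of this patch), a hypothetical helper could factor the check out; smp_call_check_deadlock() is an invented name, while the primitives it calls are existing kernel ones:

	/*
	 * Sketch only: warn about a possible deadlock when the calling CPU
	 * is already online, interrupts are disabled and we are not oopsing.
	 * Callers whose guard does not depend on 'wait' would pass true.
	 */
	static inline void smp_call_check_deadlock(int this_cpu, bool wait)
	{
		WARN_ON_ONCE(cpu_online(this_cpu) && wait && irqs_disabled()
			     && !oops_in_progress);
	}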
diff --git a/kernel/softirq.c b/kernel/softirq.c index 7db25067cd2d..f8749e5216e0 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -57,7 +57,7 @@ static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp | |||
57 | static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); | 57 | static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); |
58 | 58 | ||
59 | char *softirq_to_name[NR_SOFTIRQS] = { | 59 | char *softirq_to_name[NR_SOFTIRQS] = { |
60 | "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", | 60 | "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", |
61 | "TASKLET", "SCHED", "HRTIMER", "RCU" | 61 | "TASKLET", "SCHED", "HRTIMER", "RCU" |
62 | }; | 62 | }; |
63 | 63 | ||
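The softirq_to_name[] strings above have to stay in step with the softirq vector enumeration in include/linux/interrupt.h, which this series presumably extends with a BLOCK_IOPOLL entry between BLOCK and TASKLET. A sketch of the assumed ordering (the actual enum lives outside this diff):

	/* Assumed softirq ordering matching the name array above. */
	enum {
		HI_SOFTIRQ = 0, TIMER_SOFTIRQ, NET_TX_SOFTIRQ, NET_RX_SOFTIRQ,
		BLOCK_SOFTIRQ, BLOCK_IOPOLL_SOFTIRQ, TASKLET_SOFTIRQ,
		SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, RCU_SOFTIRQ,
		NR_SOFTIRQS
	};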
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3125cff1c570..1a631ba684a4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -91,6 +91,9 @@ extern int sysctl_nr_trim_pages; | |||
91 | #ifdef CONFIG_RCU_TORTURE_TEST | 91 | #ifdef CONFIG_RCU_TORTURE_TEST |
92 | extern int rcutorture_runnable; | 92 | extern int rcutorture_runnable; |
93 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ | 93 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ |
94 | #ifdef CONFIG_BLOCK | ||
95 | extern int blk_iopoll_enabled; | ||
96 | #endif | ||
94 | 97 | ||
95 | /* Constants used for minimum and maximum */ | 98 | /* Constants used for minimum and maximum */ |
96 | #ifdef CONFIG_DETECT_SOFTLOCKUP | 99 | #ifdef CONFIG_DETECT_SOFTLOCKUP |
@@ -997,7 +1000,16 @@ static struct ctl_table kern_table[] = { | |||
997 | .proc_handler = &proc_dointvec, | 1000 | .proc_handler = &proc_dointvec, |
998 | }, | 1001 | }, |
999 | #endif | 1002 | #endif |
1000 | 1003 | #ifdef CONFIG_BLOCK | |
1004 | { | ||
1005 | .ctl_name = CTL_UNNUMBERED, | ||
1006 | .procname = "blk_iopoll", | ||
1007 | .data = &blk_iopoll_enabled, | ||
1008 | .maxlen = sizeof(int), | ||
1009 | .mode = 0644, | ||
1010 | .proc_handler = &proc_dointvec, | ||
1011 | }, | ||
1012 | #endif | ||
1001 | /* | 1013 | /* |
1002 | * NOTE: do not add new entries to this table unless you have read | 1014 | * NOTE: do not add new entries to this table unless you have read |
1003 | * Documentation/sysctl/ctl_unnumbered.txt | 1015 | * Documentation/sysctl/ctl_unnumbered.txt |
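Because the new entry is added to kern_table[], the knob should surface as /proc/sys/kernel/blk_iopoll (mode 0644) on kernels built with CONFIG_BLOCK. A minimal userspace sketch for reading it, assuming that path:

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/kernel/blk_iopoll", "r");
		int val = -1;

		if (!f) {
			perror("blk_iopoll");
			return 1;
		}
		if (fscanf(f, "%d", &val) == 1)
			printf("blk_iopoll enabled: %d\n", val);
		fclose(f);
		return 0;
	}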
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 888adbcca30c..ea8384d3caa7 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
@@ -108,7 +108,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, | |||
108 | /* | 108 | /* |
109 | * Send taskstats data in @skb to listener with nl_pid @pid | 109 | * Send taskstats data in @skb to listener with nl_pid @pid |
110 | */ | 110 | */ |
111 | static int send_reply(struct sk_buff *skb, pid_t pid) | 111 | static int send_reply(struct sk_buff *skb, struct genl_info *info) |
112 | { | 112 | { |
113 | struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); | 113 | struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); |
114 | void *reply = genlmsg_data(genlhdr); | 114 | void *reply = genlmsg_data(genlhdr); |
@@ -120,7 +120,7 @@ static int send_reply(struct sk_buff *skb, pid_t pid) | |||
120 | return rc; | 120 | return rc; |
121 | } | 121 | } |
122 | 122 | ||
123 | return genlmsg_unicast(skb, pid); | 123 | return genlmsg_reply(skb, info); |
124 | } | 124 | } |
125 | 125 | ||
126 | /* | 126 | /* |
@@ -150,7 +150,7 @@ static void send_cpu_listeners(struct sk_buff *skb, | |||
150 | if (!skb_next) | 150 | if (!skb_next) |
151 | break; | 151 | break; |
152 | } | 152 | } |
153 | rc = genlmsg_unicast(skb_cur, s->pid); | 153 | rc = genlmsg_unicast(&init_net, skb_cur, s->pid); |
154 | if (rc == -ECONNREFUSED) { | 154 | if (rc == -ECONNREFUSED) { |
155 | s->valid = 0; | 155 | s->valid = 0; |
156 | delcount++; | 156 | delcount++; |
@@ -418,7 +418,7 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | |||
418 | goto err; | 418 | goto err; |
419 | } | 419 | } |
420 | 420 | ||
421 | rc = send_reply(rep_skb, info->snd_pid); | 421 | rc = send_reply(rep_skb, info); |
422 | 422 | ||
423 | err: | 423 | err: |
424 | fput_light(file, fput_needed); | 424 | fput_light(file, fput_needed); |
@@ -487,7 +487,7 @@ free_return_rc: | |||
487 | } else | 487 | } else |
488 | goto err; | 488 | goto err; |
489 | 489 | ||
490 | return send_reply(rep_skb, info->snd_pid); | 490 | return send_reply(rep_skb, info); |
491 | err: | 491 | err: |
492 | nlmsg_free(rep_skb); | 492 | nlmsg_free(rep_skb); |
493 | return rc; | 493 | return rc; |
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 1ea0d1234f4a..e71634604400 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -11,12 +11,18 @@ config NOP_TRACER | |||
11 | 11 | ||
12 | config HAVE_FTRACE_NMI_ENTER | 12 | config HAVE_FTRACE_NMI_ENTER |
13 | bool | 13 | bool |
14 | help | ||
15 | See Documentation/trace/ftrace-implementation.txt | ||
14 | 16 | ||
15 | config HAVE_FUNCTION_TRACER | 17 | config HAVE_FUNCTION_TRACER |
16 | bool | 18 | bool |
19 | help | ||
20 | See Documentation/trace/ftrace-implementation.txt | ||
17 | 21 | ||
18 | config HAVE_FUNCTION_GRAPH_TRACER | 22 | config HAVE_FUNCTION_GRAPH_TRACER |
19 | bool | 23 | bool |
24 | help | ||
25 | See Documentation/trace/ftrace-implementation.txt | ||
20 | 26 | ||
21 | config HAVE_FUNCTION_GRAPH_FP_TEST | 27 | config HAVE_FUNCTION_GRAPH_FP_TEST |
22 | bool | 28 | bool |
@@ -28,21 +34,25 @@ config HAVE_FUNCTION_GRAPH_FP_TEST | |||
28 | config HAVE_FUNCTION_TRACE_MCOUNT_TEST | 34 | config HAVE_FUNCTION_TRACE_MCOUNT_TEST |
29 | bool | 35 | bool |
30 | help | 36 | help |
31 | This gets selected when the arch tests the function_trace_stop | 37 | See Documentation/trace/ftrace-implementation.txt |
32 | variable at the mcount call site. Otherwise, this variable | ||
33 | is tested by the called function. | ||
34 | 38 | ||
35 | config HAVE_DYNAMIC_FTRACE | 39 | config HAVE_DYNAMIC_FTRACE |
36 | bool | 40 | bool |
41 | help | ||
42 | See Documentation/trace/ftrace-implementation.txt | ||
37 | 43 | ||
38 | config HAVE_FTRACE_MCOUNT_RECORD | 44 | config HAVE_FTRACE_MCOUNT_RECORD |
39 | bool | 45 | bool |
46 | help | ||
47 | See Documentation/trace/ftrace-implementation.txt | ||
40 | 48 | ||
41 | config HAVE_HW_BRANCH_TRACER | 49 | config HAVE_HW_BRANCH_TRACER |
42 | bool | 50 | bool |
43 | 51 | ||
44 | config HAVE_SYSCALL_TRACEPOINTS | 52 | config HAVE_SYSCALL_TRACEPOINTS |
45 | bool | 53 | bool |
54 | help | ||
55 | See Documentation/trace/ftrace-implementation.txt | ||
46 | 56 | ||
47 | config TRACER_MAX_TRACE | 57 | config TRACER_MAX_TRACE |
48 | bool | 58 | bool |
@@ -469,6 +479,18 @@ config FTRACE_STARTUP_TEST | |||
469 | functioning properly. It will do tests on all the configured | 479 | functioning properly. It will do tests on all the configured |
470 | tracers of ftrace. | 480 | tracers of ftrace. |
471 | 481 | ||
482 | config EVENT_TRACE_TEST_SYSCALLS | ||
483 | bool "Run selftest on syscall events" | ||
484 | depends on FTRACE_STARTUP_TEST | ||
485 | help | ||
486 | This option will also enable testing every syscall event. | ||
487 | It only enables each event, runs various loads with the event | ||
488 | enabled, and then disables it again. This adds a bit more time to | ||
489 | kernel boot-up since it does this for every system call defined. | ||
490 | |||
491 | TBD - enable a way to actually call the syscalls as we test their | ||
492 | events | ||
493 | |||
472 | config MMIOTRACE | 494 | config MMIOTRACE |
473 | bool "Memory mapped IO tracing" | 495 | bool "Memory mapped IO tracing" |
474 | depends on HAVE_MMIOTRACE_SUPPORT && PCI | 496 | depends on HAVE_MMIOTRACE_SUPPORT && PCI |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8c804e24f96f..cc615f84751b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -1323,11 +1323,10 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) | |||
1323 | 1323 | ||
1324 | enum { | 1324 | enum { |
1325 | FTRACE_ITER_FILTER = (1 << 0), | 1325 | FTRACE_ITER_FILTER = (1 << 0), |
1326 | FTRACE_ITER_CONT = (1 << 1), | 1326 | FTRACE_ITER_NOTRACE = (1 << 1), |
1327 | FTRACE_ITER_NOTRACE = (1 << 2), | 1327 | FTRACE_ITER_FAILURES = (1 << 2), |
1328 | FTRACE_ITER_FAILURES = (1 << 3), | 1328 | FTRACE_ITER_PRINTALL = (1 << 3), |
1329 | FTRACE_ITER_PRINTALL = (1 << 4), | 1329 | FTRACE_ITER_HASH = (1 << 4), |
1330 | FTRACE_ITER_HASH = (1 << 5), | ||
1331 | }; | 1330 | }; |
1332 | 1331 | ||
1333 | #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ | 1332 | #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ |
@@ -1337,8 +1336,7 @@ struct ftrace_iterator { | |||
1337 | int hidx; | 1336 | int hidx; |
1338 | int idx; | 1337 | int idx; |
1339 | unsigned flags; | 1338 | unsigned flags; |
1340 | unsigned char buffer[FTRACE_BUFF_MAX+1]; | 1339 | struct trace_parser parser; |
1341 | unsigned buffer_idx; | ||
1342 | }; | 1340 | }; |
1343 | 1341 | ||
1344 | static void * | 1342 | static void * |
@@ -1407,7 +1405,7 @@ static int t_hash_show(struct seq_file *m, void *v) | |||
1407 | if (rec->ops->print) | 1405 | if (rec->ops->print) |
1408 | return rec->ops->print(m, rec->ip, rec->ops, rec->data); | 1406 | return rec->ops->print(m, rec->ip, rec->ops, rec->data); |
1409 | 1407 | ||
1410 | seq_printf(m, "%pf:%pf", (void *)rec->ip, (void *)rec->ops->func); | 1408 | seq_printf(m, "%ps:%ps", (void *)rec->ip, (void *)rec->ops->func); |
1411 | 1409 | ||
1412 | if (rec->data) | 1410 | if (rec->data) |
1413 | seq_printf(m, ":%p", rec->data); | 1411 | seq_printf(m, ":%p", rec->data); |
@@ -1517,7 +1515,7 @@ static int t_show(struct seq_file *m, void *v) | |||
1517 | if (!rec) | 1515 | if (!rec) |
1518 | return 0; | 1516 | return 0; |
1519 | 1517 | ||
1520 | seq_printf(m, "%pf\n", (void *)rec->ip); | 1518 | seq_printf(m, "%ps\n", (void *)rec->ip); |
1521 | 1519 | ||
1522 | return 0; | 1520 | return 0; |
1523 | } | 1521 | } |
@@ -1604,6 +1602,11 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable) | |||
1604 | if (!iter) | 1602 | if (!iter) |
1605 | return -ENOMEM; | 1603 | return -ENOMEM; |
1606 | 1604 | ||
1605 | if (trace_parser_get_init(&iter->parser, FTRACE_BUFF_MAX)) { | ||
1606 | kfree(iter); | ||
1607 | return -ENOMEM; | ||
1608 | } | ||
1609 | |||
1607 | mutex_lock(&ftrace_regex_lock); | 1610 | mutex_lock(&ftrace_regex_lock); |
1608 | if ((file->f_mode & FMODE_WRITE) && | 1611 | if ((file->f_mode & FMODE_WRITE) && |
1609 | (file->f_flags & O_TRUNC)) | 1612 | (file->f_flags & O_TRUNC)) |
@@ -2059,9 +2062,9 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, | |||
2059 | int i, len = 0; | 2062 | int i, len = 0; |
2060 | char *search; | 2063 | char *search; |
2061 | 2064 | ||
2062 | if (glob && (strcmp(glob, "*") || !strlen(glob))) | 2065 | if (glob && (strcmp(glob, "*") == 0 || !strlen(glob))) |
2063 | glob = NULL; | 2066 | glob = NULL; |
2064 | else { | 2067 | else if (glob) { |
2065 | int not; | 2068 | int not; |
2066 | 2069 | ||
2067 | type = ftrace_setup_glob(glob, strlen(glob), &search, ¬); | 2070 | type = ftrace_setup_glob(glob, strlen(glob), &search, ¬); |
@@ -2196,9 +2199,8 @@ ftrace_regex_write(struct file *file, const char __user *ubuf, | |||
2196 | size_t cnt, loff_t *ppos, int enable) | 2199 | size_t cnt, loff_t *ppos, int enable) |
2197 | { | 2200 | { |
2198 | struct ftrace_iterator *iter; | 2201 | struct ftrace_iterator *iter; |
2199 | char ch; | 2202 | struct trace_parser *parser; |
2200 | size_t read = 0; | 2203 | ssize_t ret, read; |
2201 | ssize_t ret; | ||
2202 | 2204 | ||
2203 | if (!cnt || cnt < 0) | 2205 | if (!cnt || cnt < 0) |
2204 | return 0; | 2206 | return 0; |
@@ -2211,72 +2213,23 @@ ftrace_regex_write(struct file *file, const char __user *ubuf, | |||
2211 | } else | 2213 | } else |
2212 | iter = file->private_data; | 2214 | iter = file->private_data; |
2213 | 2215 | ||
2214 | if (!*ppos) { | 2216 | parser = &iter->parser; |
2215 | iter->flags &= ~FTRACE_ITER_CONT; | 2217 | read = trace_get_user(parser, ubuf, cnt, ppos); |
2216 | iter->buffer_idx = 0; | ||
2217 | } | ||
2218 | |||
2219 | ret = get_user(ch, ubuf++); | ||
2220 | if (ret) | ||
2221 | goto out; | ||
2222 | read++; | ||
2223 | cnt--; | ||
2224 | 2218 | ||
2225 | /* | 2219 | if (trace_parser_loaded(parser) && |
2226 | * If the parser haven't finished with the last write, | 2220 | !trace_parser_cont(parser)) { |
2227 | * continue reading the user input without skipping spaces. | 2221 | ret = ftrace_process_regex(parser->buffer, |
2228 | */ | 2222 | parser->idx, enable); |
2229 | if (!(iter->flags & FTRACE_ITER_CONT)) { | ||
2230 | /* skip white space */ | ||
2231 | while (cnt && isspace(ch)) { | ||
2232 | ret = get_user(ch, ubuf++); | ||
2233 | if (ret) | ||
2234 | goto out; | ||
2235 | read++; | ||
2236 | cnt--; | ||
2237 | } | ||
2238 | |||
2239 | /* only spaces were written */ | ||
2240 | if (isspace(ch)) { | ||
2241 | *ppos += read; | ||
2242 | ret = read; | ||
2243 | goto out; | ||
2244 | } | ||
2245 | |||
2246 | iter->buffer_idx = 0; | ||
2247 | } | ||
2248 | |||
2249 | while (cnt && !isspace(ch)) { | ||
2250 | if (iter->buffer_idx < FTRACE_BUFF_MAX) | ||
2251 | iter->buffer[iter->buffer_idx++] = ch; | ||
2252 | else { | ||
2253 | ret = -EINVAL; | ||
2254 | goto out; | ||
2255 | } | ||
2256 | ret = get_user(ch, ubuf++); | ||
2257 | if (ret) | 2223 | if (ret) |
2258 | goto out; | 2224 | goto out; |
2259 | read++; | ||
2260 | cnt--; | ||
2261 | } | ||
2262 | 2225 | ||
2263 | if (isspace(ch)) { | 2226 | trace_parser_clear(parser); |
2264 | iter->buffer[iter->buffer_idx] = 0; | ||
2265 | ret = ftrace_process_regex(iter->buffer, | ||
2266 | iter->buffer_idx, enable); | ||
2267 | if (ret) | ||
2268 | goto out; | ||
2269 | iter->buffer_idx = 0; | ||
2270 | } else { | ||
2271 | iter->flags |= FTRACE_ITER_CONT; | ||
2272 | iter->buffer[iter->buffer_idx++] = ch; | ||
2273 | } | 2227 | } |
2274 | 2228 | ||
2275 | *ppos += read; | ||
2276 | ret = read; | 2229 | ret = read; |
2277 | out: | ||
2278 | mutex_unlock(&ftrace_regex_lock); | ||
2279 | 2230 | ||
2231 | mutex_unlock(&ftrace_regex_lock); | ||
2232 | out: | ||
2280 | return ret; | 2233 | return ret; |
2281 | } | 2234 | } |
2282 | 2235 | ||
@@ -2381,6 +2334,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) | |||
2381 | { | 2334 | { |
2382 | struct seq_file *m = (struct seq_file *)file->private_data; | 2335 | struct seq_file *m = (struct seq_file *)file->private_data; |
2383 | struct ftrace_iterator *iter; | 2336 | struct ftrace_iterator *iter; |
2337 | struct trace_parser *parser; | ||
2384 | 2338 | ||
2385 | mutex_lock(&ftrace_regex_lock); | 2339 | mutex_lock(&ftrace_regex_lock); |
2386 | if (file->f_mode & FMODE_READ) { | 2340 | if (file->f_mode & FMODE_READ) { |
@@ -2390,9 +2344,10 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) | |||
2390 | } else | 2344 | } else |
2391 | iter = file->private_data; | 2345 | iter = file->private_data; |
2392 | 2346 | ||
2393 | if (iter->buffer_idx) { | 2347 | parser = &iter->parser; |
2394 | iter->buffer[iter->buffer_idx] = 0; | 2348 | if (trace_parser_loaded(parser)) { |
2395 | ftrace_match_records(iter->buffer, iter->buffer_idx, enable); | 2349 | parser->buffer[parser->idx] = 0; |
2350 | ftrace_match_records(parser->buffer, parser->idx, enable); | ||
2396 | } | 2351 | } |
2397 | 2352 | ||
2398 | mutex_lock(&ftrace_lock); | 2353 | mutex_lock(&ftrace_lock); |
@@ -2400,7 +2355,9 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) | |||
2400 | ftrace_run_update_code(FTRACE_ENABLE_CALLS); | 2355 | ftrace_run_update_code(FTRACE_ENABLE_CALLS); |
2401 | mutex_unlock(&ftrace_lock); | 2356 | mutex_unlock(&ftrace_lock); |
2402 | 2357 | ||
2358 | trace_parser_put(parser); | ||
2403 | kfree(iter); | 2359 | kfree(iter); |
2360 | |||
2404 | mutex_unlock(&ftrace_regex_lock); | 2361 | mutex_unlock(&ftrace_regex_lock); |
2405 | return 0; | 2362 | return 0; |
2406 | } | 2363 | } |
@@ -2499,7 +2456,7 @@ static int g_show(struct seq_file *m, void *v) | |||
2499 | return 0; | 2456 | return 0; |
2500 | } | 2457 | } |
2501 | 2458 | ||
2502 | seq_printf(m, "%pf\n", v); | 2459 | seq_printf(m, "%ps\n", (void *)*ptr); |
2503 | 2460 | ||
2504 | return 0; | 2461 | return 0; |
2505 | } | 2462 | } |
@@ -2602,12 +2559,10 @@ static ssize_t | |||
2602 | ftrace_graph_write(struct file *file, const char __user *ubuf, | 2559 | ftrace_graph_write(struct file *file, const char __user *ubuf, |
2603 | size_t cnt, loff_t *ppos) | 2560 | size_t cnt, loff_t *ppos) |
2604 | { | 2561 | { |
2605 | unsigned char buffer[FTRACE_BUFF_MAX+1]; | 2562 | struct trace_parser parser; |
2606 | unsigned long *array; | 2563 | unsigned long *array; |
2607 | size_t read = 0; | 2564 | size_t read = 0; |
2608 | ssize_t ret; | 2565 | ssize_t ret; |
2609 | int index = 0; | ||
2610 | char ch; | ||
2611 | 2566 | ||
2612 | if (!cnt || cnt < 0) | 2567 | if (!cnt || cnt < 0) |
2613 | return 0; | 2568 | return 0; |
@@ -2625,51 +2580,26 @@ ftrace_graph_write(struct file *file, const char __user *ubuf, | |||
2625 | } else | 2580 | } else |
2626 | array = file->private_data; | 2581 | array = file->private_data; |
2627 | 2582 | ||
2628 | ret = get_user(ch, ubuf++); | 2583 | if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) { |
2629 | if (ret) | 2584 | ret = -ENOMEM; |
2630 | goto out; | 2585 | goto out; |
2631 | read++; | ||
2632 | cnt--; | ||
2633 | |||
2634 | /* skip white space */ | ||
2635 | while (cnt && isspace(ch)) { | ||
2636 | ret = get_user(ch, ubuf++); | ||
2637 | if (ret) | ||
2638 | goto out; | ||
2639 | read++; | ||
2640 | cnt--; | ||
2641 | } | 2586 | } |
2642 | 2587 | ||
2643 | if (isspace(ch)) { | 2588 | read = trace_get_user(&parser, ubuf, cnt, ppos); |
2644 | *ppos += read; | ||
2645 | ret = read; | ||
2646 | goto out; | ||
2647 | } | ||
2648 | 2589 | ||
2649 | while (cnt && !isspace(ch)) { | 2590 | if (trace_parser_loaded((&parser))) { |
2650 | if (index < FTRACE_BUFF_MAX) | 2591 | parser.buffer[parser.idx] = 0; |
2651 | buffer[index++] = ch; | 2592 | |
2652 | else { | 2593 | /* we allow only one expression at a time */ |
2653 | ret = -EINVAL; | 2594 | ret = ftrace_set_func(array, &ftrace_graph_count, |
2654 | goto out; | 2595 | parser.buffer); |
2655 | } | ||
2656 | ret = get_user(ch, ubuf++); | ||
2657 | if (ret) | 2596 | if (ret) |
2658 | goto out; | 2597 | goto out; |
2659 | read++; | ||
2660 | cnt--; | ||
2661 | } | 2598 | } |
2662 | buffer[index] = 0; | ||
2663 | |||
2664 | /* we allow only one expression at a time */ | ||
2665 | ret = ftrace_set_func(array, &ftrace_graph_count, buffer); | ||
2666 | if (ret) | ||
2667 | goto out; | ||
2668 | |||
2669 | file->f_pos += read; | ||
2670 | 2599 | ||
2671 | ret = read; | 2600 | ret = read; |
2672 | out: | 2601 | out: |
2602 | trace_parser_put(&parser); | ||
2673 | mutex_unlock(&graph_lock); | 2603 | mutex_unlock(&graph_lock); |
2674 | 2604 | ||
2675 | return ret; | 2605 | return ret; |
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 454e74e718cf..6eef38923b07 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -701,8 +701,8 @@ static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer, | |||
701 | 701 | ||
702 | val &= ~RB_FLAG_MASK; | 702 | val &= ~RB_FLAG_MASK; |
703 | 703 | ||
704 | ret = (unsigned long)cmpxchg(&list->next, | 704 | ret = cmpxchg((unsigned long *)&list->next, |
705 | val | old_flag, val | new_flag); | 705 | val | old_flag, val | new_flag); |
706 | 706 | ||
707 | /* check if the reader took the page */ | 707 | /* check if the reader took the page */ |
708 | if ((ret & ~RB_FLAG_MASK) != val) | 708 | if ((ret & ~RB_FLAG_MASK) != val) |
@@ -794,7 +794,7 @@ static int rb_head_page_replace(struct buffer_page *old, | |||
794 | val = *ptr & ~RB_FLAG_MASK; | 794 | val = *ptr & ~RB_FLAG_MASK; |
795 | val |= RB_PAGE_HEAD; | 795 | val |= RB_PAGE_HEAD; |
796 | 796 | ||
797 | ret = cmpxchg(ptr, val, &new->list); | 797 | ret = cmpxchg(ptr, val, (unsigned long)&new->list); |
798 | 798 | ||
799 | return ret == val; | 799 | return ret == val; |
800 | } | 800 | } |
@@ -2997,15 +2997,12 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) | |||
2997 | } | 2997 | } |
2998 | 2998 | ||
2999 | static struct ring_buffer_event * | 2999 | static struct ring_buffer_event * |
3000 | rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | 3000 | rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts) |
3001 | { | 3001 | { |
3002 | struct ring_buffer_per_cpu *cpu_buffer; | ||
3003 | struct ring_buffer_event *event; | 3002 | struct ring_buffer_event *event; |
3004 | struct buffer_page *reader; | 3003 | struct buffer_page *reader; |
3005 | int nr_loops = 0; | 3004 | int nr_loops = 0; |
3006 | 3005 | ||
3007 | cpu_buffer = buffer->buffers[cpu]; | ||
3008 | |||
3009 | again: | 3006 | again: |
3010 | /* | 3007 | /* |
3011 | * We repeat when a timestamp is encountered. It is possible | 3008 | * We repeat when a timestamp is encountered. It is possible |
@@ -3049,7 +3046,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
3049 | case RINGBUF_TYPE_DATA: | 3046 | case RINGBUF_TYPE_DATA: |
3050 | if (ts) { | 3047 | if (ts) { |
3051 | *ts = cpu_buffer->read_stamp + event->time_delta; | 3048 | *ts = cpu_buffer->read_stamp + event->time_delta; |
3052 | ring_buffer_normalize_time_stamp(buffer, | 3049 | ring_buffer_normalize_time_stamp(cpu_buffer->buffer, |
3053 | cpu_buffer->cpu, ts); | 3050 | cpu_buffer->cpu, ts); |
3054 | } | 3051 | } |
3055 | return event; | 3052 | return event; |
@@ -3168,7 +3165,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
3168 | local_irq_save(flags); | 3165 | local_irq_save(flags); |
3169 | if (dolock) | 3166 | if (dolock) |
3170 | spin_lock(&cpu_buffer->reader_lock); | 3167 | spin_lock(&cpu_buffer->reader_lock); |
3171 | event = rb_buffer_peek(buffer, cpu, ts); | 3168 | event = rb_buffer_peek(cpu_buffer, ts); |
3172 | if (event && event->type_len == RINGBUF_TYPE_PADDING) | 3169 | if (event && event->type_len == RINGBUF_TYPE_PADDING) |
3173 | rb_advance_reader(cpu_buffer); | 3170 | rb_advance_reader(cpu_buffer); |
3174 | if (dolock) | 3171 | if (dolock) |
@@ -3237,7 +3234,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) | |||
3237 | if (dolock) | 3234 | if (dolock) |
3238 | spin_lock(&cpu_buffer->reader_lock); | 3235 | spin_lock(&cpu_buffer->reader_lock); |
3239 | 3236 | ||
3240 | event = rb_buffer_peek(buffer, cpu, ts); | 3237 | event = rb_buffer_peek(cpu_buffer, ts); |
3241 | if (event) | 3238 | if (event) |
3242 | rb_advance_reader(cpu_buffer); | 3239 | rb_advance_reader(cpu_buffer); |
3243 | 3240 | ||
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5c75deeefe30..fd52a19dd172 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -339,6 +339,112 @@ static struct { | |||
339 | 339 | ||
340 | int trace_clock_id; | 340 | int trace_clock_id; |
341 | 341 | ||
342 | /* | ||
343 | * trace_parser_get_init - gets the buffer for trace parser | ||
344 | */ | ||
345 | int trace_parser_get_init(struct trace_parser *parser, int size) | ||
346 | { | ||
347 | memset(parser, 0, sizeof(*parser)); | ||
348 | |||
349 | parser->buffer = kmalloc(size, GFP_KERNEL); | ||
350 | if (!parser->buffer) | ||
351 | return 1; | ||
352 | |||
353 | parser->size = size; | ||
354 | return 0; | ||
355 | } | ||
356 | |||
357 | /* | ||
358 | * trace_parser_put - frees the buffer for trace parser | ||
359 | */ | ||
360 | void trace_parser_put(struct trace_parser *parser) | ||
361 | { | ||
362 | kfree(parser->buffer); | ||
363 | } | ||
364 | |||
365 | /* | ||
366 | * trace_get_user - reads the user input string separated by space | ||
367 | * (matched by isspace(ch)) | ||
368 | * | ||
369 | * For each string found the 'struct trace_parser' is updated, | ||
370 | * and the function returns. | ||
371 | * | ||
372 | * Returns number of bytes read. | ||
373 | * | ||
374 | * See kernel/trace/trace.h for 'struct trace_parser' details. | ||
375 | */ | ||
376 | int trace_get_user(struct trace_parser *parser, const char __user *ubuf, | ||
377 | size_t cnt, loff_t *ppos) | ||
378 | { | ||
379 | char ch; | ||
380 | size_t read = 0; | ||
381 | ssize_t ret; | ||
382 | |||
383 | if (!*ppos) | ||
384 | trace_parser_clear(parser); | ||
385 | |||
386 | ret = get_user(ch, ubuf++); | ||
387 | if (ret) | ||
388 | goto out; | ||
389 | |||
390 | read++; | ||
391 | cnt--; | ||
392 | |||
393 | /* | ||
394 | * If the parser did not finish with the last write, | ||
395 | * continue reading the user input without skipping spaces. | ||
396 | */ | ||
397 | if (!parser->cont) { | ||
398 | /* skip white space */ | ||
399 | while (cnt && isspace(ch)) { | ||
400 | ret = get_user(ch, ubuf++); | ||
401 | if (ret) | ||
402 | goto out; | ||
403 | read++; | ||
404 | cnt--; | ||
405 | } | ||
406 | |||
407 | /* only spaces were written */ | ||
408 | if (isspace(ch)) { | ||
409 | *ppos += read; | ||
410 | ret = read; | ||
411 | goto out; | ||
412 | } | ||
413 | |||
414 | parser->idx = 0; | ||
415 | } | ||
416 | |||
417 | /* read the non-space input */ | ||
418 | while (cnt && !isspace(ch)) { | ||
419 | if (parser->idx < parser->size) | ||
420 | parser->buffer[parser->idx++] = ch; | ||
421 | else { | ||
422 | ret = -EINVAL; | ||
423 | goto out; | ||
424 | } | ||
425 | ret = get_user(ch, ubuf++); | ||
426 | if (ret) | ||
427 | goto out; | ||
428 | read++; | ||
429 | cnt--; | ||
430 | } | ||
431 | |||
432 | /* We either got finished input or we have to wait for another call. */ | ||
433 | if (isspace(ch)) { | ||
434 | parser->buffer[parser->idx] = 0; | ||
435 | parser->cont = false; | ||
436 | } else { | ||
437 | parser->cont = true; | ||
438 | parser->buffer[parser->idx++] = ch; | ||
439 | } | ||
440 | |||
441 | *ppos += read; | ||
442 | ret = read; | ||
443 | |||
444 | out: | ||
445 | return ret; | ||
446 | } | ||
447 | |||
342 | ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) | 448 | ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) |
343 | { | 449 | { |
344 | int len; | 450 | int len; |
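The trace_parser helpers added above replace the open-coded get_user() loops that ftrace_regex_write() and ftrace_graph_write() used earlier in this series. A minimal sketch of the intended calling pattern for a write handler, assuming only the API introduced here (my_write() and process_token() are hypothetical names):

	static ssize_t my_write(struct file *file, const char __user *ubuf,
				size_t cnt, loff_t *ppos)
	{
		struct trace_parser parser;
		ssize_t read, ret;

		if (trace_parser_get_init(&parser, 128))
			return -ENOMEM;

		/* Copy in one whitespace-delimited token from user space. */
		read = trace_get_user(&parser, ubuf, cnt, ppos);
		ret = read;

		/* A complete token was read and no continuation is pending. */
		if (trace_parser_loaded(&parser) && !trace_parser_cont(&parser)) {
			int err = process_token(parser.buffer, parser.idx);

			if (err)
				ret = err;
			else
				trace_parser_clear(&parser);
		}

		trace_parser_put(&parser);
		return ret;
	}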
@@ -719,6 +825,11 @@ static void trace_init_cmdlines(void) | |||
719 | cmdline_idx = 0; | 825 | cmdline_idx = 0; |
720 | } | 826 | } |
721 | 827 | ||
828 | int is_tracing_stopped(void) | ||
829 | { | ||
830 | return trace_stop_count; | ||
831 | } | ||
832 | |||
722 | /** | 833 | /** |
723 | * ftrace_off_permanent - disable all ftrace code permanently | 834 | * ftrace_off_permanent - disable all ftrace code permanently |
724 | * | 835 | * |
@@ -886,7 +997,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, | |||
886 | 997 | ||
887 | entry->preempt_count = pc & 0xff; | 998 | entry->preempt_count = pc & 0xff; |
888 | entry->pid = (tsk) ? tsk->pid : 0; | 999 | entry->pid = (tsk) ? tsk->pid : 0; |
889 | entry->tgid = (tsk) ? tsk->tgid : 0; | 1000 | entry->lock_depth = (tsk) ? tsk->lock_depth : 0; |
890 | entry->flags = | 1001 | entry->flags = |
891 | #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT | 1002 | #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT |
892 | (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | | 1003 | (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | |
@@ -1068,6 +1179,7 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) | |||
1068 | return; | 1179 | return; |
1069 | entry = ring_buffer_event_data(event); | 1180 | entry = ring_buffer_event_data(event); |
1070 | 1181 | ||
1182 | entry->tgid = current->tgid; | ||
1071 | memset(&entry->caller, 0, sizeof(entry->caller)); | 1183 | memset(&entry->caller, 0, sizeof(entry->caller)); |
1072 | 1184 | ||
1073 | trace.nr_entries = 0; | 1185 | trace.nr_entries = 0; |
@@ -1094,6 +1206,7 @@ ftrace_trace_special(void *__tr, | |||
1094 | unsigned long arg1, unsigned long arg2, unsigned long arg3, | 1206 | unsigned long arg1, unsigned long arg2, unsigned long arg3, |
1095 | int pc) | 1207 | int pc) |
1096 | { | 1208 | { |
1209 | struct ftrace_event_call *call = &event_special; | ||
1097 | struct ring_buffer_event *event; | 1210 | struct ring_buffer_event *event; |
1098 | struct trace_array *tr = __tr; | 1211 | struct trace_array *tr = __tr; |
1099 | struct ring_buffer *buffer = tr->buffer; | 1212 | struct ring_buffer *buffer = tr->buffer; |
@@ -1107,7 +1220,9 @@ ftrace_trace_special(void *__tr, | |||
1107 | entry->arg1 = arg1; | 1220 | entry->arg1 = arg1; |
1108 | entry->arg2 = arg2; | 1221 | entry->arg2 = arg2; |
1109 | entry->arg3 = arg3; | 1222 | entry->arg3 = arg3; |
1110 | trace_buffer_unlock_commit(buffer, event, 0, pc); | 1223 | |
1224 | if (!filter_check_discard(call, entry, buffer, event)) | ||
1225 | trace_buffer_unlock_commit(buffer, event, 0, pc); | ||
1111 | } | 1226 | } |
1112 | 1227 | ||
1113 | void | 1228 | void |
@@ -1530,10 +1645,10 @@ static void print_lat_help_header(struct seq_file *m) | |||
1530 | seq_puts(m, "# | / _----=> need-resched \n"); | 1645 | seq_puts(m, "# | / _----=> need-resched \n"); |
1531 | seq_puts(m, "# || / _---=> hardirq/softirq \n"); | 1646 | seq_puts(m, "# || / _---=> hardirq/softirq \n"); |
1532 | seq_puts(m, "# ||| / _--=> preempt-depth \n"); | 1647 | seq_puts(m, "# ||| / _--=> preempt-depth \n"); |
1533 | seq_puts(m, "# |||| / \n"); | 1648 | seq_puts(m, "# |||| /_--=> lock-depth \n"); |
1534 | seq_puts(m, "# ||||| delay \n"); | 1649 | seq_puts(m, "# |||||/ delay \n"); |
1535 | seq_puts(m, "# cmd pid ||||| time | caller \n"); | 1650 | seq_puts(m, "# cmd pid |||||| time | caller \n"); |
1536 | seq_puts(m, "# \\ / ||||| \\ | / \n"); | 1651 | seq_puts(m, "# \\ / |||||| \\ | / \n"); |
1537 | } | 1652 | } |
1538 | 1653 | ||
1539 | static void print_func_help_header(struct seq_file *m) | 1654 | static void print_func_help_header(struct seq_file *m) |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index fa1dccb579d5..86bcff94791a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/clocksource.h> | 7 | #include <linux/clocksource.h> |
8 | #include <linux/ring_buffer.h> | 8 | #include <linux/ring_buffer.h> |
9 | #include <linux/mmiotrace.h> | 9 | #include <linux/mmiotrace.h> |
10 | #include <linux/tracepoint.h> | ||
10 | #include <linux/ftrace.h> | 11 | #include <linux/ftrace.h> |
11 | #include <trace/boot.h> | 12 | #include <trace/boot.h> |
12 | #include <linux/kmemtrace.h> | 13 | #include <linux/kmemtrace.h> |
@@ -42,157 +43,54 @@ enum trace_type { | |||
42 | __TRACE_LAST_TYPE, | 43 | __TRACE_LAST_TYPE, |
43 | }; | 44 | }; |
44 | 45 | ||
45 | /* | 46 | enum kmemtrace_type_id { |
46 | * Function trace entry - function address and parent function addres: | 47 | KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ |
47 | */ | 48 | KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ |
48 | struct ftrace_entry { | 49 | KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */ |
49 | struct trace_entry ent; | ||
50 | unsigned long ip; | ||
51 | unsigned long parent_ip; | ||
52 | }; | ||
53 | |||
54 | /* Function call entry */ | ||
55 | struct ftrace_graph_ent_entry { | ||
56 | struct trace_entry ent; | ||
57 | struct ftrace_graph_ent graph_ent; | ||
58 | }; | 50 | }; |
59 | 51 | ||
60 | /* Function return entry */ | ||
61 | struct ftrace_graph_ret_entry { | ||
62 | struct trace_entry ent; | ||
63 | struct ftrace_graph_ret ret; | ||
64 | }; | ||
65 | extern struct tracer boot_tracer; | 52 | extern struct tracer boot_tracer; |
66 | 53 | ||
67 | /* | 54 | #undef __field |
68 | * Context switch trace entry - which task (and prio) we switched from/to: | 55 | #define __field(type, item) type item; |
69 | */ | ||
70 | struct ctx_switch_entry { | ||
71 | struct trace_entry ent; | ||
72 | unsigned int prev_pid; | ||
73 | unsigned char prev_prio; | ||
74 | unsigned char prev_state; | ||
75 | unsigned int next_pid; | ||
76 | unsigned char next_prio; | ||
77 | unsigned char next_state; | ||
78 | unsigned int next_cpu; | ||
79 | }; | ||
80 | |||
81 | /* | ||
82 | * Special (free-form) trace entry: | ||
83 | */ | ||
84 | struct special_entry { | ||
85 | struct trace_entry ent; | ||
86 | unsigned long arg1; | ||
87 | unsigned long arg2; | ||
88 | unsigned long arg3; | ||
89 | }; | ||
90 | |||
91 | /* | ||
92 | * Stack-trace entry: | ||
93 | */ | ||
94 | |||
95 | #define FTRACE_STACK_ENTRIES 8 | ||
96 | |||
97 | struct stack_entry { | ||
98 | struct trace_entry ent; | ||
99 | unsigned long caller[FTRACE_STACK_ENTRIES]; | ||
100 | }; | ||
101 | |||
102 | struct userstack_entry { | ||
103 | struct trace_entry ent; | ||
104 | unsigned long caller[FTRACE_STACK_ENTRIES]; | ||
105 | }; | ||
106 | |||
107 | /* | ||
108 | * trace_printk entry: | ||
109 | */ | ||
110 | struct bprint_entry { | ||
111 | struct trace_entry ent; | ||
112 | unsigned long ip; | ||
113 | const char *fmt; | ||
114 | u32 buf[]; | ||
115 | }; | ||
116 | 56 | ||
117 | struct print_entry { | 57 | #undef __field_struct |
118 | struct trace_entry ent; | 58 | #define __field_struct(type, item) __field(type, item) |
119 | unsigned long ip; | ||
120 | char buf[]; | ||
121 | }; | ||
122 | 59 | ||
123 | #define TRACE_OLD_SIZE 88 | 60 | #undef __field_desc |
61 | #define __field_desc(type, container, item) | ||
124 | 62 | ||
125 | struct trace_field_cont { | 63 | #undef __array |
126 | unsigned char type; | 64 | #define __array(type, item, size) type item[size]; |
127 | /* Temporary till we get rid of this completely */ | ||
128 | char buf[TRACE_OLD_SIZE - 1]; | ||
129 | }; | ||
130 | 65 | ||
131 | struct trace_mmiotrace_rw { | 66 | #undef __array_desc |
132 | struct trace_entry ent; | 67 | #define __array_desc(type, container, item, size) |
133 | struct mmiotrace_rw rw; | ||
134 | }; | ||
135 | 68 | ||
136 | struct trace_mmiotrace_map { | 69 | #undef __dynamic_array |
137 | struct trace_entry ent; | 70 | #define __dynamic_array(type, item) type item[]; |
138 | struct mmiotrace_map map; | ||
139 | }; | ||
140 | 71 | ||
141 | struct trace_boot_call { | 72 | #undef F_STRUCT |
142 | struct trace_entry ent; | 73 | #define F_STRUCT(args...) args |
143 | struct boot_trace_call boot_call; | ||
144 | }; | ||
145 | 74 | ||
146 | struct trace_boot_ret { | 75 | #undef FTRACE_ENTRY |
147 | struct trace_entry ent; | 76 | #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ |
148 | struct boot_trace_ret boot_ret; | 77 | struct struct_name { \ |
149 | }; | 78 | struct trace_entry ent; \ |
150 | 79 | tstruct \ | |
151 | #define TRACE_FUNC_SIZE 30 | 80 | } |
152 | #define TRACE_FILE_SIZE 20 | ||
153 | struct trace_branch { | ||
154 | struct trace_entry ent; | ||
155 | unsigned line; | ||
156 | char func[TRACE_FUNC_SIZE+1]; | ||
157 | char file[TRACE_FILE_SIZE+1]; | ||
158 | char correct; | ||
159 | }; | ||
160 | |||
161 | struct hw_branch_entry { | ||
162 | struct trace_entry ent; | ||
163 | u64 from; | ||
164 | u64 to; | ||
165 | }; | ||
166 | |||
167 | struct trace_power { | ||
168 | struct trace_entry ent; | ||
169 | struct power_trace state_data; | ||
170 | }; | ||
171 | 81 | ||
172 | enum kmemtrace_type_id { | 82 | #undef TP_ARGS |
173 | KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ | 83 | #define TP_ARGS(args...) args |
174 | KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ | ||
175 | KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */ | ||
176 | }; | ||
177 | 84 | ||
178 | struct kmemtrace_alloc_entry { | 85 | #undef FTRACE_ENTRY_DUP |
179 | struct trace_entry ent; | 86 | #define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk) |
180 | enum kmemtrace_type_id type_id; | ||
181 | unsigned long call_site; | ||
182 | const void *ptr; | ||
183 | size_t bytes_req; | ||
184 | size_t bytes_alloc; | ||
185 | gfp_t gfp_flags; | ||
186 | int node; | ||
187 | }; | ||
188 | 87 | ||
189 | struct kmemtrace_free_entry { | 88 | #include "trace_entries.h" |
190 | struct trace_entry ent; | ||
191 | enum kmemtrace_type_id type_id; | ||
192 | unsigned long call_site; | ||
193 | const void *ptr; | ||
194 | }; | ||
195 | 89 | ||
90 | /* | ||
91 | * syscalls are special, and need special handling, this is why | ||
92 | * they are not included in trace_entries.h | ||
93 | */ | ||
196 | struct syscall_trace_enter { | 94 | struct syscall_trace_enter { |
197 | struct trace_entry ent; | 95 | struct trace_entry ent; |
198 | int nr; | 96 | int nr; |
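With the __field()/F_STRUCT()/FTRACE_ENTRY() definitions above, the entries pulled in from trace_entries.h expand back into the plain structures that this hunk removes. As a sketch, the function entry declared later in this patch expands roughly to:

	/*
	 * Rough expansion of
	 *   FTRACE_ENTRY(function, ftrace_entry, TRACE_FN,
	 *                F_STRUCT(__field(unsigned long, ip)
	 *                         __field(unsigned long, parent_ip)),
	 *                F_printk(...))
	 * under the macro definitions above.
	 */
	struct ftrace_entry {
		struct trace_entry	ent;
		unsigned long		ip;
		unsigned long		parent_ip;
	};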
@@ -205,13 +103,12 @@ struct syscall_trace_exit { | |||
205 | unsigned long ret; | 103 | unsigned long ret; |
206 | }; | 104 | }; |
207 | 105 | ||
208 | |||
209 | /* | 106 | /* |
210 | * trace_flag_type is an enumeration that holds different | 107 | * trace_flag_type is an enumeration that holds different |
211 | * states when a trace occurs. These are: | 108 | * states when a trace occurs. These are: |
212 | * IRQS_OFF - interrupts were disabled | 109 | * IRQS_OFF - interrupts were disabled |
213 | * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags | 110 | * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags |
214 | * NEED_RESCED - reschedule is requested | 111 | * NEED_RESCHED - reschedule is requested |
215 | * HARDIRQ - inside an interrupt handler | 112 | * HARDIRQ - inside an interrupt handler |
216 | * SOFTIRQ - inside a softirq handler | 113 | * SOFTIRQ - inside a softirq handler |
217 | */ | 114 | */ |
@@ -390,7 +287,6 @@ struct tracer { | |||
390 | struct tracer *next; | 287 | struct tracer *next; |
391 | int print_max; | 288 | int print_max; |
392 | struct tracer_flags *flags; | 289 | struct tracer_flags *flags; |
393 | struct tracer_stat *stats; | ||
394 | }; | 290 | }; |
395 | 291 | ||
396 | 292 | ||
@@ -469,6 +365,7 @@ void tracing_stop_sched_switch_record(void); | |||
469 | void tracing_start_sched_switch_record(void); | 365 | void tracing_start_sched_switch_record(void); |
470 | int register_tracer(struct tracer *type); | 366 | int register_tracer(struct tracer *type); |
471 | void unregister_tracer(struct tracer *type); | 367 | void unregister_tracer(struct tracer *type); |
368 | int is_tracing_stopped(void); | ||
472 | 369 | ||
473 | extern unsigned long nsecs_to_usecs(unsigned long nsecs); | 370 | extern unsigned long nsecs_to_usecs(unsigned long nsecs); |
474 | 371 | ||
@@ -509,20 +406,6 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags, | |||
509 | 406 | ||
510 | extern cycle_t ftrace_now(int cpu); | 407 | extern cycle_t ftrace_now(int cpu); |
511 | 408 | ||
512 | #ifdef CONFIG_CONTEXT_SWITCH_TRACER | ||
513 | typedef void | ||
514 | (*tracer_switch_func_t)(void *private, | ||
515 | void *__rq, | ||
516 | struct task_struct *prev, | ||
517 | struct task_struct *next); | ||
518 | |||
519 | struct tracer_switch_ops { | ||
520 | tracer_switch_func_t func; | ||
521 | void *private; | ||
522 | struct tracer_switch_ops *next; | ||
523 | }; | ||
524 | #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ | ||
525 | |||
526 | extern void trace_find_cmdline(int pid, char comm[]); | 409 | extern void trace_find_cmdline(int pid, char comm[]); |
527 | 410 | ||
528 | #ifdef CONFIG_DYNAMIC_FTRACE | 411 | #ifdef CONFIG_DYNAMIC_FTRACE |
@@ -638,6 +521,41 @@ static inline int ftrace_trace_task(struct task_struct *task) | |||
638 | #endif | 521 | #endif |
639 | 522 | ||
640 | /* | 523 | /* |
524 | * struct trace_parser - servers for reading the user input separated by spaces | ||
525 | * @cont: set if the input is not complete - no final space char was found | ||
526 | * @buffer: holds the parsed user input | ||
527 | * @idx: user input length | ||
528 | * @size: buffer size | ||
529 | */ | ||
530 | struct trace_parser { | ||
531 | bool cont; | ||
532 | char *buffer; | ||
533 | unsigned idx; | ||
534 | unsigned size; | ||
535 | }; | ||
536 | |||
537 | static inline bool trace_parser_loaded(struct trace_parser *parser) | ||
538 | { | ||
539 | return (parser->idx != 0); | ||
540 | } | ||
541 | |||
542 | static inline bool trace_parser_cont(struct trace_parser *parser) | ||
543 | { | ||
544 | return parser->cont; | ||
545 | } | ||
546 | |||
547 | static inline void trace_parser_clear(struct trace_parser *parser) | ||
548 | { | ||
549 | parser->cont = false; | ||
550 | parser->idx = 0; | ||
551 | } | ||
552 | |||
553 | extern int trace_parser_get_init(struct trace_parser *parser, int size); | ||
554 | extern void trace_parser_put(struct trace_parser *parser); | ||
555 | extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, | ||
556 | size_t cnt, loff_t *ppos); | ||
557 | |||
558 | /* | ||
641 | * trace_iterator_flags is an enumeration that defines bit | 559 | * trace_iterator_flags is an enumeration that defines bit |
642 | * positions into trace_flags that controls the output. | 560 | * positions into trace_flags that controls the output. |
643 | * | 561 | * |
@@ -823,58 +741,18 @@ filter_check_discard(struct ftrace_event_call *call, void *rec, | |||
823 | return 0; | 741 | return 0; |
824 | } | 742 | } |
825 | 743 | ||
826 | #define DEFINE_COMPARISON_PRED(type) \ | ||
827 | static int filter_pred_##type(struct filter_pred *pred, void *event, \ | ||
828 | int val1, int val2) \ | ||
829 | { \ | ||
830 | type *addr = (type *)(event + pred->offset); \ | ||
831 | type val = (type)pred->val; \ | ||
832 | int match = 0; \ | ||
833 | \ | ||
834 | switch (pred->op) { \ | ||
835 | case OP_LT: \ | ||
836 | match = (*addr < val); \ | ||
837 | break; \ | ||
838 | case OP_LE: \ | ||
839 | match = (*addr <= val); \ | ||
840 | break; \ | ||
841 | case OP_GT: \ | ||
842 | match = (*addr > val); \ | ||
843 | break; \ | ||
844 | case OP_GE: \ | ||
845 | match = (*addr >= val); \ | ||
846 | break; \ | ||
847 | default: \ | ||
848 | break; \ | ||
849 | } \ | ||
850 | \ | ||
851 | return match; \ | ||
852 | } | ||
853 | |||
854 | #define DEFINE_EQUALITY_PRED(size) \ | ||
855 | static int filter_pred_##size(struct filter_pred *pred, void *event, \ | ||
856 | int val1, int val2) \ | ||
857 | { \ | ||
858 | u##size *addr = (u##size *)(event + pred->offset); \ | ||
859 | u##size val = (u##size)pred->val; \ | ||
860 | int match; \ | ||
861 | \ | ||
862 | match = (val == *addr) ^ pred->not; \ | ||
863 | \ | ||
864 | return match; \ | ||
865 | } | ||
866 | |||
867 | extern struct mutex event_mutex; | 744 | extern struct mutex event_mutex; |
868 | extern struct list_head ftrace_events; | 745 | extern struct list_head ftrace_events; |
869 | 746 | ||
870 | extern const char *__start___trace_bprintk_fmt[]; | 747 | extern const char *__start___trace_bprintk_fmt[]; |
871 | extern const char *__stop___trace_bprintk_fmt[]; | 748 | extern const char *__stop___trace_bprintk_fmt[]; |
872 | 749 | ||
873 | #undef TRACE_EVENT_FORMAT | 750 | #undef FTRACE_ENTRY |
874 | #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ | 751 | #define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ |
875 | extern struct ftrace_event_call event_##call; | 752 | extern struct ftrace_event_call event_##call; |
876 | #undef TRACE_EVENT_FORMAT_NOFILTER | 753 | #undef FTRACE_ENTRY_DUP |
877 | #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt) | 754 | #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ |
878 | #include "trace_event_types.h" | 755 | FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) |
756 | #include "trace_entries.h" | ||
879 | 757 | ||
880 | #endif /* _LINUX_KERNEL_TRACE_H */ | 758 | #endif /* _LINUX_KERNEL_TRACE_H */ |
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 19bfc75d467e..c21d5f3956ad 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c | |||
@@ -129,6 +129,7 @@ struct tracer boot_tracer __read_mostly = | |||
129 | 129 | ||
130 | void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) | 130 | void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) |
131 | { | 131 | { |
132 | struct ftrace_event_call *call = &event_boot_call; | ||
132 | struct ring_buffer_event *event; | 133 | struct ring_buffer_event *event; |
133 | struct ring_buffer *buffer; | 134 | struct ring_buffer *buffer; |
134 | struct trace_boot_call *entry; | 135 | struct trace_boot_call *entry; |
@@ -150,13 +151,15 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) | |||
150 | goto out; | 151 | goto out; |
151 | entry = ring_buffer_event_data(event); | 152 | entry = ring_buffer_event_data(event); |
152 | entry->boot_call = *bt; | 153 | entry->boot_call = *bt; |
153 | trace_buffer_unlock_commit(buffer, event, 0, 0); | 154 | if (!filter_check_discard(call, entry, buffer, event)) |
155 | trace_buffer_unlock_commit(buffer, event, 0, 0); | ||
154 | out: | 156 | out: |
155 | preempt_enable(); | 157 | preempt_enable(); |
156 | } | 158 | } |
157 | 159 | ||
158 | void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) | 160 | void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) |
159 | { | 161 | { |
162 | struct ftrace_event_call *call = &event_boot_ret; | ||
160 | struct ring_buffer_event *event; | 163 | struct ring_buffer_event *event; |
161 | struct ring_buffer *buffer; | 164 | struct ring_buffer *buffer; |
162 | struct trace_boot_ret *entry; | 165 | struct trace_boot_ret *entry; |
@@ -175,7 +178,8 @@ void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) | |||
175 | goto out; | 178 | goto out; |
176 | entry = ring_buffer_event_data(event); | 179 | entry = ring_buffer_event_data(event); |
177 | entry->boot_ret = *bt; | 180 | entry->boot_ret = *bt; |
178 | trace_buffer_unlock_commit(buffer, event, 0, 0); | 181 | if (!filter_check_discard(call, entry, buffer, event)) |
182 | trace_buffer_unlock_commit(buffer, event, 0, 0); | ||
179 | out: | 183 | out: |
180 | preempt_enable(); | 184 | preempt_enable(); |
181 | } | 185 | } |
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index b588fd81f7f9..20c5f92e28a8 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c | |||
@@ -66,10 +66,14 @@ u64 notrace trace_clock(void) | |||
66 | * Used by plugins that need globally coherent timestamps. | 66 | * Used by plugins that need globally coherent timestamps. |
67 | */ | 67 | */ |
68 | 68 | ||
69 | static u64 prev_trace_clock_time; | 69 | /* keep prev_time and lock in the same cacheline. */ |
70 | 70 | static struct { | |
71 | static raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp = | 71 | u64 prev_time; |
72 | (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; | 72 | raw_spinlock_t lock; |
73 | } trace_clock_struct ____cacheline_aligned_in_smp = | ||
74 | { | ||
75 | .lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED, | ||
76 | }; | ||
73 | 77 | ||
74 | u64 notrace trace_clock_global(void) | 78 | u64 notrace trace_clock_global(void) |
75 | { | 79 | { |
@@ -88,19 +92,19 @@ u64 notrace trace_clock_global(void) | |||
88 | if (unlikely(in_nmi())) | 92 | if (unlikely(in_nmi())) |
89 | goto out; | 93 | goto out; |
90 | 94 | ||
91 | __raw_spin_lock(&trace_clock_lock); | 95 | __raw_spin_lock(&trace_clock_struct.lock); |
92 | 96 | ||
93 | /* | 97 | /* |
94 | * TODO: if this happens often then maybe we should reset | 98 | * TODO: if this happens often then maybe we should reset |
95 | * my_scd->clock to prev_trace_clock_time+1, to make sure | 99 | * my_scd->clock to prev_time+1, to make sure |
96 | * we start ticking with the local clock from now on? | 100 | * we start ticking with the local clock from now on? |
97 | */ | 101 | */ |
98 | if ((s64)(now - prev_trace_clock_time) < 0) | 102 | if ((s64)(now - trace_clock_struct.prev_time) < 0) |
99 | now = prev_trace_clock_time + 1; | 103 | now = trace_clock_struct.prev_time + 1; |
100 | 104 | ||
101 | prev_trace_clock_time = now; | 105 | trace_clock_struct.prev_time = now; |
102 | 106 | ||
103 | __raw_spin_unlock(&trace_clock_lock); | 107 | __raw_spin_unlock(&trace_clock_struct.lock); |
104 | 108 | ||
105 | out: | 109 | out: |
106 | raw_local_irq_restore(flags); | 110 | raw_local_irq_restore(flags); |
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h new file mode 100644 index 000000000000..a431748ddd6e --- /dev/null +++ b/kernel/trace/trace_entries.h | |||
@@ -0,0 +1,383 @@ | |||
1 | /* | ||
2 | * This file defines the trace event structures that go into the ring | ||
3 | * buffer directly. They are created via macros so that changes for them | ||
4 | * appear in the format file. Using macros will automate this process. | ||
5 | * | ||
6 | * The macro used to create a ftrace data structure is: | ||
7 | * | ||
8 | * FTRACE_ENTRY( name, struct_name, id, structure, print ) | ||
9 | * | ||
10 | * @name: the name used as the event name, as well as the name of | ||
11 | * the directory that holds the format file. | ||
12 | * | ||
13 | * @struct_name: the name of the structure that is created. | ||
14 | * | ||
15 | * @id: The event identifier that is used to detect what event | ||
16 | * this is from the ring buffer. | ||
17 | * | ||
18 | * @structure: the structure layout | ||
19 | * | ||
20 | * - __field( type, item ) | ||
21 | * This is equivalent to declaring | ||
22 | * type item; | ||
23 | * in the structure. | ||
24 | * - __array( type, item, size ) | ||
25 | * This is equivalent to declaring | ||
26 | * type item[size]; | ||
27 | * in the structure. | ||
28 | * | ||
29 | * * for structures within structures, the format of the internal | ||
30 | * structure is laid out. This allows the internal structure | ||
31 | * to be deciphered for the format file. Although these macros | ||
32 | * may become out of sync with the internal structure, they | ||
33 | * will create a compile error if it happens. Since the | ||
34 | * internal structures are just tracing helpers, this is not | ||
35 | * an issue. | ||
36 | * | ||
37 | * When an internal structure is used, it should use: | ||
38 | * | ||
39 | * __field_struct( type, item ) | ||
40 | * | ||
41 | * instead of __field. This will prevent it from being shown in | ||
42 | * the output file. The fields in the structure should use: | ||
43 | * | ||
44 | * __field_desc( type, container, item ) | ||
45 | * __array_desc( type, container, item, len ) | ||
46 | * | ||
47 | * type, item and len are the same as __field and __array, but | ||
48 | * container is added. This is the name of the item in | ||
49 | * __field_struct that this is describing. | ||
50 | * | ||
51 | * | ||
52 | * @print: the print format shown to users in the format file. | ||
53 | */ | ||
54 | |||
55 | /* | ||
56 | * Function trace entry - function address and parent function address: | ||
57 | */ | ||
58 | FTRACE_ENTRY(function, ftrace_entry, | ||
59 | |||
60 | TRACE_FN, | ||
61 | |||
62 | F_STRUCT( | ||
63 | __field( unsigned long, ip ) | ||
64 | __field( unsigned long, parent_ip ) | ||
65 | ), | ||
66 | |||
67 | F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip) | ||
68 | ); | ||
69 | |||
70 | /* Function call entry */ | ||
71 | FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry, | ||
72 | |||
73 | TRACE_GRAPH_ENT, | ||
74 | |||
75 | F_STRUCT( | ||
76 | __field_struct( struct ftrace_graph_ent, graph_ent ) | ||
77 | __field_desc( unsigned long, graph_ent, func ) | ||
78 | __field_desc( int, graph_ent, depth ) | ||
79 | ), | ||
80 | |||
81 | F_printk("--> %lx (%d)", __entry->func, __entry->depth) | ||
82 | ); | ||
83 | |||
84 | /* Function return entry */ | ||
85 | FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry, | ||
86 | |||
87 | TRACE_GRAPH_RET, | ||
88 | |||
89 | F_STRUCT( | ||
90 | __field_struct( struct ftrace_graph_ret, ret ) | ||
91 | __field_desc( unsigned long, ret, func ) | ||
92 | __field_desc( unsigned long long, ret, calltime) | ||
93 | __field_desc( unsigned long long, ret, rettime ) | ||
94 | __field_desc( unsigned long, ret, overrun ) | ||
95 | __field_desc( int, ret, depth ) | ||
96 | ), | ||
97 | |||
98 | F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d", | ||
99 | __entry->func, __entry->depth, | ||
100 | __entry->calltime, __entry->rettime, | ||
101 | __entry->depth) | ||
102 | ); | ||
103 | |||
104 | /* | ||
105 | * Context switch trace entry - which task (and prio) we switched from/to: | ||
106 | * | ||
107 | * This is used for both wakeup and context switches. We only want | ||
108 | * to create one structure, but we need two outputs for it. | ||
109 | */ | ||
110 | #define FTRACE_CTX_FIELDS \ | ||
111 | __field( unsigned int, prev_pid ) \ | ||
112 | __field( unsigned char, prev_prio ) \ | ||
113 | __field( unsigned char, prev_state ) \ | ||
114 | __field( unsigned int, next_pid ) \ | ||
115 | __field( unsigned char, next_prio ) \ | ||
116 | __field( unsigned char, next_state ) \ | ||
117 | __field( unsigned int, next_cpu ) | ||
118 | |||
119 | FTRACE_ENTRY(context_switch, ctx_switch_entry, | ||
120 | |||
121 | TRACE_CTX, | ||
122 | |||
123 | F_STRUCT( | ||
124 | FTRACE_CTX_FIELDS | ||
125 | ), | ||
126 | |||
127 | F_printk("%u:%u:%u ==> %u:%u:%u [%03u]", | ||
128 | __entry->prev_pid, __entry->prev_prio, __entry->prev_state, | ||
129 | __entry->next_pid, __entry->next_prio, __entry->next_state, | ||
130 | __entry->next_cpu | ||
131 | ) | ||
132 | ); | ||
133 | |||
134 | /* | ||
135 | * FTRACE_ENTRY_DUP only creates the format file, it will not | ||
136 | * create another structure. | ||
137 | */ | ||
138 | FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry, | ||
139 | |||
140 | TRACE_WAKE, | ||
141 | |||
142 | F_STRUCT( | ||
143 | FTRACE_CTX_FIELDS | ||
144 | ), | ||
145 | |||
146 | F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]", | ||
147 | __entry->prev_pid, __entry->prev_prio, __entry->prev_state, | ||
148 | __entry->next_pid, __entry->next_prio, __entry->next_state, | ||
149 | __entry->next_cpu | ||
150 | ) | ||
151 | ); | ||
152 | |||
153 | /* | ||
154 | * Special (free-form) trace entry: | ||
155 | */ | ||
156 | FTRACE_ENTRY(special, special_entry, | ||
157 | |||
158 | TRACE_SPECIAL, | ||
159 | |||
160 | F_STRUCT( | ||
161 | __field( unsigned long, arg1 ) | ||
162 | __field( unsigned long, arg2 ) | ||
163 | __field( unsigned long, arg3 ) | ||
164 | ), | ||
165 | |||
166 | F_printk("(%08lx) (%08lx) (%08lx)", | ||
167 | __entry->arg1, __entry->arg2, __entry->arg3) | ||
168 | ); | ||
169 | |||
170 | /* | ||
171 | * Stack-trace entry: | ||
172 | */ | ||
173 | |||
174 | #define FTRACE_STACK_ENTRIES 8 | ||
175 | |||
176 | FTRACE_ENTRY(kernel_stack, stack_entry, | ||
177 | |||
178 | TRACE_STACK, | ||
179 | |||
180 | F_STRUCT( | ||
181 | __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) | ||
182 | ), | ||
183 | |||
184 | F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" | ||
185 | "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", | ||
186 | __entry->caller[0], __entry->caller[1], __entry->caller[2], | ||
187 | __entry->caller[3], __entry->caller[4], __entry->caller[5], | ||
188 | __entry->caller[6], __entry->caller[7]) | ||
189 | ); | ||
190 | |||
191 | FTRACE_ENTRY(user_stack, userstack_entry, | ||
192 | |||
193 | TRACE_USER_STACK, | ||
194 | |||
195 | F_STRUCT( | ||
196 | __field( unsigned int, tgid ) | ||
197 | __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) | ||
198 | ), | ||
199 | |||
200 | F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" | ||
201 | "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", | ||
202 | __entry->caller[0], __entry->caller[1], __entry->caller[2], | ||
203 | __entry->caller[3], __entry->caller[4], __entry->caller[5], | ||
204 | __entry->caller[6], __entry->caller[7]) | ||
205 | ); | ||
206 | |||
207 | /* | ||
208 | * trace_printk entry: | ||
209 | */ | ||
210 | FTRACE_ENTRY(bprint, bprint_entry, | ||
211 | |||
212 | TRACE_BPRINT, | ||
213 | |||
214 | F_STRUCT( | ||
215 | __field( unsigned long, ip ) | ||
216 | __field( const char *, fmt ) | ||
217 | __dynamic_array( u32, buf ) | ||
218 | ), | ||
219 | |||
220 | F_printk("%08lx fmt:%p", | ||
221 | __entry->ip, __entry->fmt) | ||
222 | ); | ||
223 | |||
224 | FTRACE_ENTRY(print, print_entry, | ||
225 | |||
226 | TRACE_PRINT, | ||
227 | |||
228 | F_STRUCT( | ||
229 | __field( unsigned long, ip ) | ||
230 | __dynamic_array( char, buf ) | ||
231 | ), | ||
232 | |||
233 | F_printk("%08lx %s", | ||
234 | __entry->ip, __entry->buf) | ||
235 | ); | ||
236 | |||
237 | FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw, | ||
238 | |||
239 | TRACE_MMIO_RW, | ||
240 | |||
241 | F_STRUCT( | ||
242 | __field_struct( struct mmiotrace_rw, rw ) | ||
243 | __field_desc( resource_size_t, rw, phys ) | ||
244 | __field_desc( unsigned long, rw, value ) | ||
245 | __field_desc( unsigned long, rw, pc ) | ||
246 | __field_desc( int, rw, map_id ) | ||
247 | __field_desc( unsigned char, rw, opcode ) | ||
248 | __field_desc( unsigned char, rw, width ) | ||
249 | ), | ||
250 | |||
251 | F_printk("%lx %lx %lx %d %x %x", | ||
252 | (unsigned long)__entry->phys, __entry->value, __entry->pc, | ||
253 | __entry->map_id, __entry->opcode, __entry->width) | ||
254 | ); | ||
255 | |||
256 | FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, | ||
257 | |||
258 | TRACE_MMIO_MAP, | ||
259 | |||
260 | F_STRUCT( | ||
261 | __field_struct( struct mmiotrace_map, map ) | ||
262 | __field_desc( resource_size_t, map, phys ) | ||
263 | __field_desc( unsigned long, map, virt ) | ||
264 | __field_desc( unsigned long, map, len ) | ||
265 | __field_desc( int, map, map_id ) | ||
266 | __field_desc( unsigned char, map, opcode ) | ||
267 | ), | ||
268 | |||
269 | F_printk("%lx %lx %lx %d %x", | ||
270 | (unsigned long)__entry->phys, __entry->virt, __entry->len, | ||
271 | __entry->map_id, __entry->opcode) | ||
272 | ); | ||
273 | |||
274 | FTRACE_ENTRY(boot_call, trace_boot_call, | ||
275 | |||
276 | TRACE_BOOT_CALL, | ||
277 | |||
278 | F_STRUCT( | ||
279 | __field_struct( struct boot_trace_call, boot_call ) | ||
280 | __field_desc( pid_t, boot_call, caller ) | ||
281 | __array_desc( char, boot_call, func, KSYM_SYMBOL_LEN) | ||
282 | ), | ||
283 | |||
284 | F_printk("%d %s", __entry->caller, __entry->func) | ||
285 | ); | ||
286 | |||
287 | FTRACE_ENTRY(boot_ret, trace_boot_ret, | ||
288 | |||
289 | TRACE_BOOT_RET, | ||
290 | |||
291 | F_STRUCT( | ||
292 | __field_struct( struct boot_trace_ret, boot_ret ) | ||
293 | __array_desc( char, boot_ret, func, KSYM_SYMBOL_LEN) | ||
294 | __field_desc( int, boot_ret, result ) | ||
295 | __field_desc( unsigned long, boot_ret, duration ) | ||
296 | ), | ||
297 | |||
298 | F_printk("%s %d %lx", | ||
299 | __entry->func, __entry->result, __entry->duration) | ||
300 | ); | ||
301 | |||
302 | #define TRACE_FUNC_SIZE 30 | ||
303 | #define TRACE_FILE_SIZE 20 | ||
304 | |||
305 | FTRACE_ENTRY(branch, trace_branch, | ||
306 | |||
307 | TRACE_BRANCH, | ||
308 | |||
309 | F_STRUCT( | ||
310 | __field( unsigned int, line ) | ||
311 | __array( char, func, TRACE_FUNC_SIZE+1 ) | ||
312 | __array( char, file, TRACE_FILE_SIZE+1 ) | ||
313 | __field( char, correct ) | ||
314 | ), | ||
315 | |||
316 | F_printk("%u:%s:%s (%u)", | ||
317 | __entry->line, | ||
318 | __entry->func, __entry->file, __entry->correct) | ||
319 | ); | ||
320 | |||
321 | FTRACE_ENTRY(hw_branch, hw_branch_entry, | ||
322 | |||
323 | TRACE_HW_BRANCHES, | ||
324 | |||
325 | F_STRUCT( | ||
326 | __field( u64, from ) | ||
327 | __field( u64, to ) | ||
328 | ), | ||
329 | |||
330 | F_printk("from: %llx to: %llx", __entry->from, __entry->to) | ||
331 | ); | ||
332 | |||
333 | FTRACE_ENTRY(power, trace_power, | ||
334 | |||
335 | TRACE_POWER, | ||
336 | |||
337 | F_STRUCT( | ||
338 | __field_struct( struct power_trace, state_data ) | ||
339 | __field_desc( s64, state_data, stamp ) | ||
340 | __field_desc( s64, state_data, end ) | ||
341 | __field_desc( int, state_data, type ) | ||
342 | __field_desc( int, state_data, state ) | ||
343 | ), | ||
344 | |||
345 | F_printk("%llx->%llx type:%u state:%u", | ||
346 | __entry->stamp, __entry->end, | ||
347 | __entry->type, __entry->state) | ||
348 | ); | ||
349 | |||
350 | FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry, | ||
351 | |||
352 | TRACE_KMEM_ALLOC, | ||
353 | |||
354 | F_STRUCT( | ||
355 | __field( enum kmemtrace_type_id, type_id ) | ||
356 | __field( unsigned long, call_site ) | ||
357 | __field( const void *, ptr ) | ||
358 | __field( size_t, bytes_req ) | ||
359 | __field( size_t, bytes_alloc ) | ||
360 | __field( gfp_t, gfp_flags ) | ||
361 | __field( int, node ) | ||
362 | ), | ||
363 | |||
364 | F_printk("type:%u call_site:%lx ptr:%p req:%zi alloc:%zi" | ||
365 | " flags:%x node:%d", | ||
366 | __entry->type_id, __entry->call_site, __entry->ptr, | ||
367 | __entry->bytes_req, __entry->bytes_alloc, | ||
368 | __entry->gfp_flags, __entry->node) | ||
369 | ); | ||
370 | |||
371 | FTRACE_ENTRY(kmem_free, kmemtrace_free_entry, | ||
372 | |||
373 | TRACE_KMEM_FREE, | ||
374 | |||
375 | F_STRUCT( | ||
376 | __field( enum kmemtrace_type_id, type_id ) | ||
377 | __field( unsigned long, call_site ) | ||
378 | __field( const void *, ptr ) | ||
379 | ), | ||
380 | |||
381 | F_printk("type:%u call_site:%lx ptr:%p", | ||
382 | __entry->type_id, __entry->call_site, __entry->ptr) | ||
383 | ); | ||
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 11ba5bb4ed0a..55a25c933d15 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c | |||
@@ -5,6 +5,7 @@ | |||
5 | * | 5 | * |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/module.h> | ||
8 | #include "trace.h" | 9 | #include "trace.h" |
9 | 10 | ||
10 | int ftrace_profile_enable(int event_id) | 11 | int ftrace_profile_enable(int event_id) |
@@ -14,7 +15,8 @@ int ftrace_profile_enable(int event_id) | |||
14 | 15 | ||
15 | mutex_lock(&event_mutex); | 16 | mutex_lock(&event_mutex); |
16 | list_for_each_entry(event, &ftrace_events, list) { | 17 | list_for_each_entry(event, &ftrace_events, list) { |
17 | if (event->id == event_id && event->profile_enable) { | 18 | if (event->id == event_id && event->profile_enable && |
19 | try_module_get(event->mod)) { | ||
18 | ret = event->profile_enable(event); | 20 | ret = event->profile_enable(event); |
19 | break; | 21 | break; |
20 | } | 22 | } |
@@ -32,6 +34,7 @@ void ftrace_profile_disable(int event_id) | |||
32 | list_for_each_entry(event, &ftrace_events, list) { | 34 | list_for_each_entry(event, &ftrace_events, list) { |
33 | if (event->id == event_id) { | 35 | if (event->id == event_id) { |
34 | event->profile_disable(event); | 36 | event->profile_disable(event); |
37 | module_put(event->mod); | ||
35 | break; | 38 | break; |
36 | } | 39 | } |
37 | } | 40 | } |
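
Editor's note: the hunk above pins the module that owns a profiled event by pairing try_module_get() in ftrace_profile_enable() with module_put() in ftrace_profile_disable(), so the module cannot be unloaded while its event is in use. A hedged sketch of that pairing; struct my_event and its callbacks are assumed stand-ins, not the kernel's types.

	#include <linux/module.h>

	static int example_profile_enable(struct my_event *event)
	{
		int ret;

		if (!try_module_get(event->mod))	/* owner module is unloading */
			return -ENOENT;

		ret = event->profile_enable(event);
		if (ret)
			module_put(event->mod);	/* keep get/put balanced on failure */
		return ret;
	}

	static void example_profile_disable(struct my_event *event)
	{
		event->profile_disable(event);
		module_put(event->mod);		/* drop the reference taken on enable */
	}
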
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h deleted file mode 100644 index 6db005e12487..000000000000 --- a/kernel/trace/trace_event_types.h +++ /dev/null | |||
@@ -1,178 +0,0 @@ | |||
1 | #undef TRACE_SYSTEM | ||
2 | #define TRACE_SYSTEM ftrace | ||
3 | |||
4 | /* | ||
5 | * We cheat and use the proto type field as the ID | ||
6 | * and args as the entry type (minus 'struct') | ||
7 | */ | ||
8 | TRACE_EVENT_FORMAT(function, TRACE_FN, ftrace_entry, ignore, | ||
9 | TRACE_STRUCT( | ||
10 | TRACE_FIELD(unsigned long, ip, ip) | ||
11 | TRACE_FIELD(unsigned long, parent_ip, parent_ip) | ||
12 | ), | ||
13 | TP_RAW_FMT(" %lx <-- %lx") | ||
14 | ); | ||
15 | |||
16 | TRACE_EVENT_FORMAT(funcgraph_entry, TRACE_GRAPH_ENT, | ||
17 | ftrace_graph_ent_entry, ignore, | ||
18 | TRACE_STRUCT( | ||
19 | TRACE_FIELD(unsigned long, graph_ent.func, func) | ||
20 | TRACE_FIELD(int, graph_ent.depth, depth) | ||
21 | ), | ||
22 | TP_RAW_FMT("--> %lx (%d)") | ||
23 | ); | ||
24 | |||
25 | TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET, | ||
26 | ftrace_graph_ret_entry, ignore, | ||
27 | TRACE_STRUCT( | ||
28 | TRACE_FIELD(unsigned long, ret.func, func) | ||
29 | TRACE_FIELD(unsigned long long, ret.calltime, calltime) | ||
30 | TRACE_FIELD(unsigned long long, ret.rettime, rettime) | ||
31 | TRACE_FIELD(unsigned long, ret.overrun, overrun) | ||
32 | TRACE_FIELD(int, ret.depth, depth) | ||
33 | ), | ||
34 | TP_RAW_FMT("<-- %lx (%d)") | ||
35 | ); | ||
36 | |||
37 | TRACE_EVENT_FORMAT(wakeup, TRACE_WAKE, ctx_switch_entry, ignore, | ||
38 | TRACE_STRUCT( | ||
39 | TRACE_FIELD(unsigned int, prev_pid, prev_pid) | ||
40 | TRACE_FIELD(unsigned char, prev_prio, prev_prio) | ||
41 | TRACE_FIELD(unsigned char, prev_state, prev_state) | ||
42 | TRACE_FIELD(unsigned int, next_pid, next_pid) | ||
43 | TRACE_FIELD(unsigned char, next_prio, next_prio) | ||
44 | TRACE_FIELD(unsigned char, next_state, next_state) | ||
45 | TRACE_FIELD(unsigned int, next_cpu, next_cpu) | ||
46 | ), | ||
47 | TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]") | ||
48 | ); | ||
49 | |||
50 | TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore, | ||
51 | TRACE_STRUCT( | ||
52 | TRACE_FIELD(unsigned int, prev_pid, prev_pid) | ||
53 | TRACE_FIELD(unsigned char, prev_prio, prev_prio) | ||
54 | TRACE_FIELD(unsigned char, prev_state, prev_state) | ||
55 | TRACE_FIELD(unsigned int, next_pid, next_pid) | ||
56 | TRACE_FIELD(unsigned char, next_prio, next_prio) | ||
57 | TRACE_FIELD(unsigned char, next_state, next_state) | ||
58 | TRACE_FIELD(unsigned int, next_cpu, next_cpu) | ||
59 | ), | ||
60 | TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]") | ||
61 | ); | ||
62 | |||
63 | TRACE_EVENT_FORMAT_NOFILTER(special, TRACE_SPECIAL, special_entry, ignore, | ||
64 | TRACE_STRUCT( | ||
65 | TRACE_FIELD(unsigned long, arg1, arg1) | ||
66 | TRACE_FIELD(unsigned long, arg2, arg2) | ||
67 | TRACE_FIELD(unsigned long, arg3, arg3) | ||
68 | ), | ||
69 | TP_RAW_FMT("(%08lx) (%08lx) (%08lx)") | ||
70 | ); | ||
71 | |||
72 | /* | ||
73 | * Stack-trace entry: | ||
74 | */ | ||
75 | |||
76 | /* #define FTRACE_STACK_ENTRIES 8 */ | ||
77 | |||
78 | TRACE_EVENT_FORMAT(kernel_stack, TRACE_STACK, stack_entry, ignore, | ||
79 | TRACE_STRUCT( | ||
80 | TRACE_FIELD(unsigned long, caller[0], stack0) | ||
81 | TRACE_FIELD(unsigned long, caller[1], stack1) | ||
82 | TRACE_FIELD(unsigned long, caller[2], stack2) | ||
83 | TRACE_FIELD(unsigned long, caller[3], stack3) | ||
84 | TRACE_FIELD(unsigned long, caller[4], stack4) | ||
85 | TRACE_FIELD(unsigned long, caller[5], stack5) | ||
86 | TRACE_FIELD(unsigned long, caller[6], stack6) | ||
87 | TRACE_FIELD(unsigned long, caller[7], stack7) | ||
88 | ), | ||
89 | TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" | ||
90 | "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n") | ||
91 | ); | ||
92 | |||
93 | TRACE_EVENT_FORMAT(user_stack, TRACE_USER_STACK, userstack_entry, ignore, | ||
94 | TRACE_STRUCT( | ||
95 | TRACE_FIELD(unsigned long, caller[0], stack0) | ||
96 | TRACE_FIELD(unsigned long, caller[1], stack1) | ||
97 | TRACE_FIELD(unsigned long, caller[2], stack2) | ||
98 | TRACE_FIELD(unsigned long, caller[3], stack3) | ||
99 | TRACE_FIELD(unsigned long, caller[4], stack4) | ||
100 | TRACE_FIELD(unsigned long, caller[5], stack5) | ||
101 | TRACE_FIELD(unsigned long, caller[6], stack6) | ||
102 | TRACE_FIELD(unsigned long, caller[7], stack7) | ||
103 | ), | ||
104 | TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" | ||
105 | "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n") | ||
106 | ); | ||
107 | |||
108 | TRACE_EVENT_FORMAT(bprint, TRACE_BPRINT, bprint_entry, ignore, | ||
109 | TRACE_STRUCT( | ||
110 | TRACE_FIELD(unsigned long, ip, ip) | ||
111 | TRACE_FIELD(char *, fmt, fmt) | ||
112 | TRACE_FIELD_ZERO_CHAR(buf) | ||
113 | ), | ||
114 | TP_RAW_FMT("%08lx (%d) fmt:%p %s") | ||
115 | ); | ||
116 | |||
117 | TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, | ||
118 | TRACE_STRUCT( | ||
119 | TRACE_FIELD(unsigned long, ip, ip) | ||
120 | TRACE_FIELD_ZERO_CHAR(buf) | ||
121 | ), | ||
122 | TP_RAW_FMT("%08lx (%d) fmt:%p %s") | ||
123 | ); | ||
124 | |||
125 | TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, | ||
126 | TRACE_STRUCT( | ||
127 | TRACE_FIELD(unsigned int, line, line) | ||
128 | TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, | ||
129 | TRACE_FUNC_SIZE+1, func) | ||
130 | TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, | ||
131 | TRACE_FUNC_SIZE+1, file) | ||
132 | TRACE_FIELD(char, correct, correct) | ||
133 | ), | ||
134 | TP_RAW_FMT("%u:%s:%s (%u)") | ||
135 | ); | ||
136 | |||
137 | TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore, | ||
138 | TRACE_STRUCT( | ||
139 | TRACE_FIELD(u64, from, from) | ||
140 | TRACE_FIELD(u64, to, to) | ||
141 | ), | ||
142 | TP_RAW_FMT("from: %llx to: %llx") | ||
143 | ); | ||
144 | |||
145 | TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore, | ||
146 | TRACE_STRUCT( | ||
147 | TRACE_FIELD_SIGN(ktime_t, state_data.stamp, stamp, 1) | ||
148 | TRACE_FIELD_SIGN(ktime_t, state_data.end, end, 1) | ||
149 | TRACE_FIELD(int, state_data.type, type) | ||
150 | TRACE_FIELD(int, state_data.state, state) | ||
151 | ), | ||
152 | TP_RAW_FMT("%llx->%llx type:%u state:%u") | ||
153 | ); | ||
154 | |||
155 | TRACE_EVENT_FORMAT(kmem_alloc, TRACE_KMEM_ALLOC, kmemtrace_alloc_entry, ignore, | ||
156 | TRACE_STRUCT( | ||
157 | TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id) | ||
158 | TRACE_FIELD(unsigned long, call_site, call_site) | ||
159 | TRACE_FIELD(const void *, ptr, ptr) | ||
160 | TRACE_FIELD(size_t, bytes_req, bytes_req) | ||
161 | TRACE_FIELD(size_t, bytes_alloc, bytes_alloc) | ||
162 | TRACE_FIELD(gfp_t, gfp_flags, gfp_flags) | ||
163 | TRACE_FIELD(int, node, node) | ||
164 | ), | ||
165 | TP_RAW_FMT("type:%u call_site:%lx ptr:%p req:%lu alloc:%lu" | ||
166 | " flags:%x node:%d") | ||
167 | ); | ||
168 | |||
169 | TRACE_EVENT_FORMAT(kmem_free, TRACE_KMEM_FREE, kmemtrace_free_entry, ignore, | ||
170 | TRACE_STRUCT( | ||
171 | TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id) | ||
172 | TRACE_FIELD(unsigned long, call_site, call_site) | ||
173 | TRACE_FIELD(const void *, ptr, ptr) | ||
174 | ), | ||
175 | TP_RAW_FMT("type:%u call_site:%lx ptr:%p") | ||
176 | ); | ||
177 | |||
178 | #undef TRACE_SYSTEM | ||
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 78b1ed230177..56c260b83a9c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
@@ -21,6 +21,7 @@ | |||
21 | 21 | ||
22 | #include "trace_output.h" | 22 | #include "trace_output.h" |
23 | 23 | ||
24 | #undef TRACE_SYSTEM | ||
24 | #define TRACE_SYSTEM "TRACE_SYSTEM" | 25 | #define TRACE_SYSTEM "TRACE_SYSTEM" |
25 | 26 | ||
26 | DEFINE_MUTEX(event_mutex); | 27 | DEFINE_MUTEX(event_mutex); |
@@ -86,7 +87,7 @@ int trace_define_common_fields(struct ftrace_event_call *call) | |||
86 | __common_field(unsigned char, flags); | 87 | __common_field(unsigned char, flags); |
87 | __common_field(unsigned char, preempt_count); | 88 | __common_field(unsigned char, preempt_count); |
88 | __common_field(int, pid); | 89 | __common_field(int, pid); |
89 | __common_field(int, tgid); | 90 | __common_field(int, lock_depth); |
90 | 91 | ||
91 | return ret; | 92 | return ret; |
92 | } | 93 | } |
@@ -230,11 +231,9 @@ static ssize_t | |||
230 | ftrace_event_write(struct file *file, const char __user *ubuf, | 231 | ftrace_event_write(struct file *file, const char __user *ubuf, |
231 | size_t cnt, loff_t *ppos) | 232 | size_t cnt, loff_t *ppos) |
232 | { | 233 | { |
234 | struct trace_parser parser; | ||
233 | size_t read = 0; | 235 | size_t read = 0; |
234 | int i, set = 1; | ||
235 | ssize_t ret; | 236 | ssize_t ret; |
236 | char *buf; | ||
237 | char ch; | ||
238 | 237 | ||
239 | if (!cnt || cnt < 0) | 238 | if (!cnt || cnt < 0) |
240 | return 0; | 239 | return 0; |
@@ -243,60 +242,28 @@ ftrace_event_write(struct file *file, const char __user *ubuf, | |||
243 | if (ret < 0) | 242 | if (ret < 0) |
244 | return ret; | 243 | return ret; |
245 | 244 | ||
246 | ret = get_user(ch, ubuf++); | 245 | if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1)) |
247 | if (ret) | ||
248 | return ret; | ||
249 | read++; | ||
250 | cnt--; | ||
251 | |||
252 | /* skip white space */ | ||
253 | while (cnt && isspace(ch)) { | ||
254 | ret = get_user(ch, ubuf++); | ||
255 | if (ret) | ||
256 | return ret; | ||
257 | read++; | ||
258 | cnt--; | ||
259 | } | ||
260 | |||
261 | /* Only white space found? */ | ||
262 | if (isspace(ch)) { | ||
263 | file->f_pos += read; | ||
264 | ret = read; | ||
265 | return ret; | ||
266 | } | ||
267 | |||
268 | buf = kmalloc(EVENT_BUF_SIZE+1, GFP_KERNEL); | ||
269 | if (!buf) | ||
270 | return -ENOMEM; | 246 | return -ENOMEM; |
271 | 247 | ||
272 | if (cnt > EVENT_BUF_SIZE) | 248 | read = trace_get_user(&parser, ubuf, cnt, ppos); |
273 | cnt = EVENT_BUF_SIZE; | 249 | |
250 | if (trace_parser_loaded((&parser))) { | ||
251 | int set = 1; | ||
274 | 252 | ||
275 | i = 0; | 253 | if (*parser.buffer == '!') |
276 | while (cnt && !isspace(ch)) { | ||
277 | if (!i && ch == '!') | ||
278 | set = 0; | 254 | set = 0; |
279 | else | ||
280 | buf[i++] = ch; | ||
281 | 255 | ||
282 | ret = get_user(ch, ubuf++); | 256 | parser.buffer[parser.idx] = 0; |
257 | |||
258 | ret = ftrace_set_clr_event(parser.buffer + !set, set); | ||
283 | if (ret) | 259 | if (ret) |
284 | goto out_free; | 260 | goto out_put; |
285 | read++; | ||
286 | cnt--; | ||
287 | } | 261 | } |
288 | buf[i] = 0; | ||
289 | |||
290 | file->f_pos += read; | ||
291 | |||
292 | ret = ftrace_set_clr_event(buf, set); | ||
293 | if (ret) | ||
294 | goto out_free; | ||
295 | 262 | ||
296 | ret = read; | 263 | ret = read; |
297 | 264 | ||
298 | out_free: | 265 | out_put: |
299 | kfree(buf); | 266 | trace_parser_put(&parser); |
300 | 267 | ||
301 | return ret; | 268 | return ret; |
302 | } | 269 | } |
@@ -578,7 +545,7 @@ static int trace_write_header(struct trace_seq *s) | |||
578 | FIELD(unsigned char, flags), | 545 | FIELD(unsigned char, flags), |
579 | FIELD(unsigned char, preempt_count), | 546 | FIELD(unsigned char, preempt_count), |
580 | FIELD(int, pid), | 547 | FIELD(int, pid), |
581 | FIELD(int, tgid)); | 548 | FIELD(int, lock_depth)); |
582 | } | 549 | } |
583 | 550 | ||
584 | static ssize_t | 551 | static ssize_t |
@@ -1187,7 +1154,7 @@ static int trace_module_notify(struct notifier_block *self, | |||
1187 | } | 1154 | } |
1188 | #endif /* CONFIG_MODULES */ | 1155 | #endif /* CONFIG_MODULES */ |
1189 | 1156 | ||
1190 | struct notifier_block trace_module_nb = { | 1157 | static struct notifier_block trace_module_nb = { |
1191 | .notifier_call = trace_module_notify, | 1158 | .notifier_call = trace_module_notify, |
1192 | .priority = 0, | 1159 | .priority = 0, |
1193 | }; | 1160 | }; |
@@ -1359,6 +1326,18 @@ static __init void event_trace_self_tests(void) | |||
1359 | if (!call->regfunc) | 1326 | if (!call->regfunc) |
1360 | continue; | 1327 | continue; |
1361 | 1328 | ||
1329 | /* | ||
1330 | * Testing syscall events here is pretty useless, but | ||
1331 | * we still do it if configured. But this is time consuming. | ||
1332 | * What we really need is a user thread to perform the | ||
1333 | * syscalls as we test. | ||
1334 | */ | ||
1335 | #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS | ||
1336 | if (call->system && | ||
1337 | strcmp(call->system, "syscalls") == 0) | ||
1338 | continue; | ||
1339 | #endif | ||
1340 | |||
1362 | pr_info("Testing event %s: ", call->name); | 1341 | pr_info("Testing event %s: ", call->name); |
1363 | 1342 | ||
1364 | /* | 1343 | /* |
@@ -1432,7 +1411,7 @@ static __init void event_trace_self_tests(void) | |||
1432 | 1411 | ||
1433 | #ifdef CONFIG_FUNCTION_TRACER | 1412 | #ifdef CONFIG_FUNCTION_TRACER |
1434 | 1413 | ||
1435 | static DEFINE_PER_CPU(atomic_t, test_event_disable); | 1414 | static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable); |
1436 | 1415 | ||
1437 | static void | 1416 | static void |
1438 | function_test_events_call(unsigned long ip, unsigned long parent_ip) | 1417 | function_test_events_call(unsigned long ip, unsigned long parent_ip) |
@@ -1449,7 +1428,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip) | |||
1449 | pc = preempt_count(); | 1428 | pc = preempt_count(); |
1450 | resched = ftrace_preempt_disable(); | 1429 | resched = ftrace_preempt_disable(); |
1451 | cpu = raw_smp_processor_id(); | 1430 | cpu = raw_smp_processor_id(); |
1452 | disabled = atomic_inc_return(&per_cpu(test_event_disable, cpu)); | 1431 | disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu)); |
1453 | 1432 | ||
1454 | if (disabled != 1) | 1433 | if (disabled != 1) |
1455 | goto out; | 1434 | goto out; |
@@ -1468,7 +1447,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip) | |||
1468 | trace_nowake_buffer_unlock_commit(buffer, event, flags, pc); | 1447 | trace_nowake_buffer_unlock_commit(buffer, event, flags, pc); |
1469 | 1448 | ||
1470 | out: | 1449 | out: |
1471 | atomic_dec(&per_cpu(test_event_disable, cpu)); | 1450 | atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); |
1472 | ftrace_preempt_enable(resched); | 1451 | ftrace_preempt_enable(resched); |
1473 | } | 1452 | } |
1474 | 1453 | ||
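
Editor's note: the renamed per-cpu counter above implements a simple recursion guard for the function-tracer self-test callback -- only the first entry on a CPU (counter reaches 1) records anything, nested calls bail out. A minimal sketch of the idiom with illustrative names; the caller is assumed to run with preemption disabled, as the real callback does via ftrace_preempt_disable().

	#include <linux/percpu.h>
	#include <linux/smp.h>
	#include <asm/atomic.h>

	static DEFINE_PER_CPU(atomic_t, example_disable);

	static void example_traced_callback(void)
	{
		int cpu, disabled;

		cpu = raw_smp_processor_id();
		disabled = atomic_inc_return(&per_cpu(example_disable, cpu));
		if (disabled != 1)
			goto out;	/* re-entered on this CPU: do nothing */

		/* ... record the event here ... */

	out:
		atomic_dec(&per_cpu(example_disable, cpu));
	}
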
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 93660fbbf629..23245785927f 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c | |||
@@ -121,6 +121,47 @@ struct filter_parse_state { | |||
121 | } operand; | 121 | } operand; |
122 | }; | 122 | }; |
123 | 123 | ||
124 | #define DEFINE_COMPARISON_PRED(type) \ | ||
125 | static int filter_pred_##type(struct filter_pred *pred, void *event, \ | ||
126 | int val1, int val2) \ | ||
127 | { \ | ||
128 | type *addr = (type *)(event + pred->offset); \ | ||
129 | type val = (type)pred->val; \ | ||
130 | int match = 0; \ | ||
131 | \ | ||
132 | switch (pred->op) { \ | ||
133 | case OP_LT: \ | ||
134 | match = (*addr < val); \ | ||
135 | break; \ | ||
136 | case OP_LE: \ | ||
137 | match = (*addr <= val); \ | ||
138 | break; \ | ||
139 | case OP_GT: \ | ||
140 | match = (*addr > val); \ | ||
141 | break; \ | ||
142 | case OP_GE: \ | ||
143 | match = (*addr >= val); \ | ||
144 | break; \ | ||
145 | default: \ | ||
146 | break; \ | ||
147 | } \ | ||
148 | \ | ||
149 | return match; \ | ||
150 | } | ||
151 | |||
152 | #define DEFINE_EQUALITY_PRED(size) \ | ||
153 | static int filter_pred_##size(struct filter_pred *pred, void *event, \ | ||
154 | int val1, int val2) \ | ||
155 | { \ | ||
156 | u##size *addr = (u##size *)(event + pred->offset); \ | ||
157 | u##size val = (u##size)pred->val; \ | ||
158 | int match; \ | ||
159 | \ | ||
160 | match = (val == *addr) ^ pred->not; \ | ||
161 | \ | ||
162 | return match; \ | ||
163 | } | ||
164 | |||
124 | DEFINE_COMPARISON_PRED(s64); | 165 | DEFINE_COMPARISON_PRED(s64); |
125 | DEFINE_COMPARISON_PRED(u64); | 166 | DEFINE_COMPARISON_PRED(u64); |
126 | DEFINE_COMPARISON_PRED(s32); | 167 | DEFINE_COMPARISON_PRED(s32); |
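
Editor's note: for reference, DEFINE_COMPARISON_PRED(s64) above expands to roughly the following function (reflowed here for readability):

	static int filter_pred_s64(struct filter_pred *pred, void *event,
				   int val1, int val2)
	{
		s64 *addr = (s64 *)(event + pred->offset);
		s64 val = (s64)pred->val;
		int match = 0;

		switch (pred->op) {
		case OP_LT:
			match = (*addr < val);
			break;
		case OP_LE:
			match = (*addr <= val);
			break;
		case OP_GT:
			match = (*addr > val);
			break;
		case OP_GE:
			match = (*addr >= val);
			break;
		default:
			break;
		}

		return match;
	}
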
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index df1bf6e48bb9..9753fcc61bc5 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c | |||
@@ -15,146 +15,125 @@ | |||
15 | 15 | ||
16 | #include "trace_output.h" | 16 | #include "trace_output.h" |
17 | 17 | ||
18 | #undef TRACE_SYSTEM | ||
19 | #define TRACE_SYSTEM ftrace | ||
18 | 20 | ||
19 | #undef TRACE_STRUCT | 21 | /* not needed for this file */ |
20 | #define TRACE_STRUCT(args...) args | 22 | #undef __field_struct |
23 | #define __field_struct(type, item) | ||
21 | 24 | ||
22 | extern void __bad_type_size(void); | 25 | #undef __field |
26 | #define __field(type, item) type item; | ||
23 | 27 | ||
24 | #undef TRACE_FIELD | 28 | #undef __field_desc |
25 | #define TRACE_FIELD(type, item, assign) \ | 29 | #define __field_desc(type, container, item) type item; |
26 | if (sizeof(type) != sizeof(field.item)) \ | 30 | |
27 | __bad_type_size(); \ | 31 | #undef __array |
32 | #define __array(type, item, size) type item[size]; | ||
33 | |||
34 | #undef __array_desc | ||
35 | #define __array_desc(type, container, item, size) type item[size]; | ||
36 | |||
37 | #undef __dynamic_array | ||
38 | #define __dynamic_array(type, item) type item[]; | ||
39 | |||
40 | #undef F_STRUCT | ||
41 | #define F_STRUCT(args...) args | ||
42 | |||
43 | #undef F_printk | ||
44 | #define F_printk(fmt, args...) fmt, args | ||
45 | |||
46 | #undef FTRACE_ENTRY | ||
47 | #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ | ||
48 | struct ____ftrace_##name { \ | ||
49 | tstruct \ | ||
50 | }; \ | ||
51 | static void __used ____ftrace_check_##name(void) \ | ||
52 | { \ | ||
53 | struct ____ftrace_##name *__entry = NULL; \ | ||
54 | \ | ||
55 | /* force cmpile-time check on F_printk() */ \ | ||
56 | printk(print); \ | ||
57 | } | ||
58 | |||
59 | #undef FTRACE_ENTRY_DUP | ||
60 | #define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \ | ||
61 | FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) | ||
62 | |||
63 | #include "trace_entries.h" | ||
64 | |||
65 | |||
66 | #undef __field | ||
67 | #define __field(type, item) \ | ||
28 | ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ | 68 | ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ |
29 | "offset:%u;\tsize:%u;\n", \ | 69 | "offset:%zu;\tsize:%zu;\n", \ |
30 | (unsigned int)offsetof(typeof(field), item), \ | 70 | offsetof(typeof(field), item), \ |
31 | (unsigned int)sizeof(field.item)); \ | 71 | sizeof(field.item)); \ |
32 | if (!ret) \ | 72 | if (!ret) \ |
33 | return 0; | 73 | return 0; |
34 | 74 | ||
75 | #undef __field_desc | ||
76 | #define __field_desc(type, container, item) \ | ||
77 | ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ | ||
78 | "offset:%zu;\tsize:%zu;\n", \ | ||
79 | offsetof(typeof(field), container.item), \ | ||
80 | sizeof(field.container.item)); \ | ||
81 | if (!ret) \ | ||
82 | return 0; | ||
35 | 83 | ||
36 | #undef TRACE_FIELD_SPECIAL | 84 | #undef __array |
37 | #define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \ | 85 | #define __array(type, item, len) \ |
38 | ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ | 86 | ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ |
39 | "offset:%u;\tsize:%u;\n", \ | 87 | "offset:%zu;\tsize:%zu;\n", \ |
40 | (unsigned int)offsetof(typeof(field), item), \ | 88 | offsetof(typeof(field), item), \ |
41 | (unsigned int)sizeof(field.item)); \ | 89 | sizeof(field.item)); \ |
42 | if (!ret) \ | 90 | if (!ret) \ |
43 | return 0; | 91 | return 0; |
44 | 92 | ||
45 | #undef TRACE_FIELD_ZERO_CHAR | 93 | #undef __array_desc |
46 | #define TRACE_FIELD_ZERO_CHAR(item) \ | 94 | #define __array_desc(type, container, item, len) \ |
47 | ret = trace_seq_printf(s, "\tfield:char " #item ";\t" \ | 95 | ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ |
48 | "offset:%u;\tsize:0;\n", \ | 96 | "offset:%zu;\tsize:%zu;\n", \ |
49 | (unsigned int)offsetof(typeof(field), item)); \ | 97 | offsetof(typeof(field), container.item), \ |
98 | sizeof(field.container.item)); \ | ||
50 | if (!ret) \ | 99 | if (!ret) \ |
51 | return 0; | 100 | return 0; |
52 | 101 | ||
53 | #undef TRACE_FIELD_SIGN | 102 | #undef __dynamic_array |
54 | #define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ | 103 | #define __dynamic_array(type, item) \ |
55 | TRACE_FIELD(type, item, assign) | 104 | ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ |
105 | "offset:%zu;\tsize:0;\n", \ | ||
106 | offsetof(typeof(field), item)); \ | ||
107 | if (!ret) \ | ||
108 | return 0; | ||
56 | 109 | ||
57 | #undef TP_RAW_FMT | 110 | #undef F_printk |
58 | #define TP_RAW_FMT(args...) args | 111 | #define F_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) |
59 | 112 | ||
60 | #undef TRACE_EVENT_FORMAT | 113 | #undef __entry |
61 | #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ | 114 | #define __entry REC |
62 | static int \ | ||
63 | ftrace_format_##call(struct ftrace_event_call *unused, \ | ||
64 | struct trace_seq *s) \ | ||
65 | { \ | ||
66 | struct args field; \ | ||
67 | int ret; \ | ||
68 | \ | ||
69 | tstruct; \ | ||
70 | \ | ||
71 | trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \ | ||
72 | \ | ||
73 | return ret; \ | ||
74 | } | ||
75 | 115 | ||
76 | #undef TRACE_EVENT_FORMAT_NOFILTER | 116 | #undef FTRACE_ENTRY |
77 | #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ | 117 | #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ |
78 | tpfmt) \ | ||
79 | static int \ | 118 | static int \ |
80 | ftrace_format_##call(struct ftrace_event_call *unused, \ | 119 | ftrace_format_##name(struct ftrace_event_call *unused, \ |
81 | struct trace_seq *s) \ | 120 | struct trace_seq *s) \ |
82 | { \ | 121 | { \ |
83 | struct args field; \ | 122 | struct struct_name field __attribute__((unused)); \ |
84 | int ret; \ | 123 | int ret = 0; \ |
85 | \ | 124 | \ |
86 | tstruct; \ | 125 | tstruct; \ |
87 | \ | 126 | \ |
88 | trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \ | 127 | trace_seq_printf(s, "\nprint fmt: " print); \ |
89 | \ | 128 | \ |
90 | return ret; \ | 129 | return ret; \ |
91 | } | 130 | } |
92 | 131 | ||
93 | #include "trace_event_types.h" | 132 | #include "trace_entries.h" |
94 | |||
95 | #undef TRACE_ZERO_CHAR | ||
96 | #define TRACE_ZERO_CHAR(arg) | ||
97 | |||
98 | #undef TRACE_FIELD | ||
99 | #define TRACE_FIELD(type, item, assign)\ | ||
100 | entry->item = assign; | ||
101 | |||
102 | #undef TRACE_FIELD | ||
103 | #define TRACE_FIELD(type, item, assign)\ | ||
104 | entry->item = assign; | ||
105 | |||
106 | #undef TRACE_FIELD_SIGN | ||
107 | #define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ | ||
108 | TRACE_FIELD(type, item, assign) | ||
109 | |||
110 | #undef TP_CMD | ||
111 | #define TP_CMD(cmd...) cmd | ||
112 | |||
113 | #undef TRACE_ENTRY | ||
114 | #define TRACE_ENTRY entry | ||
115 | |||
116 | #undef TRACE_FIELD_SPECIAL | ||
117 | #define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \ | ||
118 | cmd; | ||
119 | |||
120 | #undef TRACE_EVENT_FORMAT | ||
121 | #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ | ||
122 | int ftrace_define_fields_##call(struct ftrace_event_call *event_call); \ | ||
123 | static int ftrace_raw_init_event_##call(void); \ | ||
124 | \ | ||
125 | struct ftrace_event_call __used \ | ||
126 | __attribute__((__aligned__(4))) \ | ||
127 | __attribute__((section("_ftrace_events"))) event_##call = { \ | ||
128 | .name = #call, \ | ||
129 | .id = proto, \ | ||
130 | .system = __stringify(TRACE_SYSTEM), \ | ||
131 | .raw_init = ftrace_raw_init_event_##call, \ | ||
132 | .show_format = ftrace_format_##call, \ | ||
133 | .define_fields = ftrace_define_fields_##call, \ | ||
134 | }; \ | ||
135 | static int ftrace_raw_init_event_##call(void) \ | ||
136 | { \ | ||
137 | INIT_LIST_HEAD(&event_##call.fields); \ | ||
138 | return 0; \ | ||
139 | } \ | ||
140 | |||
141 | #undef TRACE_EVENT_FORMAT_NOFILTER | ||
142 | #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ | ||
143 | tpfmt) \ | ||
144 | \ | ||
145 | struct ftrace_event_call __used \ | ||
146 | __attribute__((__aligned__(4))) \ | ||
147 | __attribute__((section("_ftrace_events"))) event_##call = { \ | ||
148 | .name = #call, \ | ||
149 | .id = proto, \ | ||
150 | .system = __stringify(TRACE_SYSTEM), \ | ||
151 | .show_format = ftrace_format_##call, \ | ||
152 | }; | ||
153 | 133 | ||
154 | #include "trace_event_types.h" | ||
155 | 134 | ||
156 | #undef TRACE_FIELD | 135 | #undef __field |
157 | #define TRACE_FIELD(type, item, assign) \ | 136 | #define __field(type, item) \ |
158 | ret = trace_define_field(event_call, #type, #item, \ | 137 | ret = trace_define_field(event_call, #type, #item, \ |
159 | offsetof(typeof(field), item), \ | 138 | offsetof(typeof(field), item), \ |
160 | sizeof(field.item), \ | 139 | sizeof(field.item), \ |
@@ -162,32 +141,45 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ | |||
162 | if (ret) \ | 141 | if (ret) \ |
163 | return ret; | 142 | return ret; |
164 | 143 | ||
165 | #undef TRACE_FIELD_SPECIAL | 144 | #undef __field_desc |
166 | #define TRACE_FIELD_SPECIAL(type, item, len, cmd) \ | 145 | #define __field_desc(type, container, item) \ |
146 | ret = trace_define_field(event_call, #type, #item, \ | ||
147 | offsetof(typeof(field), \ | ||
148 | container.item), \ | ||
149 | sizeof(field.container.item), \ | ||
150 | is_signed_type(type), FILTER_OTHER); \ | ||
151 | if (ret) \ | ||
152 | return ret; | ||
153 | |||
154 | #undef __array | ||
155 | #define __array(type, item, len) \ | ||
156 | BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ | ||
167 | ret = trace_define_field(event_call, #type "[" #len "]", #item, \ | 157 | ret = trace_define_field(event_call, #type "[" #len "]", #item, \ |
168 | offsetof(typeof(field), item), \ | 158 | offsetof(typeof(field), item), \ |
169 | sizeof(field.item), 0, FILTER_OTHER); \ | 159 | sizeof(field.item), 0, FILTER_OTHER); \ |
170 | if (ret) \ | 160 | if (ret) \ |
171 | return ret; | 161 | return ret; |
172 | 162 | ||
173 | #undef TRACE_FIELD_SIGN | 163 | #undef __array_desc |
174 | #define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ | 164 | #define __array_desc(type, container, item, len) \ |
175 | ret = trace_define_field(event_call, #type, #item, \ | 165 | BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ |
176 | offsetof(typeof(field), item), \ | 166 | ret = trace_define_field(event_call, #type "[" #len "]", #item, \ |
177 | sizeof(field.item), is_signed, \ | 167 | offsetof(typeof(field), \ |
168 | container.item), \ | ||
169 | sizeof(field.container.item), 0, \ | ||
178 | FILTER_OTHER); \ | 170 | FILTER_OTHER); \ |
179 | if (ret) \ | 171 | if (ret) \ |
180 | return ret; | 172 | return ret; |
181 | 173 | ||
182 | #undef TRACE_FIELD_ZERO_CHAR | 174 | #undef __dynamic_array |
183 | #define TRACE_FIELD_ZERO_CHAR(item) | 175 | #define __dynamic_array(type, item) |
184 | 176 | ||
185 | #undef TRACE_EVENT_FORMAT | 177 | #undef FTRACE_ENTRY |
186 | #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ | 178 | #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ |
187 | int \ | 179 | int \ |
188 | ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ | 180 | ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ |
189 | { \ | 181 | { \ |
190 | struct args field; \ | 182 | struct struct_name field; \ |
191 | int ret; \ | 183 | int ret; \ |
192 | \ | 184 | \ |
193 | ret = trace_define_common_fields(event_call); \ | 185 | ret = trace_define_common_fields(event_call); \ |
@@ -199,8 +191,42 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ | |||
199 | return ret; \ | 191 | return ret; \ |
200 | } | 192 | } |
201 | 193 | ||
202 | #undef TRACE_EVENT_FORMAT_NOFILTER | 194 | #include "trace_entries.h" |
203 | #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ | 195 | |
204 | tpfmt) | 196 | |
197 | #undef __field | ||
198 | #define __field(type, item) | ||
199 | |||
200 | #undef __field_desc | ||
201 | #define __field_desc(type, container, item) | ||
202 | |||
203 | #undef __array | ||
204 | #define __array(type, item, len) | ||
205 | |||
206 | #undef __array_desc | ||
207 | #define __array_desc(type, container, item, len) | ||
208 | |||
209 | #undef __dynamic_array | ||
210 | #define __dynamic_array(type, item) | ||
211 | |||
212 | #undef FTRACE_ENTRY | ||
213 | #define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ | ||
214 | static int ftrace_raw_init_event_##call(void); \ | ||
215 | \ | ||
216 | struct ftrace_event_call __used \ | ||
217 | __attribute__((__aligned__(4))) \ | ||
218 | __attribute__((section("_ftrace_events"))) event_##call = { \ | ||
219 | .name = #call, \ | ||
220 | .id = type, \ | ||
221 | .system = __stringify(TRACE_SYSTEM), \ | ||
222 | .raw_init = ftrace_raw_init_event_##call, \ | ||
223 | .show_format = ftrace_format_##call, \ | ||
224 | .define_fields = ftrace_define_fields_##call, \ | ||
225 | }; \ | ||
226 | static int ftrace_raw_init_event_##call(void) \ | ||
227 | { \ | ||
228 | INIT_LIST_HEAD(&event_##call.fields); \ | ||
229 | return 0; \ | ||
230 | } \ | ||
205 | 231 | ||
206 | #include "trace_event_types.h" | 232 | #include "trace_entries.h" |
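
Editor's note: with the redefinitions above, each inclusion of trace_entries.h stamps out one helper per entry. For funcgraph_entry the format callback comes out roughly as below; the __field_struct() line contributes nothing because it is defined away at the top of the file.

	static int ftrace_format_funcgraph_entry(struct ftrace_event_call *unused,
						 struct trace_seq *s)
	{
		struct ftrace_graph_ent_entry field __attribute__((unused));
		int ret = 0;

		ret = trace_seq_printf(s, "\tfield:unsigned long func;\t"
				       "offset:%zu;\tsize:%zu;\n",
				       offsetof(typeof(field), graph_ent.func),
				       sizeof(field.graph_ent.func));
		if (!ret)
			return 0;

		ret = trace_seq_printf(s, "\tfield:int depth;\t"
				       "offset:%zu;\tsize:%zu;\n",
				       offsetof(typeof(field), graph_ent.depth),
				       sizeof(field.graph_ent.depth));
		if (!ret)
			return 0;

		trace_seq_printf(s, "\nprint fmt: %s, %s\n",
				 "\"--> %lx (%d)\"",
				 "REC->func, REC->depth");

		return ret;
	}
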
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 5b01b94518fc..b3f3776b0cd6 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c | |||
@@ -290,7 +290,7 @@ ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip, | |||
290 | { | 290 | { |
291 | long count = (long)data; | 291 | long count = (long)data; |
292 | 292 | ||
293 | seq_printf(m, "%pf:", (void *)ip); | 293 | seq_printf(m, "%ps:", (void *)ip); |
294 | 294 | ||
295 | if (ops == &traceon_probe_ops) | 295 | if (ops == &traceon_probe_ops) |
296 | seq_printf(m, "traceon"); | 296 | seq_printf(m, "traceon"); |
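
Editor's note on the %pf -> %ps change above: %pf and %pF expect a function pointer (and dereference function descriptors on architectures that use them), while %ps and %pS take a plain text address. The ip recorded by the tracer is already a text address, so %ps is presumably the more accurate specifier here. Illustrative kernel-context fragment:

	#include <linux/kernel.h>

	static void example_print_symbol(unsigned long ip)
	{
		pr_info("probe hit at %ps\n", (void *)ip);		/* raw text address */
		pr_info("handler is %pf\n", example_print_symbol);	/* function pointer */
	}
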
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index b3749a2c3132..45e6c01b2e4d 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
@@ -124,7 +124,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, | |||
124 | if (unlikely(current->ret_stack[index].fp != frame_pointer)) { | 124 | if (unlikely(current->ret_stack[index].fp != frame_pointer)) { |
125 | ftrace_graph_stop(); | 125 | ftrace_graph_stop(); |
126 | WARN(1, "Bad frame pointer: expected %lx, received %lx\n" | 126 | WARN(1, "Bad frame pointer: expected %lx, received %lx\n" |
127 | " from func %pF return to %lx\n", | 127 | " from func %ps return to %lx\n", |
128 | current->ret_stack[index].fp, | 128 | current->ret_stack[index].fp, |
129 | frame_pointer, | 129 | frame_pointer, |
130 | (void *)current->ret_stack[index].func, | 130 | (void *)current->ret_stack[index].func, |
@@ -364,6 +364,15 @@ print_graph_proc(struct trace_seq *s, pid_t pid) | |||
364 | } | 364 | } |
365 | 365 | ||
366 | 366 | ||
367 | static enum print_line_t | ||
368 | print_graph_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | ||
369 | { | ||
370 | if (!trace_seq_putc(s, ' ')) | ||
371 | return 0; | ||
372 | |||
373 | return trace_print_lat_fmt(s, entry); | ||
374 | } | ||
375 | |||
367 | /* If the pid changed since the last trace, output this event */ | 376 | /* If the pid changed since the last trace, output this event */ |
368 | static enum print_line_t | 377 | static enum print_line_t |
369 | verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) | 378 | verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) |
@@ -521,6 +530,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, | |||
521 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 530 | if (ret == TRACE_TYPE_PARTIAL_LINE) |
522 | return TRACE_TYPE_PARTIAL_LINE; | 531 | return TRACE_TYPE_PARTIAL_LINE; |
523 | } | 532 | } |
533 | |||
524 | /* Proc */ | 534 | /* Proc */ |
525 | if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { | 535 | if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { |
526 | ret = print_graph_proc(s, pid); | 536 | ret = print_graph_proc(s, pid); |
@@ -659,7 +669,7 @@ print_graph_entry_leaf(struct trace_iterator *iter, | |||
659 | return TRACE_TYPE_PARTIAL_LINE; | 669 | return TRACE_TYPE_PARTIAL_LINE; |
660 | } | 670 | } |
661 | 671 | ||
662 | ret = trace_seq_printf(s, "%pf();\n", (void *)call->func); | 672 | ret = trace_seq_printf(s, "%ps();\n", (void *)call->func); |
663 | if (!ret) | 673 | if (!ret) |
664 | return TRACE_TYPE_PARTIAL_LINE; | 674 | return TRACE_TYPE_PARTIAL_LINE; |
665 | 675 | ||
@@ -702,7 +712,7 @@ print_graph_entry_nested(struct trace_iterator *iter, | |||
702 | return TRACE_TYPE_PARTIAL_LINE; | 712 | return TRACE_TYPE_PARTIAL_LINE; |
703 | } | 713 | } |
704 | 714 | ||
705 | ret = trace_seq_printf(s, "%pf() {\n", (void *)call->func); | 715 | ret = trace_seq_printf(s, "%ps() {\n", (void *)call->func); |
706 | if (!ret) | 716 | if (!ret) |
707 | return TRACE_TYPE_PARTIAL_LINE; | 717 | return TRACE_TYPE_PARTIAL_LINE; |
708 | 718 | ||
@@ -758,6 +768,13 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, | |||
758 | return TRACE_TYPE_PARTIAL_LINE; | 768 | return TRACE_TYPE_PARTIAL_LINE; |
759 | } | 769 | } |
760 | 770 | ||
771 | /* Latency format */ | ||
772 | if (trace_flags & TRACE_ITER_LATENCY_FMT) { | ||
773 | ret = print_graph_lat_fmt(s, ent); | ||
774 | if (ret == TRACE_TYPE_PARTIAL_LINE) | ||
775 | return TRACE_TYPE_PARTIAL_LINE; | ||
776 | } | ||
777 | |||
761 | return 0; | 778 | return 0; |
762 | } | 779 | } |
763 | 780 | ||
@@ -952,28 +969,59 @@ print_graph_function(struct trace_iterator *iter) | |||
952 | return TRACE_TYPE_HANDLED; | 969 | return TRACE_TYPE_HANDLED; |
953 | } | 970 | } |
954 | 971 | ||
972 | static void print_lat_header(struct seq_file *s) | ||
973 | { | ||
974 | static const char spaces[] = " " /* 16 spaces */ | ||
975 | " " /* 4 spaces */ | ||
976 | " "; /* 17 spaces */ | ||
977 | int size = 0; | ||
978 | |||
979 | if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) | ||
980 | size += 16; | ||
981 | if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) | ||
982 | size += 4; | ||
983 | if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) | ||
984 | size += 17; | ||
985 | |||
986 | seq_printf(s, "#%.*s _-----=> irqs-off \n", size, spaces); | ||
987 | seq_printf(s, "#%.*s / _----=> need-resched \n", size, spaces); | ||
988 | seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces); | ||
989 | seq_printf(s, "#%.*s|| / _--=> preempt-depth \n", size, spaces); | ||
990 | seq_printf(s, "#%.*s||| / _-=> lock-depth \n", size, spaces); | ||
991 | seq_printf(s, "#%.*s|||| / \n", size, spaces); | ||
992 | } | ||
993 | |||
955 | static void print_graph_headers(struct seq_file *s) | 994 | static void print_graph_headers(struct seq_file *s) |
956 | { | 995 | { |
996 | int lat = trace_flags & TRACE_ITER_LATENCY_FMT; | ||
997 | |||
998 | if (lat) | ||
999 | print_lat_header(s); | ||
1000 | |||
957 | /* 1st line */ | 1001 | /* 1st line */ |
958 | seq_printf(s, "# "); | 1002 | seq_printf(s, "#"); |
959 | if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) | 1003 | if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) |
960 | seq_printf(s, " TIME "); | 1004 | seq_printf(s, " TIME "); |
961 | if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) | 1005 | if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) |
962 | seq_printf(s, "CPU"); | 1006 | seq_printf(s, " CPU"); |
963 | if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) | 1007 | if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) |
964 | seq_printf(s, " TASK/PID "); | 1008 | seq_printf(s, " TASK/PID "); |
1009 | if (lat) | ||
1010 | seq_printf(s, "|||||"); | ||
965 | if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) | 1011 | if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) |
966 | seq_printf(s, " DURATION "); | 1012 | seq_printf(s, " DURATION "); |
967 | seq_printf(s, " FUNCTION CALLS\n"); | 1013 | seq_printf(s, " FUNCTION CALLS\n"); |
968 | 1014 | ||
969 | /* 2nd line */ | 1015 | /* 2nd line */ |
970 | seq_printf(s, "# "); | 1016 | seq_printf(s, "#"); |
971 | if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) | 1017 | if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) |
972 | seq_printf(s, " | "); | 1018 | seq_printf(s, " | "); |
973 | if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) | 1019 | if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) |
974 | seq_printf(s, "| "); | 1020 | seq_printf(s, " | "); |
975 | if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) | 1021 | if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) |
976 | seq_printf(s, " | | "); | 1022 | seq_printf(s, " | | "); |
1023 | if (lat) | ||
1024 | seq_printf(s, "|||||"); | ||
977 | if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) | 1025 | if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) |
978 | seq_printf(s, " | | "); | 1026 | seq_printf(s, " | | "); |
979 | seq_printf(s, " | | | |\n"); | 1027 | seq_printf(s, " | | | |\n"); |
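
Editor's note: the print_lat_header() hunk above relies on the "%.*s" idiom -- the precision argument caps how many characters of the spaces[] buffer are printed, so one constant string yields a variable-width indent that tracks which columns are enabled. A small user-space sketch of the idiom:

	#include <stdio.h>

	int main(void)
	{
		static const char spaces[] = "                                     ";
		int size = 16 + 4;	/* e.g. ABS_TIME (16) and CPU (4) columns enabled */

		printf("#%.*s _-----=> irqs-off\n", size, spaces);
		printf("#%.*s / _----=> need-resched\n", size, spaces);
		return 0;
	}
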
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 5555b75a0d12..3aa7eaa2114c 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c | |||
@@ -129,15 +129,10 @@ check_critical_timing(struct trace_array *tr, | |||
129 | unsigned long parent_ip, | 129 | unsigned long parent_ip, |
130 | int cpu) | 130 | int cpu) |
131 | { | 131 | { |
132 | unsigned long latency, t0, t1; | ||
133 | cycle_t T0, T1, delta; | 132 | cycle_t T0, T1, delta; |
134 | unsigned long flags; | 133 | unsigned long flags; |
135 | int pc; | 134 | int pc; |
136 | 135 | ||
137 | /* | ||
138 | * usecs conversion is slow so we try to delay the conversion | ||
139 | * as long as possible: | ||
140 | */ | ||
141 | T0 = data->preempt_timestamp; | 136 | T0 = data->preempt_timestamp; |
142 | T1 = ftrace_now(cpu); | 137 | T1 = ftrace_now(cpu); |
143 | delta = T1-T0; | 138 | delta = T1-T0; |
@@ -157,18 +152,15 @@ check_critical_timing(struct trace_array *tr, | |||
157 | 152 | ||
158 | trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); | 153 | trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); |
159 | 154 | ||
160 | latency = nsecs_to_usecs(delta); | ||
161 | |||
162 | if (data->critical_sequence != max_sequence) | 155 | if (data->critical_sequence != max_sequence) |
163 | goto out_unlock; | 156 | goto out_unlock; |
164 | 157 | ||
165 | tracing_max_latency = delta; | ||
166 | t0 = nsecs_to_usecs(T0); | ||
167 | t1 = nsecs_to_usecs(T1); | ||
168 | |||
169 | data->critical_end = parent_ip; | 158 | data->critical_end = parent_ip; |
170 | 159 | ||
171 | update_max_tr_single(tr, current, cpu); | 160 | if (likely(!is_tracing_stopped())) { |
161 | tracing_max_latency = delta; | ||
162 | update_max_tr_single(tr, current, cpu); | ||
163 | } | ||
172 | 164 | ||
173 | max_sequence++; | 165 | max_sequence++; |
174 | 166 | ||
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index c4c9bbda53d3..0acd834659ed 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c | |||
@@ -307,6 +307,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, | |||
307 | struct trace_array_cpu *data, | 307 | struct trace_array_cpu *data, |
308 | struct mmiotrace_rw *rw) | 308 | struct mmiotrace_rw *rw) |
309 | { | 309 | { |
310 | struct ftrace_event_call *call = &event_mmiotrace_rw; | ||
310 | struct ring_buffer *buffer = tr->buffer; | 311 | struct ring_buffer *buffer = tr->buffer; |
311 | struct ring_buffer_event *event; | 312 | struct ring_buffer_event *event; |
312 | struct trace_mmiotrace_rw *entry; | 313 | struct trace_mmiotrace_rw *entry; |
@@ -320,7 +321,9 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, | |||
320 | } | 321 | } |
321 | entry = ring_buffer_event_data(event); | 322 | entry = ring_buffer_event_data(event); |
322 | entry->rw = *rw; | 323 | entry->rw = *rw; |
323 | trace_buffer_unlock_commit(buffer, event, 0, pc); | 324 | |
325 | if (!filter_check_discard(call, entry, buffer, event)) | ||
326 | trace_buffer_unlock_commit(buffer, event, 0, pc); | ||
324 | } | 327 | } |
325 | 328 | ||
326 | void mmio_trace_rw(struct mmiotrace_rw *rw) | 329 | void mmio_trace_rw(struct mmiotrace_rw *rw) |
@@ -334,6 +337,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, | |||
334 | struct trace_array_cpu *data, | 337 | struct trace_array_cpu *data, |
335 | struct mmiotrace_map *map) | 338 | struct mmiotrace_map *map) |
336 | { | 339 | { |
340 | struct ftrace_event_call *call = &event_mmiotrace_map; | ||
337 | struct ring_buffer *buffer = tr->buffer; | 341 | struct ring_buffer *buffer = tr->buffer; |
338 | struct ring_buffer_event *event; | 342 | struct ring_buffer_event *event; |
339 | struct trace_mmiotrace_map *entry; | 343 | struct trace_mmiotrace_map *entry; |
@@ -347,7 +351,9 @@ static void __trace_mmiotrace_map(struct trace_array *tr, | |||
347 | } | 351 | } |
348 | entry = ring_buffer_event_data(event); | 352 | entry = ring_buffer_event_data(event); |
349 | entry->map = *map; | 353 | entry->map = *map; |
350 | trace_buffer_unlock_commit(buffer, event, 0, pc); | 354 | |
355 | if (!filter_check_discard(call, entry, buffer, event)) | ||
356 | trace_buffer_unlock_commit(buffer, event, 0, pc); | ||
351 | } | 357 | } |
352 | 358 | ||
353 | void mmio_trace_mapping(struct mmiotrace_map *map) | 359 | void mmio_trace_mapping(struct mmiotrace_map *map) |
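
Editor's note: the two hunks above change the commit path so an event that fails the installed filter is discarded rather than committed. A hedged sketch of the full reserve/fill/commit sequence; the reserve call is assumed from the surrounding code, which is not shown in these hunks.

	static void example_mmiotrace_rw(struct trace_array *tr,
					 struct mmiotrace_rw *rw, int pc)
	{
		struct ftrace_event_call *call = &event_mmiotrace_rw;
		struct ring_buffer *buffer = tr->buffer;
		struct ring_buffer_event *event;
		struct trace_mmiotrace_rw *entry;

		/* Reserve space for one entry (call assumed from context). */
		event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_RW,
						  sizeof(*entry), 0, pc);
		if (!event)
			return;

		entry = ring_buffer_event_data(event);
		entry->rw = *rw;

		/* Commit only if the event filter does not discard it. */
		if (!filter_check_discard(call, entry, buffer, event))
			trace_buffer_unlock_commit(buffer, event, 0, pc);
	}
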
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index e0c2545622e8..f572f44c6e1e 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
@@ -407,7 +407,7 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, | |||
407 | * since individual threads might have already quit! | 407 | * since individual threads might have already quit! |
408 | */ | 408 | */ |
409 | rcu_read_lock(); | 409 | rcu_read_lock(); |
410 | task = find_task_by_vpid(entry->ent.tgid); | 410 | task = find_task_by_vpid(entry->tgid); |
411 | if (task) | 411 | if (task) |
412 | mm = get_task_mm(task); | 412 | mm = get_task_mm(task); |
413 | rcu_read_unlock(); | 413 | rcu_read_unlock(); |
@@ -460,18 +460,23 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) | |||
460 | return ret; | 460 | return ret; |
461 | } | 461 | } |
462 | 462 | ||
463 | static int | 463 | /** |
464 | lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) | 464 | * trace_print_lat_fmt - print the irq, preempt and lockdep fields |
465 | * @s: trace seq struct to write to | ||
466 | * @entry: The trace entry field from the ring buffer | ||
467 | * | ||
468 | * Prints the generic fields of irqs off, in hard or softirq, preempt | ||
469 | * count and lock depth. | ||
470 | */ | ||
471 | int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | ||
465 | { | 472 | { |
466 | int hardirq, softirq; | 473 | int hardirq, softirq; |
467 | char comm[TASK_COMM_LEN]; | 474 | int ret; |
468 | 475 | ||
469 | trace_find_cmdline(entry->pid, comm); | ||
470 | hardirq = entry->flags & TRACE_FLAG_HARDIRQ; | 476 | hardirq = entry->flags & TRACE_FLAG_HARDIRQ; |
471 | softirq = entry->flags & TRACE_FLAG_SOFTIRQ; | 477 | softirq = entry->flags & TRACE_FLAG_SOFTIRQ; |
472 | 478 | ||
473 | if (!trace_seq_printf(s, "%8.8s-%-5d %3d%c%c%c", | 479 | if (!trace_seq_printf(s, "%c%c%c", |
474 | comm, entry->pid, cpu, | ||
475 | (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : | 480 | (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : |
476 | (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? | 481 | (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? |
477 | 'X' : '.', | 482 | 'X' : '.', |
@@ -481,9 +486,30 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) | |||
481 | hardirq ? 'h' : softirq ? 's' : '.')) | 486 | hardirq ? 'h' : softirq ? 's' : '.')) |
482 | return 0; | 487 | return 0; |
483 | 488 | ||
489 | if (entry->lock_depth < 0) | ||
490 | ret = trace_seq_putc(s, '.'); | ||
491 | else | ||
492 | ret = trace_seq_printf(s, "%d", entry->lock_depth); | ||
493 | if (!ret) | ||
494 | return 0; | ||
495 | |||
484 | if (entry->preempt_count) | 496 | if (entry->preempt_count) |
485 | return trace_seq_printf(s, "%x", entry->preempt_count); | 497 | return trace_seq_printf(s, "%x", entry->preempt_count); |
486 | return trace_seq_puts(s, "."); | 498 | return trace_seq_putc(s, '.'); |
499 | } | ||
500 | |||
501 | static int | ||
502 | lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) | ||
503 | { | ||
504 | char comm[TASK_COMM_LEN]; | ||
505 | |||
506 | trace_find_cmdline(entry->pid, comm); | ||
507 | |||
508 | if (!trace_seq_printf(s, "%8.8s-%-5d %3d", | ||
509 | comm, entry->pid, cpu)) | ||
510 | return 0; | ||
511 | |||
512 | return trace_print_lat_fmt(s, entry); | ||
487 | } | 513 | } |
488 | 514 | ||
489 | static unsigned long preempt_mark_thresh = 100; | 515 | static unsigned long preempt_mark_thresh = 100; |
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index d38bec4a9c30..9d91c72ba38b 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h | |||
@@ -26,6 +26,8 @@ extern struct trace_event *ftrace_find_event(int type); | |||
26 | 26 | ||
27 | extern enum print_line_t trace_nop_print(struct trace_iterator *iter, | 27 | extern enum print_line_t trace_nop_print(struct trace_iterator *iter, |
28 | int flags); | 28 | int flags); |
29 | extern int | ||
30 | trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry); | ||
29 | 31 | ||
30 | /* used by module unregistering */ | 32 | /* used by module unregistering */ |
31 | extern int __unregister_ftrace_event(struct trace_event *event); | 33 | extern int __unregister_ftrace_event(struct trace_event *event); |
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index ad69f105a7c6..26185d727676 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c | |||
@@ -24,6 +24,7 @@ static int __read_mostly tracer_enabled; | |||
24 | 24 | ||
25 | static struct task_struct *wakeup_task; | 25 | static struct task_struct *wakeup_task; |
26 | static int wakeup_cpu; | 26 | static int wakeup_cpu; |
27 | static int wakeup_current_cpu; | ||
27 | static unsigned wakeup_prio = -1; | 28 | static unsigned wakeup_prio = -1; |
28 | static int wakeup_rt; | 29 | static int wakeup_rt; |
29 | 30 | ||
@@ -56,33 +57,23 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) | |||
56 | resched = ftrace_preempt_disable(); | 57 | resched = ftrace_preempt_disable(); |
57 | 58 | ||
58 | cpu = raw_smp_processor_id(); | 59 | cpu = raw_smp_processor_id(); |
60 | if (cpu != wakeup_current_cpu) | ||
61 | goto out_enable; | ||
62 | |||
59 | data = tr->data[cpu]; | 63 | data = tr->data[cpu]; |
60 | disabled = atomic_inc_return(&data->disabled); | 64 | disabled = atomic_inc_return(&data->disabled); |
61 | if (unlikely(disabled != 1)) | 65 | if (unlikely(disabled != 1)) |
62 | goto out; | 66 | goto out; |
63 | 67 | ||
64 | local_irq_save(flags); | 68 | local_irq_save(flags); |
65 | __raw_spin_lock(&wakeup_lock); | ||
66 | |||
67 | if (unlikely(!wakeup_task)) | ||
68 | goto unlock; | ||
69 | |||
70 | /* | ||
71 | * The task can't disappear because it needs to | ||
72 | * wake up first, and we have the wakeup_lock. | ||
73 | */ | ||
74 | if (task_cpu(wakeup_task) != cpu) | ||
75 | goto unlock; | ||
76 | 69 | ||
77 | trace_function(tr, ip, parent_ip, flags, pc); | 70 | trace_function(tr, ip, parent_ip, flags, pc); |
78 | 71 | ||
79 | unlock: | ||
80 | __raw_spin_unlock(&wakeup_lock); | ||
81 | local_irq_restore(flags); | 72 | local_irq_restore(flags); |
82 | 73 | ||
83 | out: | 74 | out: |
84 | atomic_dec(&data->disabled); | 75 | atomic_dec(&data->disabled); |
85 | 76 | out_enable: | |
86 | ftrace_preempt_enable(resched); | 77 | ftrace_preempt_enable(resched); |
87 | } | 78 | } |
88 | 79 | ||
@@ -107,11 +98,18 @@ static int report_latency(cycle_t delta) | |||
107 | return 1; | 98 | return 1; |
108 | } | 99 | } |
109 | 100 | ||
101 | static void probe_wakeup_migrate_task(struct task_struct *task, int cpu) | ||
102 | { | ||
103 | if (task != wakeup_task) | ||
104 | return; | ||
105 | |||
106 | wakeup_current_cpu = cpu; | ||
107 | } | ||
108 | |||
110 | static void notrace | 109 | static void notrace |
111 | probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, | 110 | probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, |
112 | struct task_struct *next) | 111 | struct task_struct *next) |
113 | { | 112 | { |
114 | unsigned long latency = 0, t0 = 0, t1 = 0; | ||
115 | struct trace_array_cpu *data; | 113 | struct trace_array_cpu *data; |
116 | cycle_t T0, T1, delta; | 114 | cycle_t T0, T1, delta; |
117 | unsigned long flags; | 115 | unsigned long flags; |
@@ -157,10 +155,6 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, | |||
157 | trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); | 155 | trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); |
158 | tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); | 156 | tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); |
159 | 157 | ||
160 | /* | ||
161 | * usecs conversion is slow so we try to delay the conversion | ||
162 | * as long as possible: | ||
163 | */ | ||
164 | T0 = data->preempt_timestamp; | 158 | T0 = data->preempt_timestamp; |
165 | T1 = ftrace_now(cpu); | 159 | T1 = ftrace_now(cpu); |
166 | delta = T1-T0; | 160 | delta = T1-T0; |
@@ -168,13 +162,10 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, | |||
168 | if (!report_latency(delta)) | 162 | if (!report_latency(delta)) |
169 | goto out_unlock; | 163 | goto out_unlock; |
170 | 164 | ||
171 | latency = nsecs_to_usecs(delta); | 165 | if (likely(!is_tracing_stopped())) { |
172 | 166 | tracing_max_latency = delta; | |
173 | tracing_max_latency = delta; | 167 | update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu); |
174 | t0 = nsecs_to_usecs(T0); | 168 | } |
175 | t1 = nsecs_to_usecs(T1); | ||
176 | |||
177 | update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu); | ||
178 | 169 | ||
179 | out_unlock: | 170 | out_unlock: |
180 | __wakeup_reset(wakeup_trace); | 171 | __wakeup_reset(wakeup_trace); |
@@ -244,6 +235,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success) | |||
244 | __wakeup_reset(wakeup_trace); | 235 | __wakeup_reset(wakeup_trace); |
245 | 236 | ||
246 | wakeup_cpu = task_cpu(p); | 237 | wakeup_cpu = task_cpu(p); |
238 | wakeup_current_cpu = wakeup_cpu; | ||
247 | wakeup_prio = p->prio; | 239 | wakeup_prio = p->prio; |
248 | 240 | ||
249 | wakeup_task = p; | 241 | wakeup_task = p; |
@@ -293,6 +285,13 @@ static void start_wakeup_tracer(struct trace_array *tr) | |||
293 | goto fail_deprobe_wake_new; | 285 | goto fail_deprobe_wake_new; |
294 | } | 286 | } |
295 | 287 | ||
288 | ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task); | ||
289 | if (ret) { | ||
290 | pr_info("wakeup trace: Couldn't activate tracepoint" | ||
291 | " probe to kernel_sched_migrate_task\n"); | ||
292 | return; | ||
293 | } | ||
294 | |||
296 | wakeup_reset(tr); | 295 | wakeup_reset(tr); |
297 | 296 | ||
298 | /* | 297 | /* |
@@ -325,6 +324,7 @@ static void stop_wakeup_tracer(struct trace_array *tr) | |||
325 | unregister_trace_sched_switch(probe_wakeup_sched_switch); | 324 | unregister_trace_sched_switch(probe_wakeup_sched_switch); |
326 | unregister_trace_sched_wakeup_new(probe_wakeup); | 325 | unregister_trace_sched_wakeup_new(probe_wakeup); |
327 | unregister_trace_sched_wakeup(probe_wakeup); | 326 | unregister_trace_sched_wakeup(probe_wakeup); |
327 | unregister_trace_sched_migrate_task(probe_wakeup_migrate_task); | ||
328 | } | 328 | } |
329 | 329 | ||
330 | static int __wakeup_tracer_init(struct trace_array *tr) | 330 | static int __wakeup_tracer_init(struct trace_array *tr) |