about | summary | refs | log | tree | commit | diff | stats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/kfifo.c | 2
-rw-r--r-- kernel/perf_counter.c | 394
-rw-r--r-- kernel/power/console.c | 63
-rw-r--r-- kernel/sched_clock.c | 122
-rw-r--r-- kernel/sched_fair.c | 1
-rw-r--r-- kernel/trace/Makefile | 2
-rw-r--r-- kernel/trace/power-traces.c | 20
-rw-r--r-- kernel/trace/trace.h | 3
-rw-r--r-- kernel/trace/trace_entries.h | 17
-rw-r--r-- kernel/trace/trace_power.c | 218
10 files changed, 297 insertions, 545 deletions
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 26539e3228e5..3765ff3c1bbe 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -117,7 +117,7 @@ EXPORT_SYMBOL(kfifo_free);
117 * writer, you don't need extra locking to use these functions. 117 * writer, you don't need extra locking to use these functions.
118 */ 118 */
119unsigned int __kfifo_put(struct kfifo *fifo, 119unsigned int __kfifo_put(struct kfifo *fifo,
120 unsigned char *buffer, unsigned int len) 120 const unsigned char *buffer, unsigned int len)
121{ 121{
122 unsigned int l; 122 unsigned int l;
123 123
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 8cb94a52d1bb..cc768ab81ac8 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2176,6 +2176,13 @@ static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages)
2176 data->nr_pages = nr_pages; 2176 data->nr_pages = nr_pages;
2177 atomic_set(&data->lock, -1); 2177 atomic_set(&data->lock, -1);
2178 2178
2179 if (counter->attr.watermark) {
2180 data->watermark = min_t(long, PAGE_SIZE * nr_pages,
2181 counter->attr.wakeup_watermark);
2182 }
2183 if (!data->watermark)
2184 data->watermark = max(PAGE_SIZE, PAGE_SIZE * nr_pages / 4);
2185
2179 rcu_assign_pointer(counter->data, data); 2186 rcu_assign_pointer(counter->data, data);
2180 2187
2181 return 0; 2188 return 0;
@@ -2315,7 +2322,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2315 lock_limit >>= PAGE_SHIFT; 2322 lock_limit >>= PAGE_SHIFT;
2316 locked = vma->vm_mm->locked_vm + extra; 2323 locked = vma->vm_mm->locked_vm + extra;
2317 2324
2318 if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { 2325 if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
2326 !capable(CAP_IPC_LOCK)) {
2319 ret = -EPERM; 2327 ret = -EPERM;
2320 goto unlock; 2328 goto unlock;
2321 } 2329 }
@@ -2504,35 +2512,15 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2504/* 2512/*
2505 * Output 2513 * Output
2506 */ 2514 */
2507 2515static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
2508struct perf_output_handle { 2516 unsigned long offset, unsigned long head)
2509 struct perf_counter *counter;
2510 struct perf_mmap_data *data;
2511 unsigned long head;
2512 unsigned long offset;
2513 int nmi;
2514 int sample;
2515 int locked;
2516 unsigned long flags;
2517};
2518
2519static bool perf_output_space(struct perf_mmap_data *data,
2520 unsigned int offset, unsigned int head)
2521{ 2517{
2522 unsigned long tail;
2523 unsigned long mask; 2518 unsigned long mask;
2524 2519
2525 if (!data->writable) 2520 if (!data->writable)
2526 return true; 2521 return true;
2527 2522
2528 mask = (data->nr_pages << PAGE_SHIFT) - 1; 2523 mask = (data->nr_pages << PAGE_SHIFT) - 1;
2529 /*
2530 * Userspace could choose to issue a mb() before updating the tail
2531 * pointer. So that all reads will be completed before the write is
2532 * issued.
2533 */
2534 tail = ACCESS_ONCE(data->user_page->data_tail);
2535 smp_rmb();
2536 2524
2537 offset = (offset - tail) & mask; 2525 offset = (offset - tail) & mask;
2538 head = (head - tail) & mask; 2526 head = (head - tail) & mask;
@@ -2633,8 +2621,8 @@ out:
2633 local_irq_restore(handle->flags); 2621 local_irq_restore(handle->flags);
2634} 2622}
2635 2623
2636static void perf_output_copy(struct perf_output_handle *handle, 2624void perf_output_copy(struct perf_output_handle *handle,
2637 const void *buf, unsigned int len) 2625 const void *buf, unsigned int len)
2638{ 2626{
2639 unsigned int pages_mask; 2627 unsigned int pages_mask;
2640 unsigned int offset; 2628 unsigned int offset;
@@ -2669,16 +2657,13 @@ static void perf_output_copy(struct perf_output_handle *handle,
2669 WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0); 2657 WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
2670} 2658}
2671 2659
2672#define perf_output_put(handle, x) \ 2660int perf_output_begin(struct perf_output_handle *handle,
2673 perf_output_copy((handle), &(x), sizeof(x)) 2661 struct perf_counter *counter, unsigned int size,
2674 2662 int nmi, int sample)
2675static int perf_output_begin(struct perf_output_handle *handle,
2676 struct perf_counter *counter, unsigned int size,
2677 int nmi, int sample)
2678{ 2663{
2679 struct perf_counter *output_counter; 2664 struct perf_counter *output_counter;
2680 struct perf_mmap_data *data; 2665 struct perf_mmap_data *data;
2681 unsigned int offset, head; 2666 unsigned long tail, offset, head;
2682 int have_lost; 2667 int have_lost;
2683 struct { 2668 struct {
2684 struct perf_event_header header; 2669 struct perf_event_header header;
@@ -2716,16 +2701,23 @@ static int perf_output_begin(struct perf_output_handle *handle,
2716 perf_output_lock(handle); 2701 perf_output_lock(handle);
2717 2702
2718 do { 2703 do {
2704 /*
2705 * Userspace could choose to issue a mb() before updating the
2706 * tail pointer. So that all reads will be completed before the
2707 * write is issued.
2708 */
2709 tail = ACCESS_ONCE(data->user_page->data_tail);
2710 smp_rmb();
2719 offset = head = atomic_long_read(&data->head); 2711 offset = head = atomic_long_read(&data->head);
2720 head += size; 2712 head += size;
2721 if (unlikely(!perf_output_space(data, offset, head))) 2713 if (unlikely(!perf_output_space(data, tail, offset, head)))
2722 goto fail; 2714 goto fail;
2723 } while (atomic_long_cmpxchg(&data->head, offset, head) != offset); 2715 } while (atomic_long_cmpxchg(&data->head, offset, head) != offset);
2724 2716
2725 handle->offset = offset; 2717 handle->offset = offset;
2726 handle->head = head; 2718 handle->head = head;
2727 2719
2728 if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT)) 2720 if (head - tail > data->watermark)
2729 atomic_set(&data->wakeup, 1); 2721 atomic_set(&data->wakeup, 1);
2730 2722
2731 if (have_lost) { 2723 if (have_lost) {
@@ -2749,7 +2741,7 @@ out:
2749 return -ENOSPC; 2741 return -ENOSPC;
2750} 2742}
2751 2743
2752static void perf_output_end(struct perf_output_handle *handle) 2744void perf_output_end(struct perf_output_handle *handle)
2753{ 2745{
2754 struct perf_counter *counter = handle->counter; 2746 struct perf_counter *counter = handle->counter;
2755 struct perf_mmap_data *data = handle->data; 2747 struct perf_mmap_data *data = handle->data;
@@ -2863,156 +2855,176 @@ static void perf_output_read(struct perf_output_handle *handle,
2863 perf_output_read_one(handle, counter); 2855 perf_output_read_one(handle, counter);
2864} 2856}
2865 2857
2866void perf_counter_output(struct perf_counter *counter, int nmi, 2858void perf_output_sample(struct perf_output_handle *handle,
2867 struct perf_sample_data *data) 2859 struct perf_event_header *header,
2860 struct perf_sample_data *data,
2861 struct perf_counter *counter)
2868{ 2862{
2869 int ret; 2863 u64 sample_type = data->type;
2870 u64 sample_type = counter->attr.sample_type;
2871 struct perf_output_handle handle;
2872 struct perf_event_header header;
2873 u64 ip;
2874 struct {
2875 u32 pid, tid;
2876 } tid_entry;
2877 struct perf_callchain_entry *callchain = NULL;
2878 int callchain_size = 0;
2879 u64 time;
2880 struct {
2881 u32 cpu, reserved;
2882 } cpu_entry;
2883 2864
2884 header.type = PERF_EVENT_SAMPLE; 2865 perf_output_put(handle, *header);
2885 header.size = sizeof(header);
2886 2866
2887 header.misc = 0; 2867 if (sample_type & PERF_SAMPLE_IP)
2888 header.misc |= perf_misc_flags(data->regs); 2868 perf_output_put(handle, data->ip);
2889
2890 if (sample_type & PERF_SAMPLE_IP) {
2891 ip = perf_instruction_pointer(data->regs);
2892 header.size += sizeof(ip);
2893 }
2894
2895 if (sample_type & PERF_SAMPLE_TID) {
2896 /* namespace issues */
2897 tid_entry.pid = perf_counter_pid(counter, current);
2898 tid_entry.tid = perf_counter_tid(counter, current);
2899
2900 header.size += sizeof(tid_entry);
2901 }
2902 2869
2903 if (sample_type & PERF_SAMPLE_TIME) { 2870 if (sample_type & PERF_SAMPLE_TID)
2904 /* 2871 perf_output_put(handle, data->tid_entry);
2905 * Maybe do better on x86 and provide cpu_clock_nmi()
2906 */
2907 time = sched_clock();
2908 2872
2909 header.size += sizeof(u64); 2873 if (sample_type & PERF_SAMPLE_TIME)
2910 } 2874 perf_output_put(handle, data->time);
2911 2875
2912 if (sample_type & PERF_SAMPLE_ADDR) 2876 if (sample_type & PERF_SAMPLE_ADDR)
2913 header.size += sizeof(u64); 2877 perf_output_put(handle, data->addr);
2914 2878
2915 if (sample_type & PERF_SAMPLE_ID) 2879 if (sample_type & PERF_SAMPLE_ID)
2916 header.size += sizeof(u64); 2880 perf_output_put(handle, data->id);
2917 2881
2918 if (sample_type & PERF_SAMPLE_STREAM_ID) 2882 if (sample_type & PERF_SAMPLE_STREAM_ID)
2919 header.size += sizeof(u64); 2883 perf_output_put(handle, data->stream_id);
2920
2921 if (sample_type & PERF_SAMPLE_CPU) {
2922 header.size += sizeof(cpu_entry);
2923 2884
2924 cpu_entry.cpu = raw_smp_processor_id(); 2885 if (sample_type & PERF_SAMPLE_CPU)
2925 cpu_entry.reserved = 0; 2886 perf_output_put(handle, data->cpu_entry);
2926 }
2927 2887
2928 if (sample_type & PERF_SAMPLE_PERIOD) 2888 if (sample_type & PERF_SAMPLE_PERIOD)
2929 header.size += sizeof(u64); 2889 perf_output_put(handle, data->period);
2930 2890
2931 if (sample_type & PERF_SAMPLE_READ) 2891 if (sample_type & PERF_SAMPLE_READ)
2932 header.size += perf_counter_read_size(counter); 2892 perf_output_read(handle, counter);
2933 2893
2934 if (sample_type & PERF_SAMPLE_CALLCHAIN) { 2894 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
2935 callchain = perf_callchain(data->regs); 2895 if (data->callchain) {
2896 int size = 1;
2936 2897
2937 if (callchain) { 2898 if (data->callchain)
2938 callchain_size = (1 + callchain->nr) * sizeof(u64); 2899 size += data->callchain->nr;
2939 header.size += callchain_size; 2900
2940 } else 2901 size *= sizeof(u64);
2941 header.size += sizeof(u64); 2902
2903 perf_output_copy(handle, data->callchain, size);
2904 } else {
2905 u64 nr = 0;
2906 perf_output_put(handle, nr);
2907 }
2942 } 2908 }
2943 2909
2944 if (sample_type & PERF_SAMPLE_RAW) { 2910 if (sample_type & PERF_SAMPLE_RAW) {
2945 int size = sizeof(u32); 2911 if (data->raw) {
2912 perf_output_put(handle, data->raw->size);
2913 perf_output_copy(handle, data->raw->data,
2914 data->raw->size);
2915 } else {
2916 struct {
2917 u32 size;
2918 u32 data;
2919 } raw = {
2920 .size = sizeof(u32),
2921 .data = 0,
2922 };
2923 perf_output_put(handle, raw);
2924 }
2925 }
2926}
2946 2927
2947 if (data->raw) 2928void perf_prepare_sample(struct perf_event_header *header,
2948 size += data->raw->size; 2929 struct perf_sample_data *data,
2949 else 2930 struct perf_counter *counter,
2950 size += sizeof(u32); 2931 struct pt_regs *regs)
2932{
2933 u64 sample_type = counter->attr.sample_type;
2951 2934
2952 WARN_ON_ONCE(size & (sizeof(u64)-1)); 2935 data->type = sample_type;
2953 header.size += size;
2954 }
2955 2936
2956 ret = perf_output_begin(&handle, counter, header.size, nmi, 1); 2937 header->type = PERF_EVENT_SAMPLE;
2957 if (ret) 2938 header->size = sizeof(*header);
2958 return;
2959 2939
2960 perf_output_put(&handle, header); 2940 header->misc = 0;
2941 header->misc |= perf_misc_flags(regs);
2961 2942
2962 if (sample_type & PERF_SAMPLE_IP) 2943 if (sample_type & PERF_SAMPLE_IP) {
2963 perf_output_put(&handle, ip); 2944 data->ip = perf_instruction_pointer(regs);
2964 2945
2965 if (sample_type & PERF_SAMPLE_TID) 2946 header->size += sizeof(data->ip);
2966 perf_output_put(&handle, tid_entry); 2947 }
2967 2948
2968 if (sample_type & PERF_SAMPLE_TIME) 2949 if (sample_type & PERF_SAMPLE_TID) {
2969 perf_output_put(&handle, time); 2950 /* namespace issues */
2951 data->tid_entry.pid = perf_counter_pid(counter, current);
2952 data->tid_entry.tid = perf_counter_tid(counter, current);
2953
2954 header->size += sizeof(data->tid_entry);
2955 }
2956
2957 if (sample_type & PERF_SAMPLE_TIME) {
2958 data->time = perf_clock();
2959
2960 header->size += sizeof(data->time);
2961 }
2970 2962
2971 if (sample_type & PERF_SAMPLE_ADDR) 2963 if (sample_type & PERF_SAMPLE_ADDR)
2972 perf_output_put(&handle, data->addr); 2964 header->size += sizeof(data->addr);
2973 2965
2974 if (sample_type & PERF_SAMPLE_ID) { 2966 if (sample_type & PERF_SAMPLE_ID) {
2975 u64 id = primary_counter_id(counter); 2967 data->id = primary_counter_id(counter);
2976 2968
2977 perf_output_put(&handle, id); 2969 header->size += sizeof(data->id);
2978 } 2970 }
2979 2971
2980 if (sample_type & PERF_SAMPLE_STREAM_ID) 2972 if (sample_type & PERF_SAMPLE_STREAM_ID) {
2981 perf_output_put(&handle, counter->id); 2973 data->stream_id = counter->id;
2982 2974
2983 if (sample_type & PERF_SAMPLE_CPU) 2975 header->size += sizeof(data->stream_id);
2984 perf_output_put(&handle, cpu_entry); 2976 }
2977
2978 if (sample_type & PERF_SAMPLE_CPU) {
2979 data->cpu_entry.cpu = raw_smp_processor_id();
2980 data->cpu_entry.reserved = 0;
2981
2982 header->size += sizeof(data->cpu_entry);
2983 }
2985 2984
2986 if (sample_type & PERF_SAMPLE_PERIOD) 2985 if (sample_type & PERF_SAMPLE_PERIOD)
2987 perf_output_put(&handle, data->period); 2986 header->size += sizeof(data->period);
2988 2987
2989 if (sample_type & PERF_SAMPLE_READ) 2988 if (sample_type & PERF_SAMPLE_READ)
2990 perf_output_read(&handle, counter); 2989 header->size += perf_counter_read_size(counter);
2991 2990
2992 if (sample_type & PERF_SAMPLE_CALLCHAIN) { 2991 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
2993 if (callchain) 2992 int size = 1;
2994 perf_output_copy(&handle, callchain, callchain_size); 2993
2995 else { 2994 data->callchain = perf_callchain(regs);
2996 u64 nr = 0; 2995
2997 perf_output_put(&handle, nr); 2996 if (data->callchain)
2998 } 2997 size += data->callchain->nr;
2998
2999 header->size += size * sizeof(u64);
2999 } 3000 }
3000 3001
3001 if (sample_type & PERF_SAMPLE_RAW) { 3002 if (sample_type & PERF_SAMPLE_RAW) {
3002 if (data->raw) { 3003 int size = sizeof(u32);
3003 perf_output_put(&handle, data->raw->size); 3004
3004 perf_output_copy(&handle, data->raw->data, data->raw->size); 3005 if (data->raw)
3005 } else { 3006 size += data->raw->size;
3006 struct { 3007 else
3007 u32 size; 3008 size += sizeof(u32);
3008 u32 data; 3009
3009 } raw = { 3010 WARN_ON_ONCE(size & (sizeof(u64)-1));
3010 .size = sizeof(u32), 3011 header->size += size;
3011 .data = 0,
3012 };
3013 perf_output_put(&handle, raw);
3014 }
3015 } 3012 }
3013}
3014
3015static void perf_counter_output(struct perf_counter *counter, int nmi,
3016 struct perf_sample_data *data,
3017 struct pt_regs *regs)
3018{
3019 struct perf_output_handle handle;
3020 struct perf_event_header header;
3021
3022 perf_prepare_sample(&header, data, counter, regs);
3023
3024 if (perf_output_begin(&handle, counter, header.size, nmi, 1))
3025 return;
3026
3027 perf_output_sample(&handle, &header, data, counter);
3016 3028
3017 perf_output_end(&handle); 3029 perf_output_end(&handle);
3018} 3030}
@@ -3071,6 +3083,7 @@ struct perf_task_event {
3071 u32 ppid; 3083 u32 ppid;
3072 u32 tid; 3084 u32 tid;
3073 u32 ptid; 3085 u32 ptid;
3086 u64 time;
3074 } event; 3087 } event;
3075}; 3088};
3076 3089
@@ -3078,9 +3091,12 @@ static void perf_counter_task_output(struct perf_counter *counter,
3078 struct perf_task_event *task_event) 3091 struct perf_task_event *task_event)
3079{ 3092{
3080 struct perf_output_handle handle; 3093 struct perf_output_handle handle;
3081 int size = task_event->event.header.size; 3094 int size;
3082 struct task_struct *task = task_event->task; 3095 struct task_struct *task = task_event->task;
3083 int ret = perf_output_begin(&handle, counter, size, 0, 0); 3096 int ret;
3097
3098 size = task_event->event.header.size;
3099 ret = perf_output_begin(&handle, counter, size, 0, 0);
3084 3100
3085 if (ret) 3101 if (ret)
3086 return; 3102 return;
@@ -3091,7 +3107,10 @@ static void perf_counter_task_output(struct perf_counter *counter,
3091 task_event->event.tid = perf_counter_tid(counter, task); 3107 task_event->event.tid = perf_counter_tid(counter, task);
3092 task_event->event.ptid = perf_counter_tid(counter, current); 3108 task_event->event.ptid = perf_counter_tid(counter, current);
3093 3109
3110 task_event->event.time = perf_clock();
3111
3094 perf_output_put(&handle, task_event->event); 3112 perf_output_put(&handle, task_event->event);
3113
3095 perf_output_end(&handle); 3114 perf_output_end(&handle);
3096} 3115}
3097 3116
@@ -3473,7 +3492,7 @@ static void perf_log_throttle(struct perf_counter *counter, int enable)
3473 .misc = 0, 3492 .misc = 0,
3474 .size = sizeof(throttle_event), 3493 .size = sizeof(throttle_event),
3475 }, 3494 },
3476 .time = sched_clock(), 3495 .time = perf_clock(),
3477 .id = primary_counter_id(counter), 3496 .id = primary_counter_id(counter),
3478 .stream_id = counter->id, 3497 .stream_id = counter->id,
3479 }; 3498 };
@@ -3493,14 +3512,16 @@ static void perf_log_throttle(struct perf_counter *counter, int enable)
3493 * Generic counter overflow handling, sampling. 3512 * Generic counter overflow handling, sampling.
3494 */ 3513 */
3495 3514
3496int perf_counter_overflow(struct perf_counter *counter, int nmi, 3515static int __perf_counter_overflow(struct perf_counter *counter, int nmi,
3497 struct perf_sample_data *data) 3516 int throttle, struct perf_sample_data *data,
3517 struct pt_regs *regs)
3498{ 3518{
3499 int events = atomic_read(&counter->event_limit); 3519 int events = atomic_read(&counter->event_limit);
3500 int throttle = counter->pmu->unthrottle != NULL;
3501 struct hw_perf_counter *hwc = &counter->hw; 3520 struct hw_perf_counter *hwc = &counter->hw;
3502 int ret = 0; 3521 int ret = 0;
3503 3522
3523 throttle = (throttle && counter->pmu->unthrottle != NULL);
3524
3504 if (!throttle) { 3525 if (!throttle) {
3505 hwc->interrupts++; 3526 hwc->interrupts++;
3506 } else { 3527 } else {
@@ -3523,7 +3544,7 @@ int perf_counter_overflow(struct perf_counter *counter, int nmi,
3523 } 3544 }
3524 3545
3525 if (counter->attr.freq) { 3546 if (counter->attr.freq) {
3526 u64 now = sched_clock(); 3547 u64 now = perf_clock();
3527 s64 delta = now - hwc->freq_stamp; 3548 s64 delta = now - hwc->freq_stamp;
3528 3549
3529 hwc->freq_stamp = now; 3550 hwc->freq_stamp = now;
@@ -3549,10 +3570,17 @@ int perf_counter_overflow(struct perf_counter *counter, int nmi,
3549 perf_counter_disable(counter); 3570 perf_counter_disable(counter);
3550 } 3571 }
3551 3572
3552 perf_counter_output(counter, nmi, data); 3573 perf_counter_output(counter, nmi, data, regs);
3553 return ret; 3574 return ret;
3554} 3575}
3555 3576
3577int perf_counter_overflow(struct perf_counter *counter, int nmi,
3578 struct perf_sample_data *data,
3579 struct pt_regs *regs)
3580{
3581 return __perf_counter_overflow(counter, nmi, 1, data, regs);
3582}
3583
3556/* 3584/*
3557 * Generic software counter infrastructure 3585 * Generic software counter infrastructure
3558 */ 3586 */
@@ -3588,9 +3616,11 @@ again:
3588} 3616}
3589 3617
3590static void perf_swcounter_overflow(struct perf_counter *counter, 3618static void perf_swcounter_overflow(struct perf_counter *counter,
3591 int nmi, struct perf_sample_data *data) 3619 int nmi, struct perf_sample_data *data,
3620 struct pt_regs *regs)
3592{ 3621{
3593 struct hw_perf_counter *hwc = &counter->hw; 3622 struct hw_perf_counter *hwc = &counter->hw;
3623 int throttle = 0;
3594 u64 overflow; 3624 u64 overflow;
3595 3625
3596 data->period = counter->hw.last_period; 3626 data->period = counter->hw.last_period;
@@ -3600,13 +3630,15 @@ static void perf_swcounter_overflow(struct perf_counter *counter,
3600 return; 3630 return;
3601 3631
3602 for (; overflow; overflow--) { 3632 for (; overflow; overflow--) {
3603 if (perf_counter_overflow(counter, nmi, data)) { 3633 if (__perf_counter_overflow(counter, nmi, throttle,
3634 data, regs)) {
3604 /* 3635 /*
3605 * We inhibit the overflow from happening when 3636 * We inhibit the overflow from happening when
3606 * hwc->interrupts == MAX_INTERRUPTS. 3637 * hwc->interrupts == MAX_INTERRUPTS.
3607 */ 3638 */
3608 break; 3639 break;
3609 } 3640 }
3641 throttle = 1;
3610 } 3642 }
3611} 3643}
3612 3644
@@ -3618,7 +3650,8 @@ static void perf_swcounter_unthrottle(struct perf_counter *counter)
3618} 3650}
3619 3651
3620static void perf_swcounter_add(struct perf_counter *counter, u64 nr, 3652static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
3621 int nmi, struct perf_sample_data *data) 3653 int nmi, struct perf_sample_data *data,
3654 struct pt_regs *regs)
3622{ 3655{
3623 struct hw_perf_counter *hwc = &counter->hw; 3656 struct hw_perf_counter *hwc = &counter->hw;
3624 3657
@@ -3627,11 +3660,11 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
3627 if (!hwc->sample_period) 3660 if (!hwc->sample_period)
3628 return; 3661 return;
3629 3662
3630 if (!data->regs) 3663 if (!regs)
3631 return; 3664 return;
3632 3665
3633 if (!atomic64_add_negative(nr, &hwc->period_left)) 3666 if (!atomic64_add_negative(nr, &hwc->period_left))
3634 perf_swcounter_overflow(counter, nmi, data); 3667 perf_swcounter_overflow(counter, nmi, data, regs);
3635} 3668}
3636 3669
3637static int perf_swcounter_is_counting(struct perf_counter *counter) 3670static int perf_swcounter_is_counting(struct perf_counter *counter)
@@ -3690,7 +3723,8 @@ static int perf_swcounter_match(struct perf_counter *counter,
3690static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, 3723static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
3691 enum perf_type_id type, 3724 enum perf_type_id type,
3692 u32 event, u64 nr, int nmi, 3725 u32 event, u64 nr, int nmi,
3693 struct perf_sample_data *data) 3726 struct perf_sample_data *data,
3727 struct pt_regs *regs)
3694{ 3728{
3695 struct perf_counter *counter; 3729 struct perf_counter *counter;
3696 3730
@@ -3699,8 +3733,8 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
3699 3733
3700 rcu_read_lock(); 3734 rcu_read_lock();
3701 list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { 3735 list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
3702 if (perf_swcounter_match(counter, type, event, data->regs)) 3736 if (perf_swcounter_match(counter, type, event, regs))
3703 perf_swcounter_add(counter, nr, nmi, data); 3737 perf_swcounter_add(counter, nr, nmi, data, regs);
3704 } 3738 }
3705 rcu_read_unlock(); 3739 rcu_read_unlock();
3706} 3740}
@@ -3721,7 +3755,8 @@ static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx)
3721 3755
3722static void do_perf_swcounter_event(enum perf_type_id type, u32 event, 3756static void do_perf_swcounter_event(enum perf_type_id type, u32 event,
3723 u64 nr, int nmi, 3757 u64 nr, int nmi,
3724 struct perf_sample_data *data) 3758 struct perf_sample_data *data,
3759 struct pt_regs *regs)
3725{ 3760{
3726 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); 3761 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
3727 int *recursion = perf_swcounter_recursion_context(cpuctx); 3762 int *recursion = perf_swcounter_recursion_context(cpuctx);
@@ -3734,7 +3769,7 @@ static void do_perf_swcounter_event(enum perf_type_id type, u32 event,
3734 barrier(); 3769 barrier();
3735 3770
3736 perf_swcounter_ctx_event(&cpuctx->ctx, type, event, 3771 perf_swcounter_ctx_event(&cpuctx->ctx, type, event,
3737 nr, nmi, data); 3772 nr, nmi, data, regs);
3738 rcu_read_lock(); 3773 rcu_read_lock();
3739 /* 3774 /*
3740 * doesn't really matter which of the child contexts the 3775 * doesn't really matter which of the child contexts the
@@ -3742,7 +3777,7 @@ static void do_perf_swcounter_event(enum perf_type_id type, u32 event,
3742 */ 3777 */
3743 ctx = rcu_dereference(current->perf_counter_ctxp); 3778 ctx = rcu_dereference(current->perf_counter_ctxp);
3744 if (ctx) 3779 if (ctx)
3745 perf_swcounter_ctx_event(ctx, type, event, nr, nmi, data); 3780 perf_swcounter_ctx_event(ctx, type, event, nr, nmi, data, regs);
3746 rcu_read_unlock(); 3781 rcu_read_unlock();
3747 3782
3748 barrier(); 3783 barrier();
@@ -3756,11 +3791,11 @@ void __perf_swcounter_event(u32 event, u64 nr, int nmi,
3756 struct pt_regs *regs, u64 addr) 3791 struct pt_regs *regs, u64 addr)
3757{ 3792{
3758 struct perf_sample_data data = { 3793 struct perf_sample_data data = {
3759 .regs = regs,
3760 .addr = addr, 3794 .addr = addr,
3761 }; 3795 };
3762 3796
3763 do_perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, &data); 3797 do_perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi,
3798 &data, regs);
3764} 3799}
3765 3800
3766static void perf_swcounter_read(struct perf_counter *counter) 3801static void perf_swcounter_read(struct perf_counter *counter)
@@ -3797,6 +3832,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
3797{ 3832{
3798 enum hrtimer_restart ret = HRTIMER_RESTART; 3833 enum hrtimer_restart ret = HRTIMER_RESTART;
3799 struct perf_sample_data data; 3834 struct perf_sample_data data;
3835 struct pt_regs *regs;
3800 struct perf_counter *counter; 3836 struct perf_counter *counter;
3801 u64 period; 3837 u64 period;
3802 3838
@@ -3804,17 +3840,17 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
3804 counter->pmu->read(counter); 3840 counter->pmu->read(counter);
3805 3841
3806 data.addr = 0; 3842 data.addr = 0;
3807 data.regs = get_irq_regs(); 3843 regs = get_irq_regs();
3808 /* 3844 /*
3809 * In case we exclude kernel IPs or are somehow not in interrupt 3845 * In case we exclude kernel IPs or are somehow not in interrupt
3810 * context, provide the next best thing, the user IP. 3846 * context, provide the next best thing, the user IP.
3811 */ 3847 */
3812 if ((counter->attr.exclude_kernel || !data.regs) && 3848 if ((counter->attr.exclude_kernel || !regs) &&
3813 !counter->attr.exclude_user) 3849 !counter->attr.exclude_user)
3814 data.regs = task_pt_regs(current); 3850 regs = task_pt_regs(current);
3815 3851
3816 if (data.regs) { 3852 if (regs) {
3817 if (perf_counter_overflow(counter, 0, &data)) 3853 if (perf_counter_overflow(counter, 0, &data, regs))
3818 ret = HRTIMER_NORESTART; 3854 ret = HRTIMER_NORESTART;
3819 } 3855 }
3820 3856
@@ -3950,15 +3986,17 @@ void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record,
3950 }; 3986 };
3951 3987
3952 struct perf_sample_data data = { 3988 struct perf_sample_data data = {
3953 .regs = get_irq_regs(),
3954 .addr = addr, 3989 .addr = addr,
3955 .raw = &raw, 3990 .raw = &raw,
3956 }; 3991 };
3957 3992
3958 if (!data.regs) 3993 struct pt_regs *regs = get_irq_regs();
3959 data.regs = task_pt_regs(current); 3994
3995 if (!regs)
3996 regs = task_pt_regs(current);
3960 3997
3961 do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data); 3998 do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
3999 &data, regs);
3962} 4000}
3963EXPORT_SYMBOL_GPL(perf_tpcounter_event); 4001EXPORT_SYMBOL_GPL(perf_tpcounter_event);
3964 4002
@@ -4170,8 +4208,8 @@ done:
4170static int perf_copy_attr(struct perf_counter_attr __user *uattr, 4208static int perf_copy_attr(struct perf_counter_attr __user *uattr,
4171 struct perf_counter_attr *attr) 4209 struct perf_counter_attr *attr)
4172{ 4210{
4173 int ret;
4174 u32 size; 4211 u32 size;
4212 int ret;
4175 4213
4176 if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0)) 4214 if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0))
4177 return -EFAULT; 4215 return -EFAULT;
@@ -4196,19 +4234,19 @@ static int perf_copy_attr(struct perf_counter_attr __user *uattr,
4196 4234
4197 /* 4235 /*
4198 * If we're handed a bigger struct than we know of, 4236 * If we're handed a bigger struct than we know of,
4199 * ensure all the unknown bits are 0. 4237 * ensure all the unknown bits are 0 - i.e. new
4238 * user-space does not rely on any kernel feature
4239 * extensions we dont know about yet.
4200 */ 4240 */
4201 if (size > sizeof(*attr)) { 4241 if (size > sizeof(*attr)) {
4202 unsigned long val; 4242 unsigned char __user *addr;
4203 unsigned long __user *addr; 4243 unsigned char __user *end;
4204 unsigned long __user *end; 4244 unsigned char val;
4205 4245
4206 addr = PTR_ALIGN((void __user *)uattr + sizeof(*attr), 4246 addr = (void __user *)uattr + sizeof(*attr);
4207 sizeof(unsigned long)); 4247 end = (void __user *)uattr + size;
4208 end = PTR_ALIGN((void __user *)uattr + size,
4209 sizeof(unsigned long));
4210 4248
4211 for (; addr < end; addr += sizeof(unsigned long)) { 4249 for (; addr < end; addr++) {
4212 ret = get_user(val, addr); 4250 ret = get_user(val, addr);
4213 if (ret) 4251 if (ret)
4214 return ret; 4252 return ret;
diff --git a/kernel/power/console.c b/kernel/power/console.c
index a3961b205de7..5187136fe1de 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -14,56 +14,13 @@
14#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1) 14#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
15 15
16static int orig_fgconsole, orig_kmsg; 16static int orig_fgconsole, orig_kmsg;
17static int disable_vt_switch;
18
19/*
20 * Normally during a suspend, we allocate a new console and switch to it.
21 * When we resume, we switch back to the original console. This switch
22 * can be slow, so on systems where the framebuffer can handle restoration
23 * of video registers anyways, there's little point in doing the console
24 * switch. This function allows you to disable it by passing it '0'.
25 */
26void pm_set_vt_switch(int do_switch)
27{
28 acquire_console_sem();
29 disable_vt_switch = !do_switch;
30 release_console_sem();
31}
32EXPORT_SYMBOL(pm_set_vt_switch);
33 17
34int pm_prepare_console(void) 18int pm_prepare_console(void)
35{ 19{
36 acquire_console_sem(); 20 orig_fgconsole = vt_move_to_console(SUSPEND_CONSOLE, 1);
37 21 if (orig_fgconsole < 0)
38 if (disable_vt_switch) {
39 release_console_sem();
40 return 0;
41 }
42
43 orig_fgconsole = fg_console;
44
45 if (vc_allocate(SUSPEND_CONSOLE)) {
46 /* we can't have a free VC for now. Too bad,
47 * we don't want to mess the screen for now. */
48 release_console_sem();
49 return 1; 22 return 1;
50 }
51 23
52 if (set_console(SUSPEND_CONSOLE)) {
53 /*
54 * We're unable to switch to the SUSPEND_CONSOLE.
55 * Let the calling function know so it can decide
56 * what to do.
57 */
58 release_console_sem();
59 return 1;
60 }
61 release_console_sem();
62
63 if (vt_waitactive(SUSPEND_CONSOLE)) {
64 pr_debug("Suspend: Can't switch VCs.");
65 return 1;
66 }
67 orig_kmsg = kmsg_redirect; 24 orig_kmsg = kmsg_redirect;
68 kmsg_redirect = SUSPEND_CONSOLE; 25 kmsg_redirect = SUSPEND_CONSOLE;
69 return 0; 26 return 0;
@@ -71,19 +28,9 @@ int pm_prepare_console(void)
71 28
72void pm_restore_console(void) 29void pm_restore_console(void)
73{ 30{
74 acquire_console_sem(); 31 if (orig_fgconsole >= 0) {
75 if (disable_vt_switch) { 32 vt_move_to_console(orig_fgconsole, 0);
76 release_console_sem(); 33 kmsg_redirect = orig_kmsg;
77 return;
78 }
79 set_console(orig_fgconsole);
80 release_console_sem();
81
82 if (vt_waitactive(orig_fgconsole)) {
83 pr_debug("Resume: Can't switch VCs.");
84 return;
85 } 34 }
86
87 kmsg_redirect = orig_kmsg;
88} 35}
89#endif 36#endif
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index e1d16c9a7680..ac2e1dc708bd 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -48,13 +48,6 @@ static __read_mostly int sched_clock_running;
48__read_mostly int sched_clock_stable; 48__read_mostly int sched_clock_stable;
49 49
50struct sched_clock_data { 50struct sched_clock_data {
51 /*
52 * Raw spinlock - this is a special case: this might be called
53 * from within instrumentation code so we dont want to do any
54 * instrumentation ourselves.
55 */
56 raw_spinlock_t lock;
57
58 u64 tick_raw; 51 u64 tick_raw;
59 u64 tick_gtod; 52 u64 tick_gtod;
60 u64 clock; 53 u64 clock;
@@ -80,7 +73,6 @@ void sched_clock_init(void)
80 for_each_possible_cpu(cpu) { 73 for_each_possible_cpu(cpu) {
81 struct sched_clock_data *scd = cpu_sdc(cpu); 74 struct sched_clock_data *scd = cpu_sdc(cpu);
82 75
83 scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
84 scd->tick_raw = 0; 76 scd->tick_raw = 0;
85 scd->tick_gtod = ktime_now; 77 scd->tick_gtod = ktime_now;
86 scd->clock = ktime_now; 78 scd->clock = ktime_now;
@@ -109,14 +101,19 @@ static inline u64 wrap_max(u64 x, u64 y)
109 * - filter out backward motion 101 * - filter out backward motion
110 * - use the GTOD tick value to create a window to filter crazy TSC values 102 * - use the GTOD tick value to create a window to filter crazy TSC values
111 */ 103 */
112static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now) 104static u64 sched_clock_local(struct sched_clock_data *scd)
113{ 105{
114 s64 delta = now - scd->tick_raw; 106 u64 now, clock, old_clock, min_clock, max_clock;
115 u64 clock, min_clock, max_clock; 107 s64 delta;
116 108
109again:
110 now = sched_clock();
111 delta = now - scd->tick_raw;
117 if (unlikely(delta < 0)) 112 if (unlikely(delta < 0))
118 delta = 0; 113 delta = 0;
119 114
115 old_clock = scd->clock;
116
120 /* 117 /*
121 * scd->clock = clamp(scd->tick_gtod + delta, 118 * scd->clock = clamp(scd->tick_gtod + delta,
122 * max(scd->tick_gtod, scd->clock), 119 * max(scd->tick_gtod, scd->clock),
@@ -124,84 +121,73 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
124 */ 121 */
125 122
126 clock = scd->tick_gtod + delta; 123 clock = scd->tick_gtod + delta;
127 min_clock = wrap_max(scd->tick_gtod, scd->clock); 124 min_clock = wrap_max(scd->tick_gtod, old_clock);
128 max_clock = wrap_max(scd->clock, scd->tick_gtod + TICK_NSEC); 125 max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC);
129 126
130 clock = wrap_max(clock, min_clock); 127 clock = wrap_max(clock, min_clock);
131 clock = wrap_min(clock, max_clock); 128 clock = wrap_min(clock, max_clock);
132 129
133 scd->clock = clock; 130 if (cmpxchg(&scd->clock, old_clock, clock) != old_clock)
131 goto again;
134 132
135 return scd->clock; 133 return clock;
136} 134}
137 135
138static void lock_double_clock(struct sched_clock_data *data1, 136static u64 sched_clock_remote(struct sched_clock_data *scd)
139 struct sched_clock_data *data2)
140{ 137{
141 if (data1 < data2) { 138 struct sched_clock_data *my_scd = this_scd();
142 __raw_spin_lock(&data1->lock); 139 u64 this_clock, remote_clock;
143 __raw_spin_lock(&data2->lock); 140 u64 *ptr, old_val, val;
141
142 sched_clock_local(my_scd);
143again:
144 this_clock = my_scd->clock;
145 remote_clock = scd->clock;
146
147 /*
148 * Use the opportunity that we have both locks
149 * taken to couple the two clocks: we take the
150 * larger time as the latest time for both
151 * runqueues. (this creates monotonic movement)
152 */
153 if (likely((s64)(remote_clock - this_clock) < 0)) {
154 ptr = &scd->clock;
155 old_val = remote_clock;
156 val = this_clock;
144 } else { 157 } else {
145 __raw_spin_lock(&data2->lock); 158 /*
146 __raw_spin_lock(&data1->lock); 159 * Should be rare, but possible:
160 */
161 ptr = &my_scd->clock;
162 old_val = this_clock;
163 val = remote_clock;
147 } 164 }
165
166 if (cmpxchg(ptr, old_val, val) != old_val)
167 goto again;
168
169 return val;
148} 170}
149 171
150u64 sched_clock_cpu(int cpu) 172u64 sched_clock_cpu(int cpu)
151{ 173{
152 u64 now, clock, this_clock, remote_clock;
153 struct sched_clock_data *scd; 174 struct sched_clock_data *scd;
175 u64 clock;
176
177 WARN_ON_ONCE(!irqs_disabled());
154 178
155 if (sched_clock_stable) 179 if (sched_clock_stable)
156 return sched_clock(); 180 return sched_clock();
157 181
158 scd = cpu_sdc(cpu);
159
160 /*
161 * Normally this is not called in NMI context - but if it is,
162 * trying to do any locking here is totally lethal.
163 */
164 if (unlikely(in_nmi()))
165 return scd->clock;
166
167 if (unlikely(!sched_clock_running)) 182 if (unlikely(!sched_clock_running))
168 return 0ull; 183 return 0ull;
169 184
170 WARN_ON_ONCE(!irqs_disabled()); 185 scd = cpu_sdc(cpu);
171 now = sched_clock();
172
173 if (cpu != raw_smp_processor_id()) {
174 struct sched_clock_data *my_scd = this_scd();
175
176 lock_double_clock(scd, my_scd);
177
178 this_clock = __update_sched_clock(my_scd, now);
179 remote_clock = scd->clock;
180
181 /*
182 * Use the opportunity that we have both locks
183 * taken to couple the two clocks: we take the
184 * larger time as the latest time for both
185 * runqueues. (this creates monotonic movement)
186 */
187 if (likely((s64)(remote_clock - this_clock) < 0)) {
188 clock = this_clock;
189 scd->clock = clock;
190 } else {
191 /*
192 * Should be rare, but possible:
193 */
194 clock = remote_clock;
195 my_scd->clock = remote_clock;
196 }
197
198 __raw_spin_unlock(&my_scd->lock);
199 } else {
200 __raw_spin_lock(&scd->lock);
201 clock = __update_sched_clock(scd, now);
202 }
203 186
204 __raw_spin_unlock(&scd->lock); 187 if (cpu != smp_processor_id())
188 clock = sched_clock_remote(scd);
189 else
190 clock = sched_clock_local(scd);
205 191
206 return clock; 192 return clock;
207} 193}
@@ -223,11 +209,9 @@ void sched_clock_tick(void)
223 now_gtod = ktime_to_ns(ktime_get()); 209 now_gtod = ktime_to_ns(ktime_get());
224 now = sched_clock(); 210 now = sched_clock();
225 211
226 __raw_spin_lock(&scd->lock);
227 scd->tick_raw = now; 212 scd->tick_raw = now;
228 scd->tick_gtod = now_gtod; 213 scd->tick_gtod = now_gtod;
229 __update_sched_clock(scd, now); 214 sched_clock_local(scd);
230 __raw_spin_unlock(&scd->lock);
231} 215}
232 216
233/* 217/*
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 10d218ab69f2..990b188803ce 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -513,6 +513,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
513 if (entity_is_task(curr)) { 513 if (entity_is_task(curr)) {
514 struct task_struct *curtask = task_of(curr); 514 struct task_struct *curtask = task_of(curr);
515 515
516 trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
516 cpuacct_charge(curtask, delta_exec); 517 cpuacct_charge(curtask, delta_exec);
517 account_group_exec_runtime(curtask, delta_exec); 518 account_group_exec_runtime(curtask, delta_exec);
518 } 519 }
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 844164dca90a..26f03ac07c2b 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -42,7 +42,6 @@ obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
42obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o 42obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
43obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o 43obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
44obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o 44obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
45obj-$(CONFIG_POWER_TRACER) += trace_power.o
46obj-$(CONFIG_KMEMTRACE) += kmemtrace.o 45obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
47obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o 46obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
48obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o 47obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
@@ -54,5 +53,6 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o
54obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
55obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
56obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
56obj-$(CONFIG_EVENT_TRACING) += power-traces.o
57 57
58libftrace-y := ftrace.o 58libftrace-y := ftrace.o
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
new file mode 100644
index 000000000000..e06c6e3d56a3
--- /dev/null
+++ b/kernel/trace/power-traces.c
@@ -0,0 +1,20 @@
1/*
2 * Power trace points
3 *
4 * Copyright (C) 2009 Arjan van de Ven <arjan@linux.intel.com>
5 */
6
7#include <linux/string.h>
8#include <linux/types.h>
9#include <linux/workqueue.h>
10#include <linux/sched.h>
11#include <linux/module.h>
12#include <linux/slab.h>
13
14#define CREATE_TRACE_POINTS
15#include <trace/events/power.h>
16
17EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
18EXPORT_TRACEPOINT_SYMBOL_GPL(power_end);
19EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency);
20
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 86bcff94791a..405cb850b75d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,7 +11,6 @@
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <trace/boot.h> 12#include <trace/boot.h>
13#include <linux/kmemtrace.h> 13#include <linux/kmemtrace.h>
14#include <trace/power.h>
15 14
16#include <linux/trace_seq.h> 15#include <linux/trace_seq.h>
17#include <linux/ftrace_event.h> 16#include <linux/ftrace_event.h>
@@ -37,7 +36,6 @@ enum trace_type {
37 TRACE_HW_BRANCHES, 36 TRACE_HW_BRANCHES,
38 TRACE_KMEM_ALLOC, 37 TRACE_KMEM_ALLOC,
39 TRACE_KMEM_FREE, 38 TRACE_KMEM_FREE,
40 TRACE_POWER,
41 TRACE_BLK, 39 TRACE_BLK,
42 40
43 __TRACE_LAST_TYPE, 41 __TRACE_LAST_TYPE,
@@ -207,7 +205,6 @@ extern void __ftrace_bad_type(void);
207 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ 205 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
208 TRACE_GRAPH_RET); \ 206 TRACE_GRAPH_RET); \
209 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ 207 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
210 IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
211 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \ 208 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
212 TRACE_KMEM_ALLOC); \ 209 TRACE_KMEM_ALLOC); \
213 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ 210 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index a431748ddd6e..ead3d724599d 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -330,23 +330,6 @@ FTRACE_ENTRY(hw_branch, hw_branch_entry,
330 F_printk("from: %llx to: %llx", __entry->from, __entry->to) 330 F_printk("from: %llx to: %llx", __entry->from, __entry->to)
331); 331);
332 332
333FTRACE_ENTRY(power, trace_power,
334
335 TRACE_POWER,
336
337 F_STRUCT(
338 __field_struct( struct power_trace, state_data )
339 __field_desc( s64, state_data, stamp )
340 __field_desc( s64, state_data, end )
341 __field_desc( int, state_data, type )
342 __field_desc( int, state_data, state )
343 ),
344
345 F_printk("%llx->%llx type:%u state:%u",
346 __entry->stamp, __entry->end,
347 __entry->type, __entry->state)
348);
349
350FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry, 333FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry,
351 334
352 TRACE_KMEM_ALLOC, 335 TRACE_KMEM_ALLOC,
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
deleted file mode 100644
index fe1a00f1445a..000000000000
--- a/kernel/trace/trace_power.c
+++ /dev/null
@@ -1,218 +0,0 @@
1/*
2 * ring buffer based C-state tracer
3 *
4 * Arjan van de Ven <arjan@linux.intel.com>
5 * Copyright (C) 2008 Intel Corporation
6 *
7 * Much is borrowed from trace_boot.c which is
8 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
9 *
10 */
11
12#include <linux/init.h>
13#include <linux/debugfs.h>
14#include <trace/power.h>
15#include <linux/kallsyms.h>
16#include <linux/module.h>
17
18#include "trace.h"
19#include "trace_output.h"
20
21static struct trace_array *power_trace;
22static int __read_mostly trace_power_enabled;
23
24static void probe_power_start(struct power_trace *it, unsigned int type,
25 unsigned int level)
26{
27 if (!trace_power_enabled)
28 return;
29
30 memset(it, 0, sizeof(struct power_trace));
31 it->state = level;
32 it->type = type;
33 it->stamp = ktime_get();
34}
35
36
37static void probe_power_end(struct power_trace *it)
38{
39 struct ftrace_event_call *call = &event_power;
40 struct ring_buffer_event *event;
41 struct ring_buffer *buffer;
42 struct trace_power *entry;
43 struct trace_array_cpu *data;
44 struct trace_array *tr = power_trace;
45
46 if (!trace_power_enabled)
47 return;
48
49 buffer = tr->buffer;
50
51 preempt_disable();
52 it->end = ktime_get();
53 data = tr->data[smp_processor_id()];
54
55 event = trace_buffer_lock_reserve(buffer, TRACE_POWER,
56 sizeof(*entry), 0, 0);
57 if (!event)
58 goto out;
59 entry = ring_buffer_event_data(event);
60 entry->state_data = *it;
61 if (!filter_check_discard(call, entry, buffer, event))
62 trace_buffer_unlock_commit(buffer, event, 0, 0);
63 out:
64 preempt_enable();
65}
66
67static void probe_power_mark(struct power_trace *it, unsigned int type,
68 unsigned int level)
69{
70 struct ftrace_event_call *call = &event_power;
71 struct ring_buffer_event *event;
72 struct ring_buffer *buffer;
73 struct trace_power *entry;
74 struct trace_array_cpu *data;
75 struct trace_array *tr = power_trace;
76
77 if (!trace_power_enabled)
78 return;
79
80 buffer = tr->buffer;
81
82 memset(it, 0, sizeof(struct power_trace));
83 it->state = level;
84 it->type = type;
85 it->stamp = ktime_get();
86 preempt_disable();
87 it->end = it->stamp;
88 data = tr->data[smp_processor_id()];
89
90 event = trace_buffer_lock_reserve(buffer, TRACE_POWER,
91 sizeof(*entry), 0, 0);
92 if (!event)
93 goto out;
94 entry = ring_buffer_event_data(event);
95 entry->state_data = *it;
96 if (!filter_check_discard(call, entry, buffer, event))
97 trace_buffer_unlock_commit(buffer, event, 0, 0);
98 out:
99 preempt_enable();
100}
101
102static int tracing_power_register(void)
103{
104 int ret;
105
106 ret = register_trace_power_start(probe_power_start);
107 if (ret) {
108 pr_info("power trace: Couldn't activate tracepoint"
109 " probe to trace_power_start\n");
110 return ret;
111 }
112 ret = register_trace_power_end(probe_power_end);
113 if (ret) {
114 pr_info("power trace: Couldn't activate tracepoint"
115 " probe to trace_power_end\n");
116 goto fail_start;
117 }
118 ret = register_trace_power_mark(probe_power_mark);
119 if (ret) {
120 pr_info("power trace: Couldn't activate tracepoint"
121 " probe to trace_power_mark\n");
122 goto fail_end;
123 }
124 return ret;
125fail_end:
126 unregister_trace_power_end(probe_power_end);
127fail_start:
128 unregister_trace_power_start(probe_power_start);
129 return ret;
130}
131
132static void start_power_trace(struct trace_array *tr)
133{
134 trace_power_enabled = 1;
135}
136
137static void stop_power_trace(struct trace_array *tr)
138{
139 trace_power_enabled = 0;
140}
141
142static void power_trace_reset(struct trace_array *tr)
143{
144 trace_power_enabled = 0;
145 unregister_trace_power_start(probe_power_start);
146 unregister_trace_power_end(probe_power_end);
147 unregister_trace_power_mark(probe_power_mark);
148}
149
150
151static int power_trace_init(struct trace_array *tr)
152{
153 power_trace = tr;
154
155 trace_power_enabled = 1;
156 tracing_power_register();
157
158 tracing_reset_online_cpus(tr);
159 return 0;
160}
161
162static enum print_line_t power_print_line(struct trace_iterator *iter)
163{
164 int ret = 0;
165 struct trace_entry *entry = iter->ent;
166 struct trace_power *field ;
167 struct power_trace *it;
168 struct trace_seq *s = &iter->seq;
169 struct timespec stamp;
170 struct timespec duration;
171
172 trace_assign_type(field, entry);
173 it = &field->state_data;
174 stamp = ktime_to_timespec(it->stamp);
175 duration = ktime_to_timespec(ktime_sub(it->end, it->stamp));
176
177 if (entry->type == TRACE_POWER) {
178 if (it->type == POWER_CSTATE)
179 ret = trace_seq_printf(s, "[%5ld.%09ld] CSTATE: Going to C%i on cpu %i for %ld.%09ld\n",
180 stamp.tv_sec,
181 stamp.tv_nsec,
182 it->state, iter->cpu,
183 duration.tv_sec,
184 duration.tv_nsec);
185 if (it->type == POWER_PSTATE)
186 ret = trace_seq_printf(s, "[%5ld.%09ld] PSTATE: Going to P%i on cpu %i\n",
187 stamp.tv_sec,
188 stamp.tv_nsec,
189 it->state, iter->cpu);
190 if (!ret)
191 return TRACE_TYPE_PARTIAL_LINE;
192 return TRACE_TYPE_HANDLED;
193 }
194 return TRACE_TYPE_UNHANDLED;
195}
196
197static void power_print_header(struct seq_file *s)
198{
199 seq_puts(s, "# TIMESTAMP STATE EVENT\n");
200 seq_puts(s, "# | | |\n");
201}
202
203static struct tracer power_tracer __read_mostly =
204{
205 .name = "power",
206 .init = power_trace_init,
207 .start = start_power_trace,
208 .stop = stop_power_trace,
209 .reset = power_trace_reset,
210 .print_line = power_print_line,
211 .print_header = power_print_header,
212};
213
214static int init_power_trace(void)
215{
216 return register_tracer(&power_tracer);
217}
218device_initcall(init_power_trace);