Diffstat (limited to 'kernel')
 kernel/events/core.c               |  89
 kernel/events/internal.h           |   3
 kernel/events/ring_buffer.c        |   3
 kernel/irq/manage.c                |   5
 kernel/jump_label.c                |   3
 kernel/sched.c                     |  17
 kernel/sched_fair.c                | 159
 kernel/sched_features.h            |   1
 kernel/sched_rt.c                  |   3
 kernel/time/clockevents.c          |   1
 kernel/time/clocksource.c          |   4
 kernel/time/tick-broadcast.c       |   2
 kernel/trace/ftrace.c              |   5
 kernel/trace/trace_events.c        |   1
 kernel/trace/trace_events_filter.c |  13
 15 files changed, 260 insertions(+), 49 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0e8457da6f95..600c1629b64d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -185,6 +185,9 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
 static void update_context_time(struct perf_event_context *ctx);
 static u64 perf_event_time(struct perf_event *event);
 
+static void ring_buffer_attach(struct perf_event *event,
+			       struct ring_buffer *rb);
+
 void __weak perf_event_print_debug(void) { }
 
 extern __weak const char *perf_pmu_name(void)
@@ -2173,7 +2176,8 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
 
 	perf_event_sched_in(cpuctx, ctx, task);
 
-	cpuctx->task_ctx = ctx;
+	if (ctx->nr_events)
+		cpuctx->task_ctx = ctx;
 
 	perf_pmu_enable(ctx->pmu);
 	perf_ctx_unlock(cpuctx, ctx);
@@ -3190,12 +3194,33 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 	struct ring_buffer *rb;
 	unsigned int events = POLL_HUP;
 
+	/*
+	 * Race between perf_event_set_output() and perf_poll(): perf_poll()
+	 * grabs the rb reference but perf_event_set_output() overrides it.
+	 * Here is the timeline for two threads T1, T2:
+	 * t0: T1, rb = rcu_dereference(event->rb)
+	 * t1: T2, old_rb = event->rb
+	 * t2: T2, event->rb = new rb
+	 * t3: T2, ring_buffer_detach(old_rb)
+	 * t4: T1, ring_buffer_attach(rb1)
+	 * t5: T1, poll_wait(event->waitq)
+	 *
+	 * To avoid this problem, we grab mmap_mutex in perf_poll()
+	 * thereby ensuring that the assignment of the new ring buffer
+	 * and the detachment of the old buffer appear atomic to perf_poll()
+	 */
+	mutex_lock(&event->mmap_mutex);
+
 	rcu_read_lock();
 	rb = rcu_dereference(event->rb);
-	if (rb)
+	if (rb) {
+		ring_buffer_attach(event, rb);
 		events = atomic_xchg(&rb->poll, 0);
+	}
 	rcu_read_unlock();
 
+	mutex_unlock(&event->mmap_mutex);
+
 	poll_wait(file, &event->waitq, wait);
 
 	return events;
@@ -3496,6 +3521,49 @@ unlock:
 	return ret;
 }
 
+static void ring_buffer_attach(struct perf_event *event,
+			       struct ring_buffer *rb)
+{
+	unsigned long flags;
+
+	if (!list_empty(&event->rb_entry))
+		return;
+
+	spin_lock_irqsave(&rb->event_lock, flags);
+	if (!list_empty(&event->rb_entry))
+		goto unlock;
+
+	list_add(&event->rb_entry, &rb->event_list);
+unlock:
+	spin_unlock_irqrestore(&rb->event_lock, flags);
+}
+
+static void ring_buffer_detach(struct perf_event *event,
+			       struct ring_buffer *rb)
+{
+	unsigned long flags;
+
+	if (list_empty(&event->rb_entry))
+		return;
+
+	spin_lock_irqsave(&rb->event_lock, flags);
+	list_del_init(&event->rb_entry);
+	wake_up_all(&event->waitq);
+	spin_unlock_irqrestore(&rb->event_lock, flags);
+}
+
+static void ring_buffer_wakeup(struct perf_event *event)
+{
+	struct ring_buffer *rb;
+
+	rcu_read_lock();
+	rb = rcu_dereference(event->rb);
+	list_for_each_entry_rcu(event, &rb->event_list, rb_entry) {
+		wake_up_all(&event->waitq);
+	}
+	rcu_read_unlock();
+}
+
 static void rb_free_rcu(struct rcu_head *rcu_head)
 {
 	struct ring_buffer *rb;
@@ -3521,9 +3589,19 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event)
 
 static void ring_buffer_put(struct ring_buffer *rb)
 {
+	struct perf_event *event, *n;
+	unsigned long flags;
+
 	if (!atomic_dec_and_test(&rb->refcount))
 		return;
 
+	spin_lock_irqsave(&rb->event_lock, flags);
+	list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) {
+		list_del_init(&event->rb_entry);
+		wake_up_all(&event->waitq);
+	}
+	spin_unlock_irqrestore(&rb->event_lock, flags);
+
 	call_rcu(&rb->rcu_head, rb_free_rcu);
 }
 
@@ -3546,6 +3624,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 		atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
 		vma->vm_mm->pinned_vm -= event->mmap_locked;
 		rcu_assign_pointer(event->rb, NULL);
+		ring_buffer_detach(event, rb);
 		mutex_unlock(&event->mmap_mutex);
 
 		ring_buffer_put(rb);
@@ -3700,7 +3779,7 @@ static const struct file_operations perf_fops = {
 
 void perf_event_wakeup(struct perf_event *event)
 {
-	wake_up_all(&event->waitq);
+	ring_buffer_wakeup(event);
 
 	if (event->pending_kill) {
 		kill_fasync(&event->fasync, SIGIO, event->pending_kill);
@@ -5822,6 +5901,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	INIT_LIST_HEAD(&event->group_entry);
 	INIT_LIST_HEAD(&event->event_entry);
 	INIT_LIST_HEAD(&event->sibling_list);
+	INIT_LIST_HEAD(&event->rb_entry);
+
 	init_waitqueue_head(&event->waitq);
 	init_irq_work(&event->pending, perf_pending_event);
 
@@ -6028,6 +6109,8 @@ set:
 
 	old_rb = event->rb;
 	rcu_assign_pointer(event->rb, rb);
+	if (old_rb)
+		ring_buffer_detach(event, old_rb);
 	ret = 0;
 unlock:
 	mutex_unlock(&event->mmap_mutex);
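The perf_poll()/ring_buffer_attach() changes above only matter when several events share one ring buffer via perf_event_set_output(). As a rough illustration of that configuration (a hypothetical user-space sketch, not part of this patch; fd_a/fd_b, the sampling parameters and the buffer size are made up), redirecting one event's output into another's mmap'd buffer and then polling the redirected fd is exactly the case where ring_buffer_wakeup() now wakes every attached event rather than only the buffer owner:

/* Hypothetical user-space sketch: two events sharing one ring buffer. */
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <poll.h>
#include <string.h>
#include <unistd.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
			   int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	struct pollfd pfd;
	size_t len = (1 + 8) * 4096;	/* 1 control page + 2^3 data pages */
	int fd_a, fd_b;
	void *base;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_TASK_CLOCK;
	attr.sample_period = 100000;
	attr.wakeup_events = 1;

	fd_a = perf_event_open(&attr, 0, -1, -1, 0);
	fd_b = perf_event_open(&attr, 0, -1, -1, 0);

	/* fd_a owns the buffer; fd_b redirects its samples into it. */
	base = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd_a, 0);
	ioctl(fd_b, PERF_EVENT_IOC_SET_OUTPUT, fd_a);

	/*
	 * With the rb_entry list, a wakeup on the shared buffer reaches
	 * both waitqueues, so polling either fd sees the notification.
	 */
	pfd.fd = fd_b;
	pfd.events = POLLIN;
	poll(&pfd, 1, 1000);

	munmap(base, len);
	close(fd_a);
	close(fd_b);
	return 0;
}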
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 09097dd8116c..64568a699375 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -22,6 +22,9 @@ struct ring_buffer {
 	local_t				lost;		/* nr records lost */
 
 	long				watermark;	/* wakeup watermark */
+	/* poll crap */
+	spinlock_t			event_lock;
+	struct list_head		event_list;
 
 	struct perf_event_mmap_page	*user_page;
 	void				*data_pages[0];
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index a2a29205cc0f..7f3011c6b57f 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -209,6 +209,9 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 		rb->writable = 1;
 
 	atomic_set(&rb->refcount, 1);
+
+	INIT_LIST_HEAD(&rb->event_list);
+	spin_lock_init(&rb->event_lock);
 }
 
 #ifndef CONFIG_PERF_USE_VMALLOC
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0e2b179bc7b3..1da999f5e746 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -623,8 +623,9 @@ static irqreturn_t irq_nested_primary_handler(int irq, void *dev_id)
 
 static int irq_wait_for_interrupt(struct irqaction *action)
 {
+	set_current_state(TASK_INTERRUPTIBLE);
+
 	while (!kthread_should_stop()) {
-		set_current_state(TASK_INTERRUPTIBLE);
 
 		if (test_and_clear_bit(IRQTF_RUNTHREAD,
 				       &action->thread_flags)) {
@@ -632,7 +633,9 @@ static int irq_wait_for_interrupt(struct irqaction *action)
 			return 0;
 		}
 		schedule();
+		set_current_state(TASK_INTERRUPTIBLE);
 	}
+	__set_current_state(TASK_RUNNING);
 	return -1;
 }
 
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index bbdfe2a462a0..66ff7109f697 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -66,8 +66,9 @@ void jump_label_inc(struct jump_label_key *key)
 		return;
 
 	jump_label_lock();
-	if (atomic_add_return(1, &key->enabled) == 1)
+	if (atomic_read(&key->enabled) == 0)
 		jump_label_update(key, JUMP_LABEL_ENABLE);
+	atomic_inc(&key->enabled);
 	jump_label_unlock();
 }
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 0e9344a71be3..d6b149ccf925 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -71,6 +71,7 @@
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
 #include <linux/slab.h>
+#include <linux/init_task.h>
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
@@ -4810,6 +4811,9 @@ EXPORT_SYMBOL(wait_for_completion);
  * This waits for either a completion of a specific task to be signaled or for a
  * specified timeout to expire. The timeout is in jiffies. It is not
  * interruptible.
+ *
+ * The return value is 0 if timed out, and positive (at least 1, or number of
+ * jiffies left till timeout) if completed.
  */
 unsigned long __sched
 wait_for_completion_timeout(struct completion *x, unsigned long timeout)
@@ -4824,6 +4828,8 @@ EXPORT_SYMBOL(wait_for_completion_timeout);
 *
 * This waits for completion of a specific task to be signaled. It is
 * interruptible.
+ *
+ * The return value is -ERESTARTSYS if interrupted, 0 if completed.
 */
 int __sched wait_for_completion_interruptible(struct completion *x)
 {
@@ -4841,6 +4847,9 @@ EXPORT_SYMBOL(wait_for_completion_interruptible);
 *
 * This waits for either a completion of a specific task to be signaled or for a
 * specified timeout to expire. It is interruptible. The timeout is in jiffies.
+ *
+ * The return value is -ERESTARTSYS if interrupted, 0 if timed out,
+ * positive (at least 1, or number of jiffies left till timeout) if completed.
 */
 long __sched
 wait_for_completion_interruptible_timeout(struct completion *x,
@@ -4856,6 +4865,8 @@ EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
 *
 * This waits to be signaled for completion of a specific task. It can be
 * interrupted by a kill signal.
+ *
+ * The return value is -ERESTARTSYS if interrupted, 0 if completed.
 */
 int __sched wait_for_completion_killable(struct completion *x)
 {
@@ -4874,6 +4885,9 @@ EXPORT_SYMBOL(wait_for_completion_killable);
 * This waits for either a completion of a specific task to be
 * signaled or for a specified timeout to expire. It can be
 * interrupted by a kill signal. The timeout is in jiffies.
+ *
+ * The return value is -ERESTARTSYS if interrupted, 0 if timed out,
+ * positive (at least 1, or number of jiffies left till timeout) if completed.
 */
 long __sched
 wait_for_completion_killable_timeout(struct completion *x,
@@ -6099,6 +6113,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	 */
 	idle->sched_class = &idle_sched_class;
 	ftrace_graph_init_idle_task(idle, cpu);
+#if defined(CONFIG_SMP)
+	sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
+#endif
 }
 
 /*
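The kernel-doc additions above spell out the return conventions for the completion wait variants. A small driver-style helper (the wait_for_device_ready() names and the 100ms budget are illustrative, not from this patch) shows how those return values are typically consumed:

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/jiffies.h>

/* Illustrative helper, not part of this patch. */
static int wait_for_device_ready(struct completion *done)
{
	unsigned long left;

	/* 0 => the 100ms budget expired; > 0 => jiffies left when completed. */
	left = wait_for_completion_timeout(done, msecs_to_jiffies(100));
	if (!left)
		return -ETIMEDOUT;
	return 0;
}

/* Same idea for the interruptible variant. */
static int wait_for_device_ready_intr(struct completion *done)
{
	long left;

	/* < 0 => -ERESTARTSYS (signal), 0 => timeout, > 0 => completed. */
	left = wait_for_completion_interruptible_timeout(done,
							 msecs_to_jiffies(100));
	if (left < 0)
		return left;
	if (!left)
		return -ETIMEDOUT;
	return 0;
}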
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5c9e67923b7c..a78ed2736ba7 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -772,19 +772,32 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
 		list_del_leaf_cfs_rq(cfs_rq);
 }
 
+static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq)
+{
+	long tg_weight;
+
+	/*
+	 * Use this CPU's actual weight instead of the last load_contribution
+	 * to gain a more accurate current total weight. See
+	 * update_cfs_rq_load_contribution().
+	 */
+	tg_weight = atomic_read(&tg->load_weight);
+	tg_weight -= cfs_rq->load_contribution;
+	tg_weight += cfs_rq->load.weight;
+
+	return tg_weight;
+}
+
 static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
 {
-	long load_weight, load, shares;
+	long tg_weight, load, shares;
 
+	tg_weight = calc_tg_weight(tg, cfs_rq);
 	load = cfs_rq->load.weight;
 
-	load_weight = atomic_read(&tg->load_weight);
-	load_weight += load;
-	load_weight -= cfs_rq->load_contribution;
-
 	shares = (tg->shares * load);
-	if (load_weight)
-		shares /= load_weight;
+	if (tg_weight)
+		shares /= tg_weight;
 
 	if (shares < MIN_SHARES)
 		shares = MIN_SHARES;
@@ -1743,7 +1756,7 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 
 static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
-	if (!cfs_rq->runtime_enabled || !cfs_rq->nr_running)
+	if (!cfs_rq->runtime_enabled || cfs_rq->nr_running)
 		return;
 
 	__return_cfs_rq_runtime(cfs_rq);
@@ -2036,36 +2049,100 @@ static void task_waking_fair(struct task_struct *p)
 * Adding load to a group doesn't make a group heavier, but can cause movement
 * of group shares between cpus. Assuming the shares were perfectly aligned one
 * can calculate the shift in shares.
+ *
+ * Calculate the effective load difference if @wl is added (subtracted) to @tg
+ * on this @cpu and results in a total addition (subtraction) of @wg to the
+ * total group weight.
+ *
+ * Given a runqueue weight distribution (rw_i) we can compute a shares
+ * distribution (s_i) using:
+ *
+ *   s_i = rw_i / \Sum rw_j						(1)
+ *
+ * Suppose we have 4 CPUs and our @tg is a direct child of the root group and
+ * has 7 equal weight tasks, distributed as below (rw_i), with the resulting
+ * shares distribution (s_i):
+ *
+ *   rw_i = {   2,   4,   1,   0 }
+ *   s_i  = { 2/7, 4/7, 1/7,   0 }
+ *
+ * As per wake_affine() we're interested in the load of two CPUs (the CPU the
+ * task used to run on and the CPU the waker is running on), we need to
+ * compute the effect of waking a task on either CPU and, in case of a sync
+ * wakeup, compute the effect of the current task going to sleep.
+ *
+ * So for a change of @wl to the local @cpu with an overall group weight change
+ * of @wl we can compute the new shares distribution (s'_i) using:
+ *
+ *   s'_i = (rw_i + @wl) / (@wg + \Sum rw_j)				(2)
+ *
+ * Suppose we're interested in CPUs 0 and 1, and want to compute the load
+ * differences in waking a task to CPU 0. The additional task changes the
+ * weight and shares distributions like:
+ *
+ *   rw'_i = {   3,   4,   1,   0 }
+ *   s'_i  = { 3/8, 4/8, 1/8,   0 }
+ *
+ * We can then compute the difference in effective weight by using:
+ *
+ *   dw_i = S * (s'_i - s_i)						(3)
+ *
+ * Where 'S' is the group weight as seen by its parent.
+ *
+ * Therefore the effective change in loads on CPU 0 would be 5/56 (3/8 - 2/7)
+ * times the weight of the group. The effect on CPU 1 would be -4/56 (4/8 -
+ * 4/7) times the weight of the group.
 */
 static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
 {
 	struct sched_entity *se = tg->se[cpu];
 
-	if (!tg->parent)
+	if (!tg->parent)	/* the trivial, non-cgroup case */
 		return wl;
 
 	for_each_sched_entity(se) {
-		long lw, w;
+		long w, W;
 
 		tg = se->my_q->tg;
-		w = se->my_q->load.weight;
 
-		/* use this cpu's instantaneous contribution */
-		lw = atomic_read(&tg->load_weight);
-		lw -= se->my_q->load_contribution;
-		lw += w + wg;
+		/*
+		 * W = @wg + \Sum rw_j
+		 */
+		W = wg + calc_tg_weight(tg, se->my_q);
 
-		wl += w;
+		/*
+		 * w = rw_i + @wl
+		 */
+		w = se->my_q->load.weight + wl;
 
-		if (lw > 0 && wl < lw)
-			wl = (wl * tg->shares) / lw;
+		/*
+		 * wl = S * s'_i; see (2)
+		 */
+		if (W > 0 && w < W)
+			wl = (w * tg->shares) / W;
 		else
 			wl = tg->shares;
 
-		/* zero point is MIN_SHARES */
+		/*
+		 * Per the above, wl is the new se->load.weight value; since
+		 * those are clipped to [MIN_SHARES, ...) do so now. See
+		 * calc_cfs_shares().
+		 */
 		if (wl < MIN_SHARES)
 			wl = MIN_SHARES;
+
+		/*
+		 * wl = dw_i = S * (s'_i - s_i); see (3)
+		 */
 		wl -= se->load.weight;
+
+		/*
+		 * Recursively apply this logic to all parent groups to compute
+		 * the final effective load change on the root group. Since
+		 * only the @tg group gets extra weight, all parent groups can
+		 * only redistribute existing shares. @wl is the shift in shares
+		 * resulting from this level per the above.
+		 */
 		wg = 0;
 	}
 
@@ -2249,7 +2326,8 @@ static int select_idle_sibling(struct task_struct *p, int target)
 	int cpu = smp_processor_id();
 	int prev_cpu = task_cpu(p);
 	struct sched_domain *sd;
-	int i;
+	struct sched_group *sg;
+	int i, smt = 0;
 
 	/*
 	 * If the task is going to be woken-up on this cpu and if it is
@@ -2269,25 +2347,38 @@ static int select_idle_sibling(struct task_struct *p, int target)
 	 * Otherwise, iterate the domains and find an elegible idle cpu.
 	 */
 	rcu_read_lock();
+again:
 	for_each_domain(target, sd) {
-		if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
-			break;
+		if (!smt && (sd->flags & SD_SHARE_CPUPOWER))
+			continue;
 
-		for_each_cpu_and(i, sched_domain_span(sd), tsk_cpus_allowed(p)) {
-			if (idle_cpu(i)) {
-				target = i;
-				break;
+		if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) {
+			if (!smt) {
+				smt = 1;
+				goto again;
 			}
+			break;
 		}
 
-		/*
-		 * Lets stop looking for an idle sibling when we reached
-		 * the domain that spans the current cpu and prev_cpu.
-		 */
-		if (cpumask_test_cpu(cpu, sched_domain_span(sd)) &&
-		    cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
-			break;
+		sg = sd->groups;
+		do {
+			if (!cpumask_intersects(sched_group_cpus(sg),
+						tsk_cpus_allowed(p)))
+				goto next;
+
+			for_each_cpu(i, sched_group_cpus(sg)) {
+				if (!idle_cpu(i))
+					goto next;
+			}
+
+			target = cpumask_first_and(sched_group_cpus(sg),
+					tsk_cpus_allowed(p));
+			goto done;
+next:
+			sg = sg->next;
+		} while (sg != sd->groups);
 	}
+done:
 	rcu_read_unlock();
 
 	return target;
@@ -3511,7 +3602,7 @@ static bool update_sd_pick_busiest(struct sched_domain *sd,
 }
 
 /**
- * update_sd_lb_stats - Update sched_group's statistics for load balancing.
+ * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
 * @sd: sched_domain whose statistics are to be updated.
 * @this_cpu: Cpu for which load balance is currently performed.
 * @idle: Idle status of this_cpu
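The new effective_load() comment above quotes 5/56 and -4/56 for its rw_i = { 2, 4, 1, 0 } example. A throwaway stand-alone check (plain user-space C, purely illustrative, not kernel code) reproduces those numbers from equations (1)-(3):

#include <stdio.h>

int main(void)
{
	double rw[4]  = { 2, 4, 1, 0 };	/* rw_i  from the comment        */
	double rw2[4] = { 3, 4, 1, 0 };	/* rw'_i after waking on CPU 0   */
	double sum = 7, sum2 = 8;	/* \Sum rw_j before/after (wg=1) */
	int i;

	for (i = 0; i < 2; i++) {
		double s  = rw[i] / sum;	/* (1) */
		double s2 = rw2[i] / sum2;	/* (2) */
		/* (3): dw_i expressed in units of the group weight S */
		printf("cpu%d: dw_i/S = %+f (comment says %+f)\n",
		       i, s2 - s, i == 0 ? 5.0 / 56 : -4.0 / 56);
	}
	return 0;
}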
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index efa0a7b75dde..84802245abd2 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -67,3 +67,4 @@ SCHED_FEAT(NONTASK_POWER, 1)
 SCHED_FEAT(TTWU_QUEUE, 1)
 
 SCHED_FEAT(FORCE_SD_OVERLAP, 0)
+SCHED_FEAT(RT_RUNTIME_SHARE, 1)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 056cbd2e2a27..583a1368afe6 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -560,6 +560,9 @@ static int balance_runtime(struct rt_rq *rt_rq)
 {
 	int more = 0;
 
+	if (!sched_feat(RT_RUNTIME_SHARE))
+		return more;
+
 	if (rt_rq->rt_time > rt_rq->rt_runtime) {
 		raw_spin_unlock(&rt_rq->rt_runtime_lock);
 		more = do_balance_runtime(rt_rq);
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 1ecd6ba36d6c..c4eb71c8b2ea 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -387,6 +387,7 @@ void clockevents_exchange_device(struct clock_event_device *old,
 	 * released list and do a notify add later.
 	 */
 	if (old) {
+		old->event_handler = clockevents_handle_noop;
 		clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);
 		list_del(&old->list);
 		list_add(&old->list, &clockevents_released);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index cfc65e1eb9fb..da2f760e780c 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -548,7 +548,7 @@ static u64 clocksource_max_deferment(struct clocksource *cs)
 	 * note a margin of 12.5% is used because this can be computed with
 	 * a shift, versus say 10% which would require division.
 	 */
-	return max_nsecs - (max_nsecs >> 5);
+	return max_nsecs - (max_nsecs >> 3);
 }
 
 #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
@@ -669,7 +669,7 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
 	 * ~ 0.06ppm granularity for NTP. We apply the same 12.5%
 	 * margin as we do in clocksource_max_deferment()
 	 */
-	sec = (cs->mask - (cs->mask >> 5));
+	sec = (cs->mask - (cs->mask >> 3));
 	do_div(sec, freq);
 	do_div(sec, scale);
 	if (!sec)
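For the clocksource change above: the existing comments already claim a 12.5% margin, and shifting right by 3 (subtracting 1/8) is what actually delivers it, whereas the old shift by 5 only reserved about 3.1%. A trivial stand-alone check (illustrative user-space C with an arbitrary input value, not kernel code):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t max_nsecs = 1000000000ULL;	/* arbitrary example value */

	/* old: >> 5 keeps 96.875% of the range, only a ~3.1% margin */
	printf("shift 5: %llu\n",
	       (unsigned long long)(max_nsecs - (max_nsecs >> 5)));
	/* new: >> 3 keeps 87.5%, i.e. the intended 12.5% margin */
	printf("shift 3: %llu\n",
	       (unsigned long long)(max_nsecs - (max_nsecs >> 3)));
	return 0;
}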
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index f954282d9a82..fd4a7b1625a2 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -71,7 +71,7 @@ int tick_check_broadcast_device(struct clock_event_device *dev)
 	    (dev->features & CLOCK_EVT_FEAT_C3STOP))
 		return 0;
 
-	clockevents_exchange_device(NULL, dev);
+	clockevents_exchange_device(tick_broadcast_device.evtdev, dev);
 	tick_broadcast_device.evtdev = dev;
 	if (!cpumask_empty(tick_get_broadcast_mask()))
 		tick_broadcast_start_periodic(dev);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 900b409543db..b1e8943fed1d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -152,7 +152,6 @@ void clear_ftrace_function(void)
 	ftrace_pid_function = ftrace_stub;
 }
 
-#undef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
 #ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
 /*
  * For those archs that do not test ftrace_trace_stop in their
@@ -1212,7 +1211,9 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
 	if (!src->count) {
 		free_ftrace_hash_rcu(*dst);
 		rcu_assign_pointer(*dst, EMPTY_HASH);
-		return 0;
+		/* still need to update the function records */
+		ret = 0;
+		goto out;
 	}
 
 	/*
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 581876f9f387..c212a7f934ec 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1078,7 +1078,6 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
 	/* First see if we did not already create this dir */
 	list_for_each_entry(system, &event_subsystems, list) {
 		if (strcmp(system->name, name) == 0) {
-			__get_system(system);
 			system->nr_events++;
 			return system->entry;
 		}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 816d3d074979..95dc31efd6dd 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1649,7 +1649,9 @@ static int replace_system_preds(struct event_subsystem *system,
 		 */
 		err = replace_preds(call, NULL, ps, filter_string, true);
 		if (err)
-			goto fail;
+			call->flags |= TRACE_EVENT_FL_NO_SET_FILTER;
+		else
+			call->flags &= ~TRACE_EVENT_FL_NO_SET_FILTER;
 	}
 
 	list_for_each_entry(call, &ftrace_events, list) {
@@ -1658,6 +1660,9 @@ static int replace_system_preds(struct event_subsystem *system,
 		if (strcmp(call->class->system, system->name) != 0)
 			continue;
 
+		if (call->flags & TRACE_EVENT_FL_NO_SET_FILTER)
+			continue;
+
 		filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
 		if (!filter_item)
 			goto fail_mem;
@@ -1686,7 +1691,7 @@ static int replace_system_preds(struct event_subsystem *system,
 		 * replace the filter for the call.
 		 */
 		filter = call->filter;
-		call->filter = filter_item->filter;
+		rcu_assign_pointer(call->filter, filter_item->filter);
 		filter_item->filter = filter;
 
 		fail = false;
@@ -1741,7 +1746,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
 	filter = call->filter;
 	if (!filter)
 		goto out_unlock;
-	call->filter = NULL;
+	RCU_INIT_POINTER(call->filter, NULL);
 	/* Make sure the filter is not being used */
 	synchronize_sched();
 	__free_filter(filter);
@@ -1782,7 +1787,7 @@ out:
 	 * string
 	 */
 	tmp = call->filter;
-	call->filter = filter;
+	rcu_assign_pointer(call->filter, filter);
 	if (tmp) {
 		/* Make sure the call is done with the filter */
 		synchronize_sched();
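The trace_events_filter changes above switch the filter pointer updates to rcu_assign_pointer()/RCU_INIT_POINTER() while keeping the existing synchronize_sched() before the old filter is freed. The general publish-then-retire pattern looks roughly like the sketch below; it is a generic illustration with made-up names (active_cfg, swap_cfg) and uses synchronize_rcu() in place of the tracing code's synchronize_sched():

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct cfg {
	int value;
};

static struct cfg __rcu *active_cfg;	/* hypothetical shared pointer */

/* Caller is assumed to hold the update-side mutex. */
static void swap_cfg(struct cfg *new_cfg)
{
	struct cfg *old;

	old = rcu_dereference_protected(active_cfg, 1);
	rcu_assign_pointer(active_cfg, new_cfg);	/* publish to readers */
	synchronize_rcu();		/* wait out pre-existing readers */
	kfree(old);			/* no reader can still see it now */
}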
