diff options
| author | Ingo Molnar <mingo@elte.hu> | 2011-01-05 08:14:42 -0500 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2011-01-05 08:14:46 -0500 |
| commit | 27066fd484a32c80630136aa2b91c980f3198f9d (patch) | |
| tree | 78ddabdedbfd7525d13ecd62a745525843f1d0e8 /kernel | |
| parent | 101e5f77bf35679809586e250b6c62193d2ed179 (diff) | |
| parent | 3c0eee3fe6a3a1c745379547c7e7c904aa64f6d5 (diff) | |
Merge commit 'v2.6.37' into sched/core
Merge reason: Merge the final .37 tree.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/fork.c | 1 | ||||
| -rw-r--r-- | kernel/kthread.c | 11 | ||||
| -rw-r--r-- | kernel/perf_event.c | 37 | ||||
| -rw-r--r-- | kernel/power/swap.c | 2 | ||||
| -rw-r--r-- | kernel/power/user.c | 2 | ||||
| -rw-r--r-- | kernel/resource.c | 104 | ||||
| -rw-r--r-- | kernel/sched.c | 287 | ||||
| -rw-r--r-- | kernel/taskstats.c | 57 | ||||
| -rw-r--r-- | kernel/timer.c | 8 | ||||
| -rw-r--r-- | kernel/trace/ring_buffer.c | 9 | ||||
| -rw-r--r-- | kernel/trace/trace.c | 10 | ||||
| -rw-r--r-- | kernel/user.c | 1 | ||||
| -rw-r--r-- | kernel/watchdog.c | 3 |
13 files changed, 361 insertions, 171 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 067244495966..7d164e25b0f0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -275,6 +275,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) | |||
| 275 | 275 | ||
| 276 | setup_thread_stack(tsk, orig); | 276 | setup_thread_stack(tsk, orig); |
| 277 | clear_user_return_notifier(tsk); | 277 | clear_user_return_notifier(tsk); |
| 278 | clear_tsk_need_resched(tsk); | ||
| 278 | stackend = end_of_stack(tsk); | 279 | stackend = end_of_stack(tsk); |
| 279 | *stackend = STACK_END_MAGIC; /* for overflow detection */ | 280 | *stackend = STACK_END_MAGIC; /* for overflow detection */ |
| 280 | 281 | ||
diff --git a/kernel/kthread.c b/kernel/kthread.c index 74cf6f5e7ade..5355cfd44a3f 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
| @@ -265,6 +265,17 @@ int kthreadd(void *unused) | |||
| 265 | return 0; | 265 | return 0; |
| 266 | } | 266 | } |
| 267 | 267 | ||
| 268 | void __init_kthread_worker(struct kthread_worker *worker, | ||
| 269 | const char *name, | ||
| 270 | struct lock_class_key *key) | ||
| 271 | { | ||
| 272 | spin_lock_init(&worker->lock); | ||
| 273 | lockdep_set_class_and_name(&worker->lock, key, name); | ||
| 274 | INIT_LIST_HEAD(&worker->work_list); | ||
| 275 | worker->task = NULL; | ||
| 276 | } | ||
| 277 | EXPORT_SYMBOL_GPL(__init_kthread_worker); | ||
| 278 | |||
| 268 | /** | 279 | /** |
| 269 | * kthread_worker_fn - kthread function to process kthread_worker | 280 | * kthread_worker_fn - kthread function to process kthread_worker |
| 270 | * @worker_ptr: pointer to initialized kthread_worker | 281 | * @worker_ptr: pointer to initialized kthread_worker |
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index eac7e3364335..2870feee81dd 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
| @@ -3824,6 +3824,8 @@ static void perf_event_task_event(struct perf_task_event *task_event) | |||
| 3824 | rcu_read_lock(); | 3824 | rcu_read_lock(); |
| 3825 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 3825 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
| 3826 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 3826 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
| 3827 | if (cpuctx->active_pmu != pmu) | ||
| 3828 | goto next; | ||
| 3827 | perf_event_task_ctx(&cpuctx->ctx, task_event); | 3829 | perf_event_task_ctx(&cpuctx->ctx, task_event); |
| 3828 | 3830 | ||
| 3829 | ctx = task_event->task_ctx; | 3831 | ctx = task_event->task_ctx; |
| @@ -3959,6 +3961,8 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
| 3959 | rcu_read_lock(); | 3961 | rcu_read_lock(); |
| 3960 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 3962 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
| 3961 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 3963 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
| 3964 | if (cpuctx->active_pmu != pmu) | ||
| 3965 | goto next; | ||
| 3962 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); | 3966 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); |
| 3963 | 3967 | ||
| 3964 | ctxn = pmu->task_ctx_nr; | 3968 | ctxn = pmu->task_ctx_nr; |
| @@ -4144,6 +4148,8 @@ got_name: | |||
| 4144 | rcu_read_lock(); | 4148 | rcu_read_lock(); |
| 4145 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 4149 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
| 4146 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 4150 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
| 4151 | if (cpuctx->active_pmu != pmu) | ||
| 4152 | goto next; | ||
| 4147 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, | 4153 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, |
| 4148 | vma->vm_flags & VM_EXEC); | 4154 | vma->vm_flags & VM_EXEC); |
| 4149 | 4155 | ||
| @@ -4713,7 +4719,7 @@ static int perf_swevent_init(struct perf_event *event) | |||
| 4713 | break; | 4719 | break; |
| 4714 | } | 4720 | } |
| 4715 | 4721 | ||
| 4716 | if (event_id > PERF_COUNT_SW_MAX) | 4722 | if (event_id >= PERF_COUNT_SW_MAX) |
| 4717 | return -ENOENT; | 4723 | return -ENOENT; |
| 4718 | 4724 | ||
| 4719 | if (!event->parent) { | 4725 | if (!event->parent) { |
| @@ -5145,20 +5151,36 @@ static void *find_pmu_context(int ctxn) | |||
| 5145 | return NULL; | 5151 | return NULL; |
| 5146 | } | 5152 | } |
| 5147 | 5153 | ||
| 5148 | static void free_pmu_context(void * __percpu cpu_context) | 5154 | static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu) |
| 5149 | { | 5155 | { |
| 5150 | struct pmu *pmu; | 5156 | int cpu; |
| 5157 | |||
| 5158 | for_each_possible_cpu(cpu) { | ||
| 5159 | struct perf_cpu_context *cpuctx; | ||
| 5160 | |||
| 5161 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); | ||
| 5162 | |||
| 5163 | if (cpuctx->active_pmu == old_pmu) | ||
| 5164 | cpuctx->active_pmu = pmu; | ||
| 5165 | } | ||
| 5166 | } | ||
| 5167 | |||
| 5168 | static void free_pmu_context(struct pmu *pmu) | ||
| 5169 | { | ||
| 5170 | struct pmu *i; | ||
| 5151 | 5171 | ||
| 5152 | mutex_lock(&pmus_lock); | 5172 | mutex_lock(&pmus_lock); |
| 5153 | /* | 5173 | /* |
| 5154 | * Like a real lame refcount. | 5174 | * Like a real lame refcount. |
| 5155 | */ | 5175 | */ |
| 5156 | list_for_each_entry(pmu, &pmus, entry) { | 5176 | list_for_each_entry(i, &pmus, entry) { |
| 5157 | if (pmu->pmu_cpu_context == cpu_context) | 5177 | if (i->pmu_cpu_context == pmu->pmu_cpu_context) { |
| 5178 | update_pmu_context(i, pmu); | ||
| 5158 | goto out; | 5179 | goto out; |
| 5180 | } | ||
| 5159 | } | 5181 | } |
| 5160 | 5182 | ||
| 5161 | free_percpu(cpu_context); | 5183 | free_percpu(pmu->pmu_cpu_context); |
| 5162 | out: | 5184 | out: |
| 5163 | mutex_unlock(&pmus_lock); | 5185 | mutex_unlock(&pmus_lock); |
| 5164 | } | 5186 | } |
| @@ -5190,6 +5212,7 @@ int perf_pmu_register(struct pmu *pmu) | |||
| 5190 | cpuctx->ctx.pmu = pmu; | 5212 | cpuctx->ctx.pmu = pmu; |
| 5191 | cpuctx->jiffies_interval = 1; | 5213 | cpuctx->jiffies_interval = 1; |
| 5192 | INIT_LIST_HEAD(&cpuctx->rotation_list); | 5214 | INIT_LIST_HEAD(&cpuctx->rotation_list); |
| 5215 | cpuctx->active_pmu = pmu; | ||
| 5193 | } | 5216 | } |
| 5194 | 5217 | ||
| 5195 | got_cpu_context: | 5218 | got_cpu_context: |
| @@ -5241,7 +5264,7 @@ void perf_pmu_unregister(struct pmu *pmu) | |||
| 5241 | synchronize_rcu(); | 5264 | synchronize_rcu(); |
| 5242 | 5265 | ||
| 5243 | free_percpu(pmu->pmu_disable_count); | 5266 | free_percpu(pmu->pmu_disable_count); |
| 5244 | free_pmu_context(pmu->pmu_cpu_context); | 5267 | free_pmu_context(pmu); |
| 5245 | } | 5268 | } |
| 5246 | 5269 | ||
| 5247 | struct pmu *perf_init_event(struct perf_event *event) | 5270 | struct pmu *perf_init_event(struct perf_event *event) |
diff --git a/kernel/power/swap.c b/kernel/power/swap.c index baf667bb2794..8c7e4832b9be 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c | |||
| @@ -30,7 +30,7 @@ | |||
| 30 | 30 | ||
| 31 | #include "power.h" | 31 | #include "power.h" |
| 32 | 32 | ||
| 33 | #define HIBERNATE_SIG "LINHIB0001" | 33 | #define HIBERNATE_SIG "S1SUSPEND" |
| 34 | 34 | ||
| 35 | /* | 35 | /* |
| 36 | * The swap map is a data structure used for keeping track of each page | 36 | * The swap map is a data structure used for keeping track of each page |
diff --git a/kernel/power/user.c b/kernel/power/user.c index 1b2ea31e6bd8..c36c3b9e8a84 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c | |||
| @@ -137,7 +137,7 @@ static int snapshot_release(struct inode *inode, struct file *filp) | |||
| 137 | free_all_swap_pages(data->swap); | 137 | free_all_swap_pages(data->swap); |
| 138 | if (data->frozen) | 138 | if (data->frozen) |
| 139 | thaw_processes(); | 139 | thaw_processes(); |
| 140 | pm_notifier_call_chain(data->mode == O_WRONLY ? | 140 | pm_notifier_call_chain(data->mode == O_RDONLY ? |
| 141 | PM_POST_HIBERNATION : PM_POST_RESTORE); | 141 | PM_POST_HIBERNATION : PM_POST_RESTORE); |
| 142 | atomic_inc(&snapshot_device_available); | 142 | atomic_inc(&snapshot_device_available); |
| 143 | 143 | ||
diff --git a/kernel/resource.c b/kernel/resource.c index 9fad33efd0db..798e2fae2a06 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
| @@ -40,23 +40,6 @@ EXPORT_SYMBOL(iomem_resource); | |||
| 40 | 40 | ||
| 41 | static DEFINE_RWLOCK(resource_lock); | 41 | static DEFINE_RWLOCK(resource_lock); |
| 42 | 42 | ||
| 43 | /* | ||
| 44 | * By default, we allocate free space bottom-up. The architecture can request | ||
| 45 | * top-down by clearing this flag. The user can override the architecture's | ||
| 46 | * choice with the "resource_alloc_from_bottom" kernel boot option, but that | ||
| 47 | * should only be a debugging tool. | ||
| 48 | */ | ||
| 49 | int resource_alloc_from_bottom = 1; | ||
| 50 | |||
| 51 | static __init int setup_alloc_from_bottom(char *s) | ||
| 52 | { | ||
| 53 | printk(KERN_INFO | ||
| 54 | "resource: allocating from bottom-up; please report a bug\n"); | ||
| 55 | resource_alloc_from_bottom = 1; | ||
| 56 | return 0; | ||
| 57 | } | ||
| 58 | early_param("resource_alloc_from_bottom", setup_alloc_from_bottom); | ||
| 59 | |||
| 60 | static void *r_next(struct seq_file *m, void *v, loff_t *pos) | 43 | static void *r_next(struct seq_file *m, void *v, loff_t *pos) |
| 61 | { | 44 | { |
| 62 | struct resource *p = v; | 45 | struct resource *p = v; |
| @@ -374,6 +357,10 @@ int __weak page_is_ram(unsigned long pfn) | |||
| 374 | return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1; | 357 | return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1; |
| 375 | } | 358 | } |
| 376 | 359 | ||
| 360 | void __weak arch_remove_reservations(struct resource *avail) | ||
| 361 | { | ||
| 362 | } | ||
| 363 | |||
| 377 | static resource_size_t simple_align_resource(void *data, | 364 | static resource_size_t simple_align_resource(void *data, |
| 378 | const struct resource *avail, | 365 | const struct resource *avail, |
| 379 | resource_size_t size, | 366 | resource_size_t size, |
| @@ -397,74 +384,7 @@ static bool resource_contains(struct resource *res1, struct resource *res2) | |||
| 397 | } | 384 | } |
| 398 | 385 | ||
| 399 | /* | 386 | /* |
| 400 | * Find the resource before "child" in the sibling list of "root" children. | ||
| 401 | */ | ||
| 402 | static struct resource *find_sibling_prev(struct resource *root, struct resource *child) | ||
| 403 | { | ||
| 404 | struct resource *this; | ||
| 405 | |||
| 406 | for (this = root->child; this; this = this->sibling) | ||
| 407 | if (this->sibling == child) | ||
| 408 | return this; | ||
| 409 | |||
| 410 | return NULL; | ||
| 411 | } | ||
| 412 | |||
| 413 | /* | ||
| 414 | * Find empty slot in the resource tree given range and alignment. | 387 | * Find empty slot in the resource tree given range and alignment. |
| 415 | * This version allocates from the end of the root resource first. | ||
| 416 | */ | ||
| 417 | static int find_resource_from_top(struct resource *root, struct resource *new, | ||
| 418 | resource_size_t size, resource_size_t min, | ||
| 419 | resource_size_t max, resource_size_t align, | ||
| 420 | resource_size_t (*alignf)(void *, | ||
| 421 | const struct resource *, | ||
| 422 | resource_size_t, | ||
| 423 | resource_size_t), | ||
| 424 | void *alignf_data) | ||
| 425 | { | ||
| 426 | struct resource *this; | ||
| 427 | struct resource tmp, avail, alloc; | ||
| 428 | |||
| 429 | tmp.start = root->end; | ||
| 430 | tmp.end = root->end; | ||
| 431 | |||
| 432 | this = find_sibling_prev(root, NULL); | ||
| 433 | for (;;) { | ||
| 434 | if (this) { | ||
| 435 | if (this->end < root->end) | ||
| 436 | tmp.start = this->end + 1; | ||
| 437 | } else | ||
| 438 | tmp.start = root->start; | ||
| 439 | |||
| 440 | resource_clip(&tmp, min, max); | ||
| 441 | |||
| 442 | /* Check for overflow after ALIGN() */ | ||
| 443 | avail = *new; | ||
| 444 | avail.start = ALIGN(tmp.start, align); | ||
| 445 | avail.end = tmp.end; | ||
| 446 | if (avail.start >= tmp.start) { | ||
| 447 | alloc.start = alignf(alignf_data, &avail, size, align); | ||
| 448 | alloc.end = alloc.start + size - 1; | ||
| 449 | if (resource_contains(&avail, &alloc)) { | ||
| 450 | new->start = alloc.start; | ||
| 451 | new->end = alloc.end; | ||
| 452 | return 0; | ||
| 453 | } | ||
| 454 | } | ||
| 455 | |||
| 456 | if (!this || this->start == root->start) | ||
| 457 | break; | ||
| 458 | |||
| 459 | tmp.end = this->start - 1; | ||
| 460 | this = find_sibling_prev(root, this); | ||
| 461 | } | ||
| 462 | return -EBUSY; | ||
| 463 | } | ||
| 464 | |||
| 465 | /* | ||
| 466 | * Find empty slot in the resource tree given range and alignment. | ||
| 467 | * This version allocates from the beginning of the root resource first. | ||
| 468 | */ | 388 | */ |
| 469 | static int find_resource(struct resource *root, struct resource *new, | 389 | static int find_resource(struct resource *root, struct resource *new, |
| 470 | resource_size_t size, resource_size_t min, | 390 | resource_size_t size, resource_size_t min, |
| @@ -478,23 +398,24 @@ static int find_resource(struct resource *root, struct resource *new, | |||
| 478 | struct resource *this = root->child; | 398 | struct resource *this = root->child; |
| 479 | struct resource tmp = *new, avail, alloc; | 399 | struct resource tmp = *new, avail, alloc; |
| 480 | 400 | ||
| 401 | tmp.flags = new->flags; | ||
| 481 | tmp.start = root->start; | 402 | tmp.start = root->start; |
| 482 | /* | 403 | /* |
| 483 | * Skip past an allocated resource that starts at 0, since the | 404 | * Skip past an allocated resource that starts at 0, since the assignment |
| 484 | * assignment of this->start - 1 to tmp->end below would cause an | 405 | * of this->start - 1 to tmp->end below would cause an underflow. |
| 485 | * underflow. | ||
| 486 | */ | 406 | */ |
| 487 | if (this && this->start == 0) { | 407 | if (this && this->start == 0) { |
| 488 | tmp.start = this->end + 1; | 408 | tmp.start = this->end + 1; |
| 489 | this = this->sibling; | 409 | this = this->sibling; |
| 490 | } | 410 | } |
| 491 | for (;;) { | 411 | for(;;) { |
| 492 | if (this) | 412 | if (this) |
| 493 | tmp.end = this->start - 1; | 413 | tmp.end = this->start - 1; |
| 494 | else | 414 | else |
| 495 | tmp.end = root->end; | 415 | tmp.end = root->end; |
| 496 | 416 | ||
| 497 | resource_clip(&tmp, min, max); | 417 | resource_clip(&tmp, min, max); |
| 418 | arch_remove_reservations(&tmp); | ||
| 498 | 419 | ||
| 499 | /* Check for overflow after ALIGN() */ | 420 | /* Check for overflow after ALIGN() */ |
| 500 | avail = *new; | 421 | avail = *new; |
| @@ -509,10 +430,8 @@ static int find_resource(struct resource *root, struct resource *new, | |||
| 509 | return 0; | 430 | return 0; |
| 510 | } | 431 | } |
| 511 | } | 432 | } |
| 512 | |||
| 513 | if (!this) | 433 | if (!this) |
| 514 | break; | 434 | break; |
| 515 | |||
| 516 | tmp.start = this->end + 1; | 435 | tmp.start = this->end + 1; |
| 517 | this = this->sibling; | 436 | this = this->sibling; |
| 518 | } | 437 | } |
| @@ -545,10 +464,7 @@ int allocate_resource(struct resource *root, struct resource *new, | |||
| 545 | alignf = simple_align_resource; | 464 | alignf = simple_align_resource; |
| 546 | 465 | ||
| 547 | write_lock(&resource_lock); | 466 | write_lock(&resource_lock); |
| 548 | if (resource_alloc_from_bottom) | 467 | err = find_resource(root, new, size, min, max, align, alignf, alignf_data); |
| 549 | err = find_resource(root, new, size, min, max, align, alignf, alignf_data); | ||
| 550 | else | ||
| 551 | err = find_resource_from_top(root, new, size, min, max, align, alignf, alignf_data); | ||
| 552 | if (err >= 0 && __request_resource(root, new)) | 468 | if (err >= 0 && __request_resource(root, new)) |
| 553 | err = -EBUSY; | 469 | err = -EBUSY; |
| 554 | write_unlock(&resource_lock); | 470 | write_unlock(&resource_lock); |
diff --git a/kernel/sched.c b/kernel/sched.c index 9f9dd8dda53c..f2f914e0c47c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -642,22 +642,18 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
| 642 | 642 | ||
| 643 | #endif /* CONFIG_CGROUP_SCHED */ | 643 | #endif /* CONFIG_CGROUP_SCHED */ |
| 644 | 644 | ||
| 645 | static u64 irq_time_cpu(int cpu); | 645 | static void update_rq_clock_task(struct rq *rq, s64 delta); |
| 646 | static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time); | ||
| 647 | 646 | ||
| 648 | inline void update_rq_clock(struct rq *rq) | 647 | static void update_rq_clock(struct rq *rq) |
| 649 | { | 648 | { |
| 650 | if (!rq->skip_clock_update) { | 649 | s64 delta; |
| 651 | int cpu = cpu_of(rq); | ||
| 652 | u64 irq_time; | ||
| 653 | 650 | ||
| 654 | rq->clock = sched_clock_cpu(cpu); | 651 | if (rq->skip_clock_update) |
| 655 | irq_time = irq_time_cpu(cpu); | 652 | return; |
| 656 | if (rq->clock - irq_time > rq->clock_task) | ||
| 657 | rq->clock_task = rq->clock - irq_time; | ||
| 658 | 653 | ||
| 659 | sched_irq_time_avg_update(rq, irq_time); | 654 | delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; |
| 660 | } | 655 | rq->clock += delta; |
| 656 | update_rq_clock_task(rq, delta); | ||
| 661 | } | 657 | } |
| 662 | 658 | ||
| 663 | /* | 659 | /* |
| @@ -1795,10 +1791,9 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags) | |||
| 1795 | * They are read and saved off onto struct rq in update_rq_clock(). | 1791 | * They are read and saved off onto struct rq in update_rq_clock(). |
| 1796 | * This may result in other CPU reading this CPU's irq time and can | 1792 | * This may result in other CPU reading this CPU's irq time and can |
| 1797 | * race with irq/account_system_vtime on this CPU. We would either get old | 1793 | * race with irq/account_system_vtime on this CPU. We would either get old |
| 1798 | * or new value (or semi updated value on 32 bit) with a side effect of | 1794 | * or new value with a side effect of accounting a slice of irq time to wrong |
| 1799 | * accounting a slice of irq time to wrong task when irq is in progress | 1795 | * task when irq is in progress while we read rq->clock. That is a worthy |
| 1800 | * while we read rq->clock. That is a worthy compromise in place of having | 1796 | * compromise in place of having locks on each irq in account_system_time. |
| 1801 | * locks on each irq in account_system_time. | ||
| 1802 | */ | 1797 | */ |
| 1803 | static DEFINE_PER_CPU(u64, cpu_hardirq_time); | 1798 | static DEFINE_PER_CPU(u64, cpu_hardirq_time); |
| 1804 | static DEFINE_PER_CPU(u64, cpu_softirq_time); | 1799 | static DEFINE_PER_CPU(u64, cpu_softirq_time); |
| @@ -1816,19 +1811,58 @@ void disable_sched_clock_irqtime(void) | |||
| 1816 | sched_clock_irqtime = 0; | 1811 | sched_clock_irqtime = 0; |
| 1817 | } | 1812 | } |
| 1818 | 1813 | ||
| 1819 | static u64 irq_time_cpu(int cpu) | 1814 | #ifndef CONFIG_64BIT |
| 1815 | static DEFINE_PER_CPU(seqcount_t, irq_time_seq); | ||
| 1816 | |||
| 1817 | static inline void irq_time_write_begin(void) | ||
| 1820 | { | 1818 | { |
| 1821 | if (!sched_clock_irqtime) | 1819 | __this_cpu_inc(irq_time_seq.sequence); |
| 1822 | return 0; | 1820 | smp_wmb(); |
| 1821 | } | ||
| 1822 | |||
| 1823 | static inline void irq_time_write_end(void) | ||
| 1824 | { | ||
| 1825 | smp_wmb(); | ||
| 1826 | __this_cpu_inc(irq_time_seq.sequence); | ||
| 1827 | } | ||
| 1828 | |||
| 1829 | static inline u64 irq_time_read(int cpu) | ||
| 1830 | { | ||
| 1831 | u64 irq_time; | ||
| 1832 | unsigned seq; | ||
| 1823 | 1833 | ||
| 1834 | do { | ||
| 1835 | seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu)); | ||
| 1836 | irq_time = per_cpu(cpu_softirq_time, cpu) + | ||
| 1837 | per_cpu(cpu_hardirq_time, cpu); | ||
| 1838 | } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq)); | ||
| 1839 | |||
| 1840 | return irq_time; | ||
| 1841 | } | ||
| 1842 | #else /* CONFIG_64BIT */ | ||
| 1843 | static inline void irq_time_write_begin(void) | ||
| 1844 | { | ||
| 1845 | } | ||
| 1846 | |||
| 1847 | static inline void irq_time_write_end(void) | ||
| 1848 | { | ||
| 1849 | } | ||
| 1850 | |||
| 1851 | static inline u64 irq_time_read(int cpu) | ||
| 1852 | { | ||
| 1824 | return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); | 1853 | return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); |
| 1825 | } | 1854 | } |
| 1855 | #endif /* CONFIG_64BIT */ | ||
| 1826 | 1856 | ||
| 1857 | /* | ||
| 1858 | * Called before incrementing preempt_count on {soft,}irq_enter | ||
| 1859 | * and before decrementing preempt_count on {soft,}irq_exit. | ||
| 1860 | */ | ||
| 1827 | void account_system_vtime(struct task_struct *curr) | 1861 | void account_system_vtime(struct task_struct *curr) |
| 1828 | { | 1862 | { |
| 1829 | unsigned long flags; | 1863 | unsigned long flags; |
| 1864 | s64 delta; | ||
| 1830 | int cpu; | 1865 | int cpu; |
| 1831 | u64 now, delta; | ||
| 1832 | 1866 | ||
| 1833 | if (!sched_clock_irqtime) | 1867 | if (!sched_clock_irqtime) |
| 1834 | return; | 1868 | return; |
| @@ -1836,9 +1870,10 @@ void account_system_vtime(struct task_struct *curr) | |||
| 1836 | local_irq_save(flags); | 1870 | local_irq_save(flags); |
| 1837 | 1871 | ||
| 1838 | cpu = smp_processor_id(); | 1872 | cpu = smp_processor_id(); |
| 1839 | now = sched_clock_cpu(cpu); | 1873 | delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); |
| 1840 | delta = now - per_cpu(irq_start_time, cpu); | 1874 | __this_cpu_add(irq_start_time, delta); |
| 1841 | per_cpu(irq_start_time, cpu) = now; | 1875 | |
| 1876 | irq_time_write_begin(); | ||
| 1842 | /* | 1877 | /* |
| 1843 | * We do not account for softirq time from ksoftirqd here. | 1878 | * We do not account for softirq time from ksoftirqd here. |
| 1844 | * We want to continue accounting softirq time to ksoftirqd thread | 1879 | * We want to continue accounting softirq time to ksoftirqd thread |
| @@ -1846,33 +1881,55 @@ void account_system_vtime(struct task_struct *curr) | |||
| 1846 | * that do not consume any time, but still wants to run. | 1881 | * that do not consume any time, but still wants to run. |
| 1847 | */ | 1882 | */ |
| 1848 | if (hardirq_count()) | 1883 | if (hardirq_count()) |
| 1849 | per_cpu(cpu_hardirq_time, cpu) += delta; | 1884 | __this_cpu_add(cpu_hardirq_time, delta); |
| 1850 | else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) | 1885 | else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) |
| 1851 | per_cpu(cpu_softirq_time, cpu) += delta; | 1886 | __this_cpu_add(cpu_softirq_time, delta); |
| 1852 | 1887 | ||
| 1888 | irq_time_write_end(); | ||
| 1853 | local_irq_restore(flags); | 1889 | local_irq_restore(flags); |
| 1854 | } | 1890 | } |
| 1855 | EXPORT_SYMBOL_GPL(account_system_vtime); | 1891 | EXPORT_SYMBOL_GPL(account_system_vtime); |
| 1856 | 1892 | ||
| 1857 | static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) | 1893 | static void update_rq_clock_task(struct rq *rq, s64 delta) |
| 1858 | { | 1894 | { |
| 1859 | if (sched_clock_irqtime && sched_feat(NONIRQ_POWER)) { | 1895 | s64 irq_delta; |
| 1860 | u64 delta_irq = curr_irq_time - rq->prev_irq_time; | 1896 | |
| 1861 | rq->prev_irq_time = curr_irq_time; | 1897 | irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; |
| 1862 | sched_rt_avg_update(rq, delta_irq); | 1898 | |
| 1863 | } | 1899 | /* |
| 1900 | * Since irq_time is only updated on {soft,}irq_exit, we might run into | ||
| 1901 | * this case when a previous update_rq_clock() happened inside a | ||
| 1902 | * {soft,}irq region. | ||
| 1903 | * | ||
| 1904 | * When this happens, we stop ->clock_task and only update the | ||
| 1905 | * prev_irq_time stamp to account for the part that fit, so that a next | ||
| 1906 | * update will consume the rest. This ensures ->clock_task is | ||
| 1907 | * monotonic. | ||
| 1908 | * | ||
| 1909 | * It does however cause some slight miss-attribution of {soft,}irq | ||
| 1910 | * time, a more accurate solution would be to update the irq_time using | ||
| 1911 | * the current rq->clock timestamp, except that would require using | ||
| 1912 | * atomic ops. | ||
| 1913 | */ | ||
| 1914 | if (irq_delta > delta) | ||
| 1915 | irq_delta = delta; | ||
| 1916 | |||
| 1917 | rq->prev_irq_time += irq_delta; | ||
| 1918 | delta -= irq_delta; | ||
| 1919 | rq->clock_task += delta; | ||
| 1920 | |||
| 1921 | if (irq_delta && sched_feat(NONIRQ_POWER)) | ||
| 1922 | sched_rt_avg_update(rq, irq_delta); | ||
| 1864 | } | 1923 | } |
| 1865 | 1924 | ||
| 1866 | #else | 1925 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ |
| 1867 | 1926 | ||
| 1868 | static u64 irq_time_cpu(int cpu) | 1927 | static void update_rq_clock_task(struct rq *rq, s64 delta) |
| 1869 | { | 1928 | { |
| 1870 | return 0; | 1929 | rq->clock_task += delta; |
| 1871 | } | 1930 | } |
| 1872 | 1931 | ||
| 1873 | static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { } | 1932 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ |
| 1874 | |||
| 1875 | #endif | ||
| 1876 | 1933 | ||
| 1877 | #include "sched_idletask.c" | 1934 | #include "sched_idletask.c" |
| 1878 | #include "sched_fair.c" | 1935 | #include "sched_fair.c" |
| @@ -2001,7 +2058,7 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | |||
| 2001 | * A queue event has occurred, and we're going to schedule. In | 2058 | * A queue event has occurred, and we're going to schedule. In |
| 2002 | * this case, we can save a useless back to back clock update. | 2059 | * this case, we can save a useless back to back clock update. |
| 2003 | */ | 2060 | */ |
| 2004 | if (test_tsk_need_resched(rq->curr)) | 2061 | if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr)) |
| 2005 | rq->skip_clock_update = 1; | 2062 | rq->skip_clock_update = 1; |
| 2006 | } | 2063 | } |
| 2007 | 2064 | ||
| @@ -2988,6 +3045,15 @@ static long calc_load_fold_active(struct rq *this_rq) | |||
| 2988 | return delta; | 3045 | return delta; |
| 2989 | } | 3046 | } |
| 2990 | 3047 | ||
| 3048 | static unsigned long | ||
| 3049 | calc_load(unsigned long load, unsigned long exp, unsigned long active) | ||
| 3050 | { | ||
| 3051 | load *= exp; | ||
| 3052 | load += active * (FIXED_1 - exp); | ||
| 3053 | load += 1UL << (FSHIFT - 1); | ||
| 3054 | return load >> FSHIFT; | ||
| 3055 | } | ||
| 3056 | |||
| 2991 | #ifdef CONFIG_NO_HZ | 3057 | #ifdef CONFIG_NO_HZ |
| 2992 | /* | 3058 | /* |
| 2993 | * For NO_HZ we delay the active fold to the next LOAD_FREQ update. | 3059 | * For NO_HZ we delay the active fold to the next LOAD_FREQ update. |
| @@ -3017,6 +3083,128 @@ static long calc_load_fold_idle(void) | |||
| 3017 | 3083 | ||
| 3018 | return delta; | 3084 | return delta; |
| 3019 | } | 3085 | } |
| 3086 | |||
| 3087 | /** | ||
| 3088 | * fixed_power_int - compute: x^n, in O(log n) time | ||
| 3089 | * | ||
| 3090 | * @x: base of the power | ||
| 3091 | * @frac_bits: fractional bits of @x | ||
| 3092 | * @n: power to raise @x to. | ||
| 3093 | * | ||
| 3094 | * By exploiting the relation between the definition of the natural power | ||
| 3095 | * function: x^n := x*x*...*x (x multiplied by itself for n times), and | ||
| 3096 | * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i, | ||
| 3097 | * (where: n_i \elem {0, 1}, the binary vector representing n), | ||
| 3098 | * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is | ||
| 3099 | * of course trivially computable in O(log_2 n), the length of our binary | ||
| 3100 | * vector. | ||
| 3101 | */ | ||
| 3102 | static unsigned long | ||
| 3103 | fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n) | ||
| 3104 | { | ||
| 3105 | unsigned long result = 1UL << frac_bits; | ||
| 3106 | |||
| 3107 | if (n) for (;;) { | ||
| 3108 | if (n & 1) { | ||
| 3109 | result *= x; | ||
| 3110 | result += 1UL << (frac_bits - 1); | ||
| 3111 | result >>= frac_bits; | ||
| 3112 | } | ||
| 3113 | n >>= 1; | ||
| 3114 | if (!n) | ||
| 3115 | break; | ||
| 3116 | x *= x; | ||
| 3117 | x += 1UL << (frac_bits - 1); | ||
| 3118 | x >>= frac_bits; | ||
| 3119 | } | ||
| 3120 | |||
| 3121 | return result; | ||
| 3122 | } | ||
| 3123 | |||
| 3124 | /* | ||
| 3125 | * a1 = a0 * e + a * (1 - e) | ||
| 3126 | * | ||
| 3127 | * a2 = a1 * e + a * (1 - e) | ||
| 3128 | * = (a0 * e + a * (1 - e)) * e + a * (1 - e) | ||
| 3129 | * = a0 * e^2 + a * (1 - e) * (1 + e) | ||
| 3130 | * | ||
| 3131 | * a3 = a2 * e + a * (1 - e) | ||
| 3132 | * = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e) | ||
| 3133 | * = a0 * e^3 + a * (1 - e) * (1 + e + e^2) | ||
| 3134 | * | ||
| 3135 | * ... | ||
| 3136 | * | ||
| 3137 | * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1] | ||
| 3138 | * = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e) | ||
| 3139 | * = a0 * e^n + a * (1 - e^n) | ||
| 3140 | * | ||
| 3141 | * [1] application of the geometric series: | ||
| 3142 | * | ||
| 3143 | * n 1 - x^(n+1) | ||
| 3144 | * S_n := \Sum x^i = ------------- | ||
| 3145 | * i=0 1 - x | ||
| 3146 | */ | ||
| 3147 | static unsigned long | ||
| 3148 | calc_load_n(unsigned long load, unsigned long exp, | ||
| 3149 | unsigned long active, unsigned int n) | ||
| 3150 | { | ||
| 3151 | |||
| 3152 | return calc_load(load, fixed_power_int(exp, FSHIFT, n), active); | ||
| 3153 | } | ||
| 3154 | |||
| 3155 | /* | ||
| 3156 | * NO_HZ can leave us missing all per-cpu ticks calling | ||
| 3157 | * calc_load_account_active(), but since an idle CPU folds its delta into | ||
| 3158 | * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold | ||
| 3159 | * in the pending idle delta if our idle period crossed a load cycle boundary. | ||
| 3160 | * | ||
| 3161 | * Once we've updated the global active value, we need to apply the exponential | ||
| 3162 | * weights adjusted to the number of cycles missed. | ||
| 3163 | */ | ||
| 3164 | static void calc_global_nohz(unsigned long ticks) | ||
| 3165 | { | ||
| 3166 | long delta, active, n; | ||
| 3167 | |||
| 3168 | if (time_before(jiffies, calc_load_update)) | ||
| 3169 | return; | ||
| 3170 | |||
| 3171 | /* | ||
| 3172 | * If we crossed a calc_load_update boundary, make sure to fold | ||
| 3173 | * any pending idle changes, the respective CPUs might have | ||
| 3174 | * missed the tick driven calc_load_account_active() update | ||
| 3175 | * due to NO_HZ. | ||
| 3176 | */ | ||
| 3177 | delta = calc_load_fold_idle(); | ||
| 3178 | if (delta) | ||
| 3179 | atomic_long_add(delta, &calc_load_tasks); | ||
| 3180 | |||
| 3181 | /* | ||
| 3182 | * If we were idle for multiple load cycles, apply them. | ||
| 3183 | */ | ||
| 3184 | if (ticks >= LOAD_FREQ) { | ||
| 3185 | n = ticks / LOAD_FREQ; | ||
| 3186 | |||
| 3187 | active = atomic_long_read(&calc_load_tasks); | ||
| 3188 | active = active > 0 ? active * FIXED_1 : 0; | ||
| 3189 | |||
| 3190 | avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); | ||
| 3191 | avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); | ||
| 3192 | avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); | ||
| 3193 | |||
| 3194 | calc_load_update += n * LOAD_FREQ; | ||
| 3195 | } | ||
| 3196 | |||
| 3197 | /* | ||
| 3198 | * It's possible the remainder of the above division also crosses | ||
| 3199 | * a LOAD_FREQ period, the regular check in calc_global_load() | ||
| 3200 | * which comes after this will take care of that. | ||
| 3201 | * | ||
| 3202 | * Consider us being 11 ticks before a cycle completion, and us | ||
| 3203 | * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will | ||
| 3204 | * age us 4 cycles, and the test in calc_global_load() will | ||
| 3205 | * pick up the final one. | ||
| 3206 | */ | ||
| 3207 | } | ||
| 3020 | #else | 3208 | #else |
| 3021 | static void calc_load_account_idle(struct rq *this_rq) | 3209 | static void calc_load_account_idle(struct rq *this_rq) |
| 3022 | { | 3210 | { |
| @@ -3026,6 +3214,10 @@ static inline long calc_load_fold_idle(void) | |||
| 3026 | { | 3214 | { |
| 3027 | return 0; | 3215 | return 0; |
| 3028 | } | 3216 | } |
| 3217 | |||
| 3218 | static void calc_global_nohz(unsigned long ticks) | ||
| 3219 | { | ||
| 3220 | } | ||
| 3029 | #endif | 3221 | #endif |
| 3030 | 3222 | ||
| 3031 | /** | 3223 | /** |
| @@ -3043,24 +3235,17 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift) | |||
| 3043 | loads[2] = (avenrun[2] + offset) << shift; | 3235 | loads[2] = (avenrun[2] + offset) << shift; |
| 3044 | } | 3236 | } |
| 3045 | 3237 | ||
| 3046 | static unsigned long | ||
| 3047 | calc_load(unsigned long load, unsigned long exp, unsigned long active) | ||
| 3048 | { | ||
| 3049 | load *= exp; | ||
| 3050 | load += active * (FIXED_1 - exp); | ||
| 3051 | return load >> FSHIFT; | ||
| 3052 | } | ||
| 3053 | |||
| 3054 | /* | 3238 | /* |
| 3055 | * calc_load - update the avenrun load estimates 10 ticks after the | 3239 | * calc_load - update the avenrun load estimates 10 ticks after the |
| 3056 | * CPUs have updated calc_load_tasks. | 3240 | * CPUs have updated calc_load_tasks. |
| 3057 | */ | 3241 | */ |
| 3058 | void calc_global_load(void) | 3242 | void calc_global_load(unsigned long ticks) |
| 3059 | { | 3243 | { |
| 3060 | unsigned long upd = calc_load_update + 10; | ||
| 3061 | long active; | 3244 | long active; |
| 3062 | 3245 | ||
| 3063 | if (time_before(jiffies, upd)) | 3246 | calc_global_nohz(ticks); |
| 3247 | |||
| 3248 | if (time_before(jiffies, calc_load_update + 10)) | ||
| 3064 | return; | 3249 | return; |
| 3065 | 3250 | ||
| 3066 | active = atomic_long_read(&calc_load_tasks); | 3251 | active = atomic_long_read(&calc_load_tasks); |
| @@ -3714,7 +3899,6 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev) | |||
| 3714 | { | 3899 | { |
| 3715 | if (prev->se.on_rq) | 3900 | if (prev->se.on_rq) |
| 3716 | update_rq_clock(rq); | 3901 | update_rq_clock(rq); |
| 3717 | rq->skip_clock_update = 0; | ||
| 3718 | prev->sched_class->put_prev_task(rq, prev); | 3902 | prev->sched_class->put_prev_task(rq, prev); |
| 3719 | } | 3903 | } |
| 3720 | 3904 | ||
| @@ -3772,7 +3956,6 @@ need_resched_nonpreemptible: | |||
| 3772 | hrtick_clear(rq); | 3956 | hrtick_clear(rq); |
| 3773 | 3957 | ||
| 3774 | raw_spin_lock_irq(&rq->lock); | 3958 | raw_spin_lock_irq(&rq->lock); |
| 3775 | clear_tsk_need_resched(prev); | ||
| 3776 | 3959 | ||
| 3777 | switch_count = &prev->nivcsw; | 3960 | switch_count = &prev->nivcsw; |
| 3778 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { | 3961 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { |
| @@ -3804,6 +3987,8 @@ need_resched_nonpreemptible: | |||
| 3804 | 3987 | ||
| 3805 | put_prev_task(rq, prev); | 3988 | put_prev_task(rq, prev); |
| 3806 | next = pick_next_task(rq); | 3989 | next = pick_next_task(rq); |
| 3990 | clear_tsk_need_resched(prev); | ||
| 3991 | rq->skip_clock_update = 0; | ||
| 3807 | 3992 | ||
| 3808 | if (likely(prev != next)) { | 3993 | if (likely(prev != next)) { |
| 3809 | sched_info_switch(prev, next); | 3994 | sched_info_switch(prev, next); |
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index c8231fb15708..3308fd7f1b52 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
| @@ -349,25 +349,47 @@ static int parse(struct nlattr *na, struct cpumask *mask) | |||
| 349 | return ret; | 349 | return ret; |
| 350 | } | 350 | } |
| 351 | 351 | ||
| 352 | #ifdef CONFIG_IA64 | ||
| 353 | #define TASKSTATS_NEEDS_PADDING 1 | ||
| 354 | #endif | ||
| 355 | |||
| 352 | static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) | 356 | static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) |
| 353 | { | 357 | { |
| 354 | struct nlattr *na, *ret; | 358 | struct nlattr *na, *ret; |
| 355 | int aggr; | 359 | int aggr; |
| 356 | 360 | ||
| 357 | /* If we don't pad, we end up with alignment on a 4 byte boundary. | ||
| 358 | * This causes lots of runtime warnings on systems requiring 8 byte | ||
| 359 | * alignment */ | ||
| 360 | u32 pids[2] = { pid, 0 }; | ||
| 361 | int pid_size = ALIGN(sizeof(pid), sizeof(long)); | ||
| 362 | |||
| 363 | aggr = (type == TASKSTATS_TYPE_PID) | 361 | aggr = (type == TASKSTATS_TYPE_PID) |
| 364 | ? TASKSTATS_TYPE_AGGR_PID | 362 | ? TASKSTATS_TYPE_AGGR_PID |
| 365 | : TASKSTATS_TYPE_AGGR_TGID; | 363 | : TASKSTATS_TYPE_AGGR_TGID; |
| 366 | 364 | ||
| 365 | /* | ||
| 366 | * The taskstats structure is internally aligned on 8 byte | ||
| 367 | * boundaries but the layout of the aggregate reply, with | ||
| 368 | * two NLA headers and the pid (each 4 bytes), actually | ||
| 369 | * forces the entire structure to be unaligned. This causes | ||
| 370 | * the kernel to issue unaligned access warnings on some | ||
| 371 | * architectures like ia64. Unfortunately, some software out there | ||
| 372 | * doesn't properly unroll the NLA packet and assumes that the start | ||
| 373 | * of the taskstats structure will always be 20 bytes from the start | ||
| 374 | * of the netlink payload. Aligning the start of the taskstats | ||
| 375 | * structure breaks this software, which we don't want. So, for now | ||
| 376 | * the alignment only happens on architectures that require it | ||
| 377 | * and those users will have to update to fixed versions of those | ||
| 378 | * packages. Space is reserved in the packet only when needed. | ||
| 379 | * This ifdef should be removed in several years e.g. 2012 once | ||
| 380 | * we can be confident that fixed versions are installed on most | ||
| 381 | * systems. We add the padding before the aggregate since the | ||
| 382 | * aggregate is already a defined type. | ||
| 383 | */ | ||
| 384 | #ifdef TASKSTATS_NEEDS_PADDING | ||
| 385 | if (nla_put(skb, TASKSTATS_TYPE_NULL, 0, NULL) < 0) | ||
| 386 | goto err; | ||
| 387 | #endif | ||
| 367 | na = nla_nest_start(skb, aggr); | 388 | na = nla_nest_start(skb, aggr); |
| 368 | if (!na) | 389 | if (!na) |
| 369 | goto err; | 390 | goto err; |
| 370 | if (nla_put(skb, type, pid_size, pids) < 0) | 391 | |
| 392 | if (nla_put(skb, type, sizeof(pid), &pid) < 0) | ||
| 371 | goto err; | 393 | goto err; |
| 372 | ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); | 394 | ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); |
| 373 | if (!ret) | 395 | if (!ret) |
| @@ -456,6 +478,18 @@ out: | |||
| 456 | return rc; | 478 | return rc; |
| 457 | } | 479 | } |
| 458 | 480 | ||
| 481 | static size_t taskstats_packet_size(void) | ||
| 482 | { | ||
| 483 | size_t size; | ||
| 484 | |||
| 485 | size = nla_total_size(sizeof(u32)) + | ||
| 486 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | ||
| 487 | #ifdef TASKSTATS_NEEDS_PADDING | ||
| 488 | size += nla_total_size(0); /* Padding for alignment */ | ||
| 489 | #endif | ||
| 490 | return size; | ||
| 491 | } | ||
| 492 | |||
| 459 | static int cmd_attr_pid(struct genl_info *info) | 493 | static int cmd_attr_pid(struct genl_info *info) |
| 460 | { | 494 | { |
| 461 | struct taskstats *stats; | 495 | struct taskstats *stats; |
| @@ -464,8 +498,7 @@ static int cmd_attr_pid(struct genl_info *info) | |||
| 464 | u32 pid; | 498 | u32 pid; |
| 465 | int rc; | 499 | int rc; |
| 466 | 500 | ||
| 467 | size = nla_total_size(sizeof(u32)) + | 501 | size = taskstats_packet_size(); |
| 468 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | ||
| 469 | 502 | ||
| 470 | rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); | 503 | rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); |
| 471 | if (rc < 0) | 504 | if (rc < 0) |
| @@ -494,8 +527,7 @@ static int cmd_attr_tgid(struct genl_info *info) | |||
| 494 | u32 tgid; | 527 | u32 tgid; |
| 495 | int rc; | 528 | int rc; |
| 496 | 529 | ||
| 497 | size = nla_total_size(sizeof(u32)) + | 530 | size = taskstats_packet_size(); |
| 498 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | ||
| 499 | 531 | ||
| 500 | rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); | 532 | rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); |
| 501 | if (rc < 0) | 533 | if (rc < 0) |
| @@ -570,8 +602,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) | |||
| 570 | /* | 602 | /* |
| 571 | * Size includes space for nested attributes | 603 | * Size includes space for nested attributes |
| 572 | */ | 604 | */ |
| 573 | size = nla_total_size(sizeof(u32)) + | 605 | size = taskstats_packet_size(); |
| 574 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | ||
| 575 | 606 | ||
| 576 | is_thread_group = !!taskstats_tgid_alloc(tsk); | 607 | is_thread_group = !!taskstats_tgid_alloc(tsk); |
| 577 | if (is_thread_group) { | 608 | if (is_thread_group) { |
diff --git a/kernel/timer.c b/kernel/timer.c index 68a9ae7679b7..353b9227c2ec 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
| @@ -1252,6 +1252,12 @@ unsigned long get_next_timer_interrupt(unsigned long now) | |||
| 1252 | struct tvec_base *base = __get_cpu_var(tvec_bases); | 1252 | struct tvec_base *base = __get_cpu_var(tvec_bases); |
| 1253 | unsigned long expires; | 1253 | unsigned long expires; |
| 1254 | 1254 | ||
| 1255 | /* | ||
| 1256 | * Pretend that there is no timer pending if the cpu is offline. | ||
| 1257 | * Possible pending timers will be migrated later to an active cpu. | ||
| 1258 | */ | ||
| 1259 | if (cpu_is_offline(smp_processor_id())) | ||
| 1260 | return now + NEXT_TIMER_MAX_DELTA; | ||
| 1255 | spin_lock(&base->lock); | 1261 | spin_lock(&base->lock); |
| 1256 | if (time_before_eq(base->next_timer, base->timer_jiffies)) | 1262 | if (time_before_eq(base->next_timer, base->timer_jiffies)) |
| 1257 | base->next_timer = __next_timer_interrupt(base); | 1263 | base->next_timer = __next_timer_interrupt(base); |
| @@ -1319,7 +1325,7 @@ void do_timer(unsigned long ticks) | |||
| 1319 | { | 1325 | { |
| 1320 | jiffies_64 += ticks; | 1326 | jiffies_64 += ticks; |
| 1321 | update_wall_time(); | 1327 | update_wall_time(); |
| 1322 | calc_global_load(); | 1328 | calc_global_load(ticks); |
| 1323 | } | 1329 | } |
| 1324 | 1330 | ||
| 1325 | #ifdef __ARCH_WANT_SYS_ALARM | 1331 | #ifdef __ARCH_WANT_SYS_ALARM |
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9ed509a015d8..bd1c35a4fbcc 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
| @@ -3853,6 +3853,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
| 3853 | 3853 | ||
| 3854 | /* Need to copy one event at a time */ | 3854 | /* Need to copy one event at a time */ |
| 3855 | do { | 3855 | do { |
| 3856 | /* We need the size of one event, because | ||
| 3857 | * rb_advance_reader only advances by one event, | ||
| 3858 | * whereas rb_event_ts_length may include the size of | ||
| 3859 | * one or two events. | ||
| 3860 | * We have already ensured there's enough space if this | ||
| 3861 | * is a time extend. */ | ||
| 3862 | size = rb_event_length(event); | ||
| 3856 | memcpy(bpage->data + pos, rpage->data + rpos, size); | 3863 | memcpy(bpage->data + pos, rpage->data + rpos, size); |
| 3857 | 3864 | ||
| 3858 | len -= size; | 3865 | len -= size; |
| @@ -3867,7 +3874,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
| 3867 | event = rb_reader_event(cpu_buffer); | 3874 | event = rb_reader_event(cpu_buffer); |
| 3868 | /* Always keep the time extend and data together */ | 3875 | /* Always keep the time extend and data together */ |
| 3869 | size = rb_event_ts_length(event); | 3876 | size = rb_event_ts_length(event); |
| 3870 | } while (len > size); | 3877 | } while (len >= size); |
| 3871 | 3878 | ||
| 3872 | /* update bpage */ | 3879 | /* update bpage */ |
| 3873 | local_set(&bpage->commit, pos); | 3880 | local_set(&bpage->commit, pos); |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c380612273bf..f8cf959bad45 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -2338,11 +2338,19 @@ tracing_write_stub(struct file *filp, const char __user *ubuf, | |||
| 2338 | return count; | 2338 | return count; |
| 2339 | } | 2339 | } |
| 2340 | 2340 | ||
| 2341 | static loff_t tracing_seek(struct file *file, loff_t offset, int origin) | ||
| 2342 | { | ||
| 2343 | if (file->f_mode & FMODE_READ) | ||
| 2344 | return seq_lseek(file, offset, origin); | ||
| 2345 | else | ||
| 2346 | return 0; | ||
| 2347 | } | ||
| 2348 | |||
| 2341 | static const struct file_operations tracing_fops = { | 2349 | static const struct file_operations tracing_fops = { |
| 2342 | .open = tracing_open, | 2350 | .open = tracing_open, |
| 2343 | .read = seq_read, | 2351 | .read = seq_read, |
| 2344 | .write = tracing_write_stub, | 2352 | .write = tracing_write_stub, |
| 2345 | .llseek = seq_lseek, | 2353 | .llseek = tracing_seek, |
| 2346 | .release = tracing_release, | 2354 | .release = tracing_release, |
| 2347 | }; | 2355 | }; |
| 2348 | 2356 | ||
diff --git a/kernel/user.c b/kernel/user.c index 2c7d8d5914b1..5c598ca781df 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
| @@ -158,6 +158,7 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) | |||
| 158 | spin_lock_irq(&uidhash_lock); | 158 | spin_lock_irq(&uidhash_lock); |
| 159 | up = uid_hash_find(uid, hashent); | 159 | up = uid_hash_find(uid, hashent); |
| 160 | if (up) { | 160 | if (up) { |
| 161 | put_user_ns(ns); | ||
| 161 | key_put(new->uid_keyring); | 162 | key_put(new->uid_keyring); |
| 162 | key_put(new->session_keyring); | 163 | key_put(new->session_keyring); |
| 163 | kmem_cache_free(uid_cachep, new); | 164 | kmem_cache_free(uid_cachep, new); |
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 14b8120d5232..c812c4927cab 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
| @@ -364,7 +364,8 @@ static int watchdog_nmi_enable(int cpu) | |||
| 364 | goto out_save; | 364 | goto out_save; |
| 365 | } | 365 | } |
| 366 | 366 | ||
| 367 | printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event); | 367 | printk(KERN_ERR "NMI watchdog disabled for cpu%i: unable to create perf event: %ld\n", |
| 368 | cpu, PTR_ERR(event)); | ||
| 368 | return PTR_ERR(event); | 369 | return PTR_ERR(event); |
| 369 | 370 | ||
| 370 | /* success path */ | 371 | /* success path */ |
