Diffstat (limited to 'kernel')
-rw-r--r--  kernel/fork.c               |    1
-rw-r--r--  kernel/irq/proc.c           |    2
-rw-r--r--  kernel/kthread.c            |   11
-rw-r--r--  kernel/perf_event.c         |   37
-rw-r--r--  kernel/power/Kconfig        |    5
-rw-r--r--  kernel/power/Makefile       |    1
-rw-r--r--  kernel/power/nvs.c          |  136
-rw-r--r--  kernel/power/swap.c         |    2
-rw-r--r--  kernel/power/user.c         |    2
-rw-r--r--  kernel/printk.c             |    4
-rw-r--r--  kernel/resource.c           |  104
-rw-r--r--  kernel/sched.c              |  287
-rw-r--r--  kernel/taskstats.c          |   57
-rw-r--r--  kernel/timer.c              |    8
-rw-r--r--  kernel/trace/ring_buffer.c  |    9
-rw-r--r--  kernel/trace/trace.c        |   10
-rw-r--r--  kernel/user.c               |    1
-rw-r--r--  kernel/watchdog.c           |    3
-rw-r--r--  kernel/workqueue.c          |    7
19 files changed, 369 insertions(+), 318 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 3b159c5991b7..5447dc7defa9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -273,6 +273,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 
 	setup_thread_stack(tsk, orig);
 	clear_user_return_notifier(tsk);
+	clear_tsk_need_resched(tsk);
 	stackend = end_of_stack(tsk);
 	*stackend = STACK_END_MAGIC;	/* for overflow detection */
 
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 01b1d3a88983..6c8a2a9f8a7b 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -214,7 +214,7 @@ static int irq_spurious_proc_show(struct seq_file *m, void *v)
 
 static int irq_spurious_proc_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, irq_spurious_proc_show, NULL);
+	return single_open(file, irq_spurious_proc_show, PDE(inode)->data);
 }
 
 static const struct file_operations irq_spurious_proc_fops = {
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 2dc3786349d1..ca61bbdd44b2 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -265,6 +265,17 @@ int kthreadd(void *unused)
 	return 0;
 }
 
+void __init_kthread_worker(struct kthread_worker *worker,
+				const char *name,
+				struct lock_class_key *key)
+{
+	spin_lock_init(&worker->lock);
+	lockdep_set_class_and_name(&worker->lock, key, name);
+	INIT_LIST_HEAD(&worker->work_list);
+	worker->task = NULL;
+}
+EXPORT_SYMBOL_GPL(__init_kthread_worker);
+
 /**
  * kthread_worker_fn - kthread function to process kthread_worker
  * @worker_ptr: pointer to initialized kthread_worker
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index eac7e3364335..2870feee81dd 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3824,6 +3824,8 @@ static void perf_event_task_event(struct perf_task_event *task_event)
 	rcu_read_lock();
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+		if (cpuctx->active_pmu != pmu)
+			goto next;
 		perf_event_task_ctx(&cpuctx->ctx, task_event);
 
 		ctx = task_event->task_ctx;
@@ -3959,6 +3961,8 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
 	rcu_read_lock();
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+		if (cpuctx->active_pmu != pmu)
+			goto next;
 		perf_event_comm_ctx(&cpuctx->ctx, comm_event);
 
 		ctxn = pmu->task_ctx_nr;
@@ -4144,6 +4148,8 @@ got_name:
 	rcu_read_lock();
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+		if (cpuctx->active_pmu != pmu)
+			goto next;
 		perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
 					vma->vm_flags & VM_EXEC);
 
@@ -4713,7 +4719,7 @@ static int perf_swevent_init(struct perf_event *event)
 		break;
 	}
 
-	if (event_id > PERF_COUNT_SW_MAX)
+	if (event_id >= PERF_COUNT_SW_MAX)
 		return -ENOENT;
 
 	if (!event->parent) {
@@ -5145,20 +5151,36 @@ static void *find_pmu_context(int ctxn)
 	return NULL;
 }
 
-static void free_pmu_context(void * __percpu cpu_context)
+static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu)
 {
-	struct pmu *pmu;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct perf_cpu_context *cpuctx;
+
+		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+
+		if (cpuctx->active_pmu == old_pmu)
+			cpuctx->active_pmu = pmu;
+	}
+}
+
+static void free_pmu_context(struct pmu *pmu)
+{
+	struct pmu *i;
 
 	mutex_lock(&pmus_lock);
 	/*
 	 * Like a real lame refcount.
 	 */
-	list_for_each_entry(pmu, &pmus, entry) {
-		if (pmu->pmu_cpu_context == cpu_context)
+	list_for_each_entry(i, &pmus, entry) {
+		if (i->pmu_cpu_context == pmu->pmu_cpu_context) {
+			update_pmu_context(i, pmu);
 			goto out;
+		}
 	}
 
-	free_percpu(cpu_context);
+	free_percpu(pmu->pmu_cpu_context);
 out:
 	mutex_unlock(&pmus_lock);
 }
@@ -5190,6 +5212,7 @@ int perf_pmu_register(struct pmu *pmu)
 		cpuctx->ctx.pmu = pmu;
 		cpuctx->jiffies_interval = 1;
 		INIT_LIST_HEAD(&cpuctx->rotation_list);
+		cpuctx->active_pmu = pmu;
 	}
 
 got_cpu_context:
@@ -5241,7 +5264,7 @@ void perf_pmu_unregister(struct pmu *pmu)
 	synchronize_rcu();
 
 	free_percpu(pmu->pmu_disable_count);
-	free_pmu_context(pmu->pmu_cpu_context);
+	free_pmu_context(pmu);
 }
 
 struct pmu *perf_init_event(struct perf_event *event)
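Editor's note: the active_pmu checks added above exist because several struct pmu instances can share one per-CPU context (that is what the "lame refcount" in free_pmu_context() tracks), and without a filter each shared context would be processed once per registered pmu. The following is a minimal user-space sketch of that deduplication idea only; the names (struct ctx, visit_contexts) are illustrative stand-ins, not the kernel's types.

#include <stdio.h>

/* Illustrative stand-ins for struct perf_cpu_context / struct pmu. */
struct pmu;
struct ctx { struct pmu *active_pmu; const char *name; };
struct pmu { struct ctx *shared_ctx; const char *name; };

/* Visit each context at most once: only the pmu recorded as the context's
 * "active" owner handles it, mirroring the
 * "if (cpuctx->active_pmu != pmu) goto next;" filter in the diff. */
static void visit_contexts(struct pmu *pmus, int npmus)
{
	for (int i = 0; i < npmus; i++) {
		struct ctx *c = pmus[i].shared_ctx;
		if (c->active_pmu != &pmus[i])
			continue;	/* shared context, someone else owns it */
		printf("pmu %s handles context %s\n", pmus[i].name, c->name);
	}
}

int main(void)
{
	struct ctx shared = { .active_pmu = NULL, .name = "cpu0-swctx" };
	struct pmu pmus[2] = {
		{ .shared_ctx = &shared, .name = "software" },
		{ .shared_ctx = &shared, .name = "tracepoint" },
	};
	shared.active_pmu = &pmus[0];	/* set at registration time */
	visit_contexts(pmus, 2);	/* the shared context is handled exactly once */
	return 0;
}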
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index a5aff3ebad38..265729966ece 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -100,13 +100,9 @@ config PM_SLEEP_ADVANCED_DEBUG
 	depends on PM_ADVANCED_DEBUG
 	default n
 
-config SUSPEND_NVS
-	bool
-
 config SUSPEND
 	bool "Suspend to RAM and standby"
 	depends on PM && ARCH_SUSPEND_POSSIBLE
-	select SUSPEND_NVS if HAS_IOMEM
 	default y
 	---help---
 	  Allow the system to enter sleep states in which main memory is
@@ -140,7 +136,6 @@ config HIBERNATION
 	depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE
 	select LZO_COMPRESS
 	select LZO_DECOMPRESS
-	select SUSPEND_NVS if HAS_IOMEM
 	---help---
 	  Enable the suspend to disk (STD) functionality, which is usually
 	  called "hibernation" in user interfaces. STD checkpoints the
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index f9063c6b185d..120a15823325 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -10,6 +10,5 @@ obj-$(CONFIG_SUSPEND) += suspend.o
 obj-$(CONFIG_PM_TEST_SUSPEND)	+= suspend_test.o
 obj-$(CONFIG_HIBERNATION)	+= hibernate.o snapshot.o swap.o user.o \
 				   block_io.o
-obj-$(CONFIG_SUSPEND_NVS)	+= nvs.o
 
 obj-$(CONFIG_MAGIC_SYSRQ)	+= poweroff.o
diff --git a/kernel/power/nvs.c b/kernel/power/nvs.c
deleted file mode 100644
index 1836db60bbb6..000000000000
--- a/kernel/power/nvs.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * linux/kernel/power/hibernate_nvs.c - Routines for handling NVS memory
- *
- * Copyright (C) 2008,2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
- *
- * This file is released under the GPLv2.
- */
-
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/suspend.h>
-
-/*
- * Platforms, like ACPI, may want us to save some memory used by them during
- * suspend and to restore the contents of this memory during the subsequent
- * resume. The code below implements a mechanism allowing us to do that.
- */
-
-struct nvs_page {
-	unsigned long phys_start;
-	unsigned int size;
-	void *kaddr;
-	void *data;
-	struct list_head node;
-};
-
-static LIST_HEAD(nvs_list);
-
-/**
- * suspend_nvs_register - register platform NVS memory region to save
- * @start - physical address of the region
- * @size - size of the region
- *
- * The NVS region need not be page-aligned (both ends) and we arrange
- * things so that the data from page-aligned addresses in this region will
- * be copied into separate RAM pages.
- */
-int suspend_nvs_register(unsigned long start, unsigned long size)
-{
-	struct nvs_page *entry, *next;
-
-	while (size > 0) {
-		unsigned int nr_bytes;
-
-		entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL);
-		if (!entry)
-			goto Error;
-
-		list_add_tail(&entry->node, &nvs_list);
-		entry->phys_start = start;
-		nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK);
-		entry->size = (size < nr_bytes) ? size : nr_bytes;
-
-		start += entry->size;
-		size -= entry->size;
-	}
-	return 0;
-
- Error:
-	list_for_each_entry_safe(entry, next, &nvs_list, node) {
-		list_del(&entry->node);
-		kfree(entry);
-	}
-	return -ENOMEM;
-}
-
-/**
- * suspend_nvs_free - free data pages allocated for saving NVS regions
- */
-void suspend_nvs_free(void)
-{
-	struct nvs_page *entry;
-
-	list_for_each_entry(entry, &nvs_list, node)
-		if (entry->data) {
-			free_page((unsigned long)entry->data);
-			entry->data = NULL;
-			if (entry->kaddr) {
-				iounmap(entry->kaddr);
-				entry->kaddr = NULL;
-			}
-		}
-}
-
-/**
- * suspend_nvs_alloc - allocate memory necessary for saving NVS regions
- */
-int suspend_nvs_alloc(void)
-{
-	struct nvs_page *entry;
-
-	list_for_each_entry(entry, &nvs_list, node) {
-		entry->data = (void *)__get_free_page(GFP_KERNEL);
-		if (!entry->data) {
-			suspend_nvs_free();
-			return -ENOMEM;
-		}
-	}
-	return 0;
-}
-
-/**
- * suspend_nvs_save - save NVS memory regions
- */
-void suspend_nvs_save(void)
-{
-	struct nvs_page *entry;
-
-	printk(KERN_INFO "PM: Saving platform NVS memory\n");
-
-	list_for_each_entry(entry, &nvs_list, node)
-		if (entry->data) {
-			entry->kaddr = ioremap(entry->phys_start, entry->size);
-			memcpy(entry->data, entry->kaddr, entry->size);
-		}
-}
-
-/**
- * suspend_nvs_restore - restore NVS memory regions
- *
- * This function is going to be called with interrupts disabled, so it
- * cannot iounmap the virtual addresses used to access the NVS region.
- */
-void suspend_nvs_restore(void)
-{
-	struct nvs_page *entry;
-
-	printk(KERN_INFO "PM: Restoring platform NVS memory\n");
-
-	list_for_each_entry(entry, &nvs_list, node)
-		if (entry->data)
-			memcpy(entry->kaddr, entry->data, entry->size);
-}
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index baf667bb2794..8c7e4832b9be 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -30,7 +30,7 @@
 
 #include "power.h"
 
-#define HIBERNATE_SIG	"LINHIB0001"
+#define HIBERNATE_SIG	"S1SUSPEND"
 
 /*
  * The swap map is a data structure used for keeping track of each page
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 1b2ea31e6bd8..c36c3b9e8a84 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -137,7 +137,7 @@ static int snapshot_release(struct inode *inode, struct file *filp)
 	free_all_swap_pages(data->swap);
 	if (data->frozen)
 		thaw_processes();
-	pm_notifier_call_chain(data->mode == O_WRONLY ?
+	pm_notifier_call_chain(data->mode == O_RDONLY ?
 			PM_POST_HIBERNATION : PM_POST_RESTORE);
 	atomic_inc(&snapshot_device_available);
 
diff --git a/kernel/printk.c b/kernel/printk.c
index 9a2264fc42ca..a23315dc4498 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1082,13 +1082,15 @@ void printk_tick(void)
 
 int printk_needs_cpu(int cpu)
 {
+	if (unlikely(cpu_is_offline(cpu)))
+		printk_tick();
 	return per_cpu(printk_pending, cpu);
 }
 
 void wake_up_klogd(void)
 {
 	if (waitqueue_active(&log_wait))
-		__raw_get_cpu_var(printk_pending) = 1;
+		this_cpu_write(printk_pending, 1);
 }
 
 /**
diff --git a/kernel/resource.c b/kernel/resource.c
index 9fad33efd0db..798e2fae2a06 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -40,23 +40,6 @@ EXPORT_SYMBOL(iomem_resource);
 
 static DEFINE_RWLOCK(resource_lock);
 
-/*
- * By default, we allocate free space bottom-up. The architecture can request
- * top-down by clearing this flag. The user can override the architecture's
- * choice with the "resource_alloc_from_bottom" kernel boot option, but that
- * should only be a debugging tool.
- */
-int resource_alloc_from_bottom = 1;
-
-static __init int setup_alloc_from_bottom(char *s)
-{
-	printk(KERN_INFO
-	       "resource: allocating from bottom-up; please report a bug\n");
-	resource_alloc_from_bottom = 1;
-	return 0;
-}
-early_param("resource_alloc_from_bottom", setup_alloc_from_bottom);
-
 static void *r_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	struct resource *p = v;
@@ -374,6 +357,10 @@ int __weak page_is_ram(unsigned long pfn)
 	return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1;
 }
 
+void __weak arch_remove_reservations(struct resource *avail)
+{
+}
+
 static resource_size_t simple_align_resource(void *data,
 					     const struct resource *avail,
 					     resource_size_t size,
@@ -397,74 +384,7 @@ static bool resource_contains(struct resource *res1, struct resource *res2)
 }
 
 /*
- * Find the resource before "child" in the sibling list of "root" children.
- */
-static struct resource *find_sibling_prev(struct resource *root, struct resource *child)
-{
-	struct resource *this;
-
-	for (this = root->child; this; this = this->sibling)
-		if (this->sibling == child)
-			return this;
-
-	return NULL;
-}
-
-/*
  * Find empty slot in the resource tree given range and alignment.
- * This version allocates from the end of the root resource first.
- */
-static int find_resource_from_top(struct resource *root, struct resource *new,
-				  resource_size_t size, resource_size_t min,
-				  resource_size_t max, resource_size_t align,
-				  resource_size_t (*alignf)(void *,
-							     const struct resource *,
-							     resource_size_t,
-							     resource_size_t),
-				  void *alignf_data)
-{
-	struct resource *this;
-	struct resource tmp, avail, alloc;
-
-	tmp.start = root->end;
-	tmp.end = root->end;
-
-	this = find_sibling_prev(root, NULL);
-	for (;;) {
-		if (this) {
-			if (this->end < root->end)
-				tmp.start = this->end + 1;
-		} else
-			tmp.start = root->start;
-
-		resource_clip(&tmp, min, max);
-
-		/* Check for overflow after ALIGN() */
-		avail = *new;
-		avail.start = ALIGN(tmp.start, align);
-		avail.end = tmp.end;
-		if (avail.start >= tmp.start) {
-			alloc.start = alignf(alignf_data, &avail, size, align);
-			alloc.end = alloc.start + size - 1;
-			if (resource_contains(&avail, &alloc)) {
-				new->start = alloc.start;
-				new->end = alloc.end;
-				return 0;
-			}
-		}
-
-		if (!this || this->start == root->start)
-			break;
-
-		tmp.end = this->start - 1;
-		this = find_sibling_prev(root, this);
-	}
-	return -EBUSY;
-}
-
-/*
- * Find empty slot in the resource tree given range and alignment.
- * This version allocates from the beginning of the root resource first.
  */
 static int find_resource(struct resource *root, struct resource *new,
 			 resource_size_t size, resource_size_t min,
@@ -478,23 +398,24 @@ static int find_resource(struct resource *root, struct resource *new,
 	struct resource *this = root->child;
 	struct resource tmp = *new, avail, alloc;
 
+	tmp.flags = new->flags;
 	tmp.start = root->start;
 	/*
-	 * Skip past an allocated resource that starts at 0, since the
-	 * assignment of this->start - 1 to tmp->end below would cause an
-	 * underflow.
+	 * Skip past an allocated resource that starts at 0, since the assignment
+	 * of this->start - 1 to tmp->end below would cause an underflow.
 	 */
 	if (this && this->start == 0) {
 		tmp.start = this->end + 1;
 		this = this->sibling;
 	}
-	for (;;) {
+	for(;;) {
 		if (this)
 			tmp.end = this->start - 1;
 		else
 			tmp.end = root->end;
 
 		resource_clip(&tmp, min, max);
+		arch_remove_reservations(&tmp);
 
 		/* Check for overflow after ALIGN() */
 		avail = *new;
@@ -509,10 +430,8 @@ static int find_resource(struct resource *root, struct resource *new,
 				return 0;
 			}
 		}
-
 		if (!this)
 			break;
-
 		tmp.start = this->end + 1;
 		this = this->sibling;
 	}
@@ -545,10 +464,7 @@ int allocate_resource(struct resource *root, struct resource *new,
 		alignf = simple_align_resource;
 
 	write_lock(&resource_lock);
-	if (resource_alloc_from_bottom)
-		err = find_resource(root, new, size, min, max, align, alignf, alignf_data);
-	else
-		err = find_resource_from_top(root, new, size, min, max, align, alignf, alignf_data);
+	err = find_resource(root, new, size, min, max, align, alignf, alignf_data);
 	if (err >= 0 && __request_resource(root, new))
 		err = -EBUSY;
 	write_unlock(&resource_lock);
diff --git a/kernel/sched.c b/kernel/sched.c
index dc91a4d09ac3..297d1a0eedb0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -636,22 +636,18 @@ static inline struct task_group *task_group(struct task_struct *p)
 
 #endif /* CONFIG_CGROUP_SCHED */
 
-static u64 irq_time_cpu(int cpu);
-static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time);
+static void update_rq_clock_task(struct rq *rq, s64 delta);
 
-inline void update_rq_clock(struct rq *rq)
+static void update_rq_clock(struct rq *rq)
 {
-	if (!rq->skip_clock_update) {
-		int cpu = cpu_of(rq);
-		u64 irq_time;
+	s64 delta;
 
-		rq->clock = sched_clock_cpu(cpu);
-		irq_time = irq_time_cpu(cpu);
-		if (rq->clock - irq_time > rq->clock_task)
-			rq->clock_task = rq->clock - irq_time;
+	if (rq->skip_clock_update)
+		return;
 
-		sched_irq_time_avg_update(rq, irq_time);
-	}
+	delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
+	rq->clock += delta;
+	update_rq_clock_task(rq, delta);
 }
 
 /*
@@ -1924,10 +1920,9 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
  * They are read and saved off onto struct rq in update_rq_clock().
  * This may result in other CPU reading this CPU's irq time and can
  * race with irq/account_system_vtime on this CPU. We would either get old
- * or new value (or semi updated value on 32 bit) with a side effect of
- * accounting a slice of irq time to wrong task when irq is in progress
- * while we read rq->clock. That is a worthy compromise in place of having
- * locks on each irq in account_system_time.
+ * or new value with a side effect of accounting a slice of irq time to wrong
+ * task when irq is in progress while we read rq->clock. That is a worthy
+ * compromise in place of having locks on each irq in account_system_time.
  */
 static DEFINE_PER_CPU(u64, cpu_hardirq_time);
 static DEFINE_PER_CPU(u64, cpu_softirq_time);
@@ -1945,19 +1940,58 @@ void disable_sched_clock_irqtime(void)
 	sched_clock_irqtime = 0;
 }
 
-static u64 irq_time_cpu(int cpu)
+#ifndef CONFIG_64BIT
+static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
+
+static inline void irq_time_write_begin(void)
 {
-	if (!sched_clock_irqtime)
-		return 0;
+	__this_cpu_inc(irq_time_seq.sequence);
+	smp_wmb();
+}
+
+static inline void irq_time_write_end(void)
+{
+	smp_wmb();
+	__this_cpu_inc(irq_time_seq.sequence);
+}
+
+static inline u64 irq_time_read(int cpu)
+{
+	u64 irq_time;
+	unsigned seq;
 
+	do {
+		seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
+		irq_time = per_cpu(cpu_softirq_time, cpu) +
+			   per_cpu(cpu_hardirq_time, cpu);
+	} while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
+
+	return irq_time;
+}
+#else /* CONFIG_64BIT */
+static inline void irq_time_write_begin(void)
+{
+}
+
+static inline void irq_time_write_end(void)
+{
+}
+
+static inline u64 irq_time_read(int cpu)
+{
 	return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
 }
+#endif /* CONFIG_64BIT */
 
+/*
+ * Called before incrementing preempt_count on {soft,}irq_enter
+ * and before decrementing preempt_count on {soft,}irq_exit.
+ */
 void account_system_vtime(struct task_struct *curr)
 {
 	unsigned long flags;
+	s64 delta;
 	int cpu;
-	u64 now, delta;
 
 	if (!sched_clock_irqtime)
 		return;
@@ -1965,9 +1999,10 @@ void account_system_vtime(struct task_struct *curr)
 	local_irq_save(flags);
 
 	cpu = smp_processor_id();
-	now = sched_clock_cpu(cpu);
-	delta = now - per_cpu(irq_start_time, cpu);
-	per_cpu(irq_start_time, cpu) = now;
+	delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
+	__this_cpu_add(irq_start_time, delta);
+
+	irq_time_write_begin();
 	/*
 	 * We do not account for softirq time from ksoftirqd here.
 	 * We want to continue accounting softirq time to ksoftirqd thread
@@ -1975,33 +2010,55 @@ void account_system_vtime(struct task_struct *curr)
 	 * that do not consume any time, but still wants to run.
 	 */
 	if (hardirq_count())
-		per_cpu(cpu_hardirq_time, cpu) += delta;
+		__this_cpu_add(cpu_hardirq_time, delta);
 	else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD))
-		per_cpu(cpu_softirq_time, cpu) += delta;
+		__this_cpu_add(cpu_softirq_time, delta);
 
+	irq_time_write_end();
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(account_system_vtime);
 
-static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time)
+static void update_rq_clock_task(struct rq *rq, s64 delta)
 {
-	if (sched_clock_irqtime && sched_feat(NONIRQ_POWER)) {
-		u64 delta_irq = curr_irq_time - rq->prev_irq_time;
-		rq->prev_irq_time = curr_irq_time;
-		sched_rt_avg_update(rq, delta_irq);
-	}
+	s64 irq_delta;
+
+	irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
+
+	/*
+	 * Since irq_time is only updated on {soft,}irq_exit, we might run into
+	 * this case when a previous update_rq_clock() happened inside a
+	 * {soft,}irq region.
+	 *
+	 * When this happens, we stop ->clock_task and only update the
+	 * prev_irq_time stamp to account for the part that fit, so that a next
+	 * update will consume the rest. This ensures ->clock_task is
+	 * monotonic.
+	 *
+	 * It does however cause some slight miss-attribution of {soft,}irq
+	 * time, a more accurate solution would be to update the irq_time using
+	 * the current rq->clock timestamp, except that would require using
+	 * atomic ops.
+	 */
+	if (irq_delta > delta)
+		irq_delta = delta;
+
+	rq->prev_irq_time += irq_delta;
+	delta -= irq_delta;
+	rq->clock_task += delta;
+
+	if (irq_delta && sched_feat(NONIRQ_POWER))
+		sched_rt_avg_update(rq, irq_delta);
 }
 
-#else
+#else /* CONFIG_IRQ_TIME_ACCOUNTING */
 
-static u64 irq_time_cpu(int cpu)
+static void update_rq_clock_task(struct rq *rq, s64 delta)
 {
-	return 0;
+	rq->clock_task += delta;
 }
 
-static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { }
-
-#endif
+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 
 #include "sched_idletask.c"
 #include "sched_fair.c"
@@ -2129,7 +2186,7 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 	 * A queue event has occurred, and we're going to schedule. In
 	 * this case, we can save a useless back to back clock update.
 	 */
-	if (test_tsk_need_resched(rq->curr))
+	if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr))
 		rq->skip_clock_update = 1;
 }
 
@@ -3119,6 +3176,15 @@ static long calc_load_fold_active(struct rq *this_rq)
 	return delta;
 }
 
+static unsigned long
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
+{
+	load *= exp;
+	load += active * (FIXED_1 - exp);
+	load += 1UL << (FSHIFT - 1);
+	return load >> FSHIFT;
+}
+
 #ifdef CONFIG_NO_HZ
 /*
  * For NO_HZ we delay the active fold to the next LOAD_FREQ update.
@@ -3148,6 +3214,128 @@ static long calc_load_fold_idle(void)
 
 	return delta;
 }
+
+/**
+ * fixed_power_int - compute: x^n, in O(log n) time
+ *
+ * @x:         base of the power
+ * @frac_bits: fractional bits of @x
+ * @n:         power to raise @x to.
+ *
+ * By exploiting the relation between the definition of the natural power
+ * function: x^n := x*x*...*x (x multiplied by itself for n times), and
+ * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i,
+ * (where: n_i \elem {0, 1}, the binary vector representing n),
+ * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is
+ * of course trivially computable in O(log_2 n), the length of our binary
+ * vector.
+ */
+static unsigned long
+fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
+{
+	unsigned long result = 1UL << frac_bits;
+
+	if (n) for (;;) {
+		if (n & 1) {
+			result *= x;
+			result += 1UL << (frac_bits - 1);
+			result >>= frac_bits;
+		}
+		n >>= 1;
+		if (!n)
+			break;
+		x *= x;
+		x += 1UL << (frac_bits - 1);
+		x >>= frac_bits;
+	}
+
+	return result;
+}
+
+/*
+ * a1 = a0 * e + a * (1 - e)
+ *
+ * a2 = a1 * e + a * (1 - e)
+ *    = (a0 * e + a * (1 - e)) * e + a * (1 - e)
+ *    = a0 * e^2 + a * (1 - e) * (1 + e)
+ *
+ * a3 = a2 * e + a * (1 - e)
+ *    = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e)
+ *    = a0 * e^3 + a * (1 - e) * (1 + e + e^2)
+ *
+ *  ...
+ *
+ * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1]
+ *    = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e)
+ *    = a0 * e^n + a * (1 - e^n)
+ *
+ * [1] application of the geometric series:
+ *
+ *              n         1 - x^(n+1)
+ *     S_n := \Sum x^i = -------------
+ *             i=0          1 - x
+ */
+static unsigned long
+calc_load_n(unsigned long load, unsigned long exp,
+	    unsigned long active, unsigned int n)
+{
+
+	return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
+}
+
+/*
+ * NO_HZ can leave us missing all per-cpu ticks calling
+ * calc_load_account_active(), but since an idle CPU folds its delta into
+ * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold
+ * in the pending idle delta if our idle period crossed a load cycle boundary.
+ *
+ * Once we've updated the global active value, we need to apply the exponential
+ * weights adjusted to the number of cycles missed.
+ */
+static void calc_global_nohz(unsigned long ticks)
+{
+	long delta, active, n;
+
+	if (time_before(jiffies, calc_load_update))
+		return;
+
+	/*
+	 * If we crossed a calc_load_update boundary, make sure to fold
+	 * any pending idle changes, the respective CPUs might have
+	 * missed the tick driven calc_load_account_active() update
+	 * due to NO_HZ.
+	 */
+	delta = calc_load_fold_idle();
+	if (delta)
+		atomic_long_add(delta, &calc_load_tasks);
+
+	/*
+	 * If we were idle for multiple load cycles, apply them.
+	 */
+	if (ticks >= LOAD_FREQ) {
+		n = ticks / LOAD_FREQ;
+
+		active = atomic_long_read(&calc_load_tasks);
+		active = active > 0 ? active * FIXED_1 : 0;
+
+		avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
+		avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
+		avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
+
+		calc_load_update += n * LOAD_FREQ;
+	}
+
+	/*
+	 * Its possible the remainder of the above division also crosses
+	 * a LOAD_FREQ period, the regular check in calc_global_load()
+	 * which comes after this will take care of that.
+	 *
+	 * Consider us being 11 ticks before a cycle completion, and us
+	 * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will
+	 * age us 4 cycles, and the test in calc_global_load() will
+	 * pick up the final one.
+	 */
+}
 #else
 static void calc_load_account_idle(struct rq *this_rq)
 {
@@ -3157,6 +3345,10 @@ static inline long calc_load_fold_idle(void)
 {
 	return 0;
 }
+
+static void calc_global_nohz(unsigned long ticks)
+{
+}
 #endif
 
 /**
@@ -3174,24 +3366,17 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
 	loads[2] = (avenrun[2] + offset) << shift;
 }
 
-static unsigned long
-calc_load(unsigned long load, unsigned long exp, unsigned long active)
-{
-	load *= exp;
-	load += active * (FIXED_1 - exp);
-	return load >> FSHIFT;
-}
-
 /*
  * calc_load - update the avenrun load estimates 10 ticks after the
  * CPUs have updated calc_load_tasks.
  */
-void calc_global_load(void)
+void calc_global_load(unsigned long ticks)
 {
-	unsigned long upd = calc_load_update + 10;
 	long active;
 
-	if (time_before(jiffies, upd))
+	calc_global_nohz(ticks);
+
+	if (time_before(jiffies, calc_load_update + 10))
 		return;
 
 	active = atomic_long_read(&calc_load_tasks);
@@ -3845,7 +4030,6 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
 	if (prev->se.on_rq)
 		update_rq_clock(rq);
-	rq->skip_clock_update = 0;
 	prev->sched_class->put_prev_task(rq, prev);
 }
 
@@ -3903,7 +4087,6 @@ need_resched_nonpreemptible:
 	hrtick_clear(rq);
 
 	raw_spin_lock_irq(&rq->lock);
-	clear_tsk_need_resched(prev);
 
 	switch_count = &prev->nivcsw;
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
@@ -3935,6 +4118,8 @@ need_resched_nonpreemptible:
 
 	put_prev_task(rq, prev);
 	next = pick_next_task(rq);
+	clear_tsk_need_resched(prev);
+	rq->skip_clock_update = 0;
 
 	if (likely(prev != next)) {
 		sched_info_switch(prev, next);
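Editor's note: the calc_load()/calc_load_n() helpers added above implement a fixed-point exponential moving average, with fixed_power_int() collapsing n missed NO_HZ load cycles into a single e^n step. The standalone sketch below reproduces that arithmetic so the two paths can be compared; it assumes the usual FSHIFT=11, FIXED_1 and EXP_1=1884 constants from include/linux/sched.h and is only an illustration, not the kernel code.

#include <stdio.h>

#define FSHIFT	11			/* bits of fixed-point precision */
#define FIXED_1	(1UL << FSHIFT)		/* 1.0 in fixed point */
#define EXP_1	1884			/* 1/exp(5sec/1min) in fixed point */

/* One 5-second step of the moving average (same formula as the diff). */
static unsigned long calc_load(unsigned long load, unsigned long exp,
			       unsigned long active)
{
	load *= exp;
	load += active * (FIXED_1 - exp);
	load += 1UL << (FSHIFT - 1);	/* round to nearest */
	return load >> FSHIFT;
}

/* x^n in fixed point using square-and-multiply, O(log n) multiplies. */
static unsigned long fixed_power_int(unsigned long x, unsigned int frac_bits,
				     unsigned int n)
{
	unsigned long result = 1UL << frac_bits;

	while (n) {
		if (n & 1) {
			result *= x;
			result += 1UL << (frac_bits - 1);
			result >>= frac_bits;
		}
		n >>= 1;
		x *= x;
		x += 1UL << (frac_bits - 1);
		x >>= frac_bits;
	}
	return result;
}

/* n steps at once: a_n = a_0 * e^n + a * (1 - e^n). */
static unsigned long calc_load_n(unsigned long load, unsigned long exp,
				 unsigned long active, unsigned int n)
{
	return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
}

int main(void)
{
	unsigned long avenrun0 = 2 * FIXED_1;	/* 1-min load currently 2.00 */
	unsigned long active = 0;		/* system went fully idle */
	unsigned long stepped = avenrun0;

	/* Twelve single LOAD_FREQ steps (roughly one idle minute)... */
	for (int i = 0; i < 12; i++)
		stepped = calc_load(stepped, EXP_1, active);
	printf("12 single steps: %lu.%02lu\n",
	       stepped >> FSHIFT, ((stepped & (FIXED_1 - 1)) * 100) >> FSHIFT);

	/* ...versus one batched n=12 step; the two agree up to fixed-point
	 * rounding, which is what calc_global_nohz() relies on. */
	unsigned long batched = calc_load_n(avenrun0, EXP_1, active, 12);
	printf("one n=12 step:   %lu.%02lu\n",
	       batched >> FSHIFT, ((batched & (FIXED_1 - 1)) * 100) >> FSHIFT);
	return 0;
}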
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index c8231fb15708..3308fd7f1b52 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -349,25 +349,47 @@ static int parse(struct nlattr *na, struct cpumask *mask)
 	return ret;
 }
 
+#ifdef CONFIG_IA64
+#define TASKSTATS_NEEDS_PADDING 1
+#endif
+
 static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)
 {
 	struct nlattr *na, *ret;
 	int aggr;
 
-	/* If we don't pad, we end up with alignment on a 4 byte boundary.
-	 * This causes lots of runtime warnings on systems requiring 8 byte
-	 * alignment */
-	u32 pids[2] = { pid, 0 };
-	int pid_size = ALIGN(sizeof(pid), sizeof(long));
-
 	aggr = (type == TASKSTATS_TYPE_PID)
 			? TASKSTATS_TYPE_AGGR_PID
 			: TASKSTATS_TYPE_AGGR_TGID;
 
+	/*
+	 * The taskstats structure is internally aligned on 8 byte
+	 * boundaries but the layout of the aggregrate reply, with
+	 * two NLA headers and the pid (each 4 bytes), actually
+	 * force the entire structure to be unaligned. This causes
+	 * the kernel to issue unaligned access warnings on some
+	 * architectures like ia64. Unfortunately, some software out there
+	 * doesn't properly unroll the NLA packet and assumes that the start
+	 * of the taskstats structure will always be 20 bytes from the start
+	 * of the netlink payload. Aligning the start of the taskstats
+	 * structure breaks this software, which we don't want. So, for now
+	 * the alignment only happens on architectures that require it
+	 * and those users will have to update to fixed versions of those
+	 * packages. Space is reserved in the packet only when needed.
+	 * This ifdef should be removed in several years e.g. 2012 once
+	 * we can be confident that fixed versions are installed on most
+	 * systems. We add the padding before the aggregate since the
+	 * aggregate is already a defined type.
+	 */
+#ifdef TASKSTATS_NEEDS_PADDING
+	if (nla_put(skb, TASKSTATS_TYPE_NULL, 0, NULL) < 0)
+		goto err;
+#endif
 	na = nla_nest_start(skb, aggr);
 	if (!na)
 		goto err;
-	if (nla_put(skb, type, pid_size, pids) < 0)
+
+	if (nla_put(skb, type, sizeof(pid), &pid) < 0)
 		goto err;
 	ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats));
 	if (!ret)
@@ -456,6 +478,18 @@ out:
 	return rc;
 }
 
+static size_t taskstats_packet_size(void)
+{
+	size_t size;
+
+	size = nla_total_size(sizeof(u32)) +
+		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+#ifdef TASKSTATS_NEEDS_PADDING
+	size += nla_total_size(0); /* Padding for alignment */
+#endif
+	return size;
+}
+
 static int cmd_attr_pid(struct genl_info *info)
 {
 	struct taskstats *stats;
@@ -464,8 +498,7 @@ static int cmd_attr_pid(struct genl_info *info)
 	u32 pid;
 	int rc;
 
-	size = nla_total_size(sizeof(u32)) +
-		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+	size = taskstats_packet_size();
 
 	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
 	if (rc < 0)
@@ -494,8 +527,7 @@ static int cmd_attr_tgid(struct genl_info *info)
 	u32 tgid;
 	int rc;
 
-	size = nla_total_size(sizeof(u32)) +
-		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+	size = taskstats_packet_size();
 
 	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
 	if (rc < 0)
@@ -570,8 +602,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
 	/*
 	 * Size includes space for nested attributes
 	 */
-	size = nla_total_size(sizeof(u32)) +
-		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+	size = taskstats_packet_size();
 
 	is_thread_group = !!taskstats_tgid_alloc(tsk);
 	if (is_thread_group) {
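Editor's note: taskstats_packet_size() above just sums netlink attribute sizes, where nla_total_size(payload) is the 4-byte attribute header plus the payload, rounded up to a 4-byte boundary. The sketch below reproduces that arithmetic in user space so the effect of the extra zero-length padding attribute is easy to see; the 328-byte payload used here is only an assumed example for sizeof(struct taskstats), which in reality depends on the taskstats version.

#include <stdio.h>
#include <stddef.h>

/* Netlink attribute size arithmetic (same formulas as include/net/netlink.h). */
#define NLA_ALIGNTO	4
#define NLA_ALIGN(len)	(((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
#define NLA_HDRLEN	4	/* aligned sizeof(struct nlattr) */

static size_t nla_total_size(size_t payload)
{
	return NLA_ALIGN(NLA_HDRLEN + payload);
}

/* Example value only; the real figure is sizeof(struct taskstats). */
#define EXAMPLE_TASKSTATS_SIZE	328

int main(void)
{
	/* pid attribute + stats attribute + aggregate nest header */
	size_t size = nla_total_size(sizeof(unsigned int)) +
		      nla_total_size(EXAMPLE_TASKSTATS_SIZE) +
		      nla_total_size(0);

	printf("reply size without padding: %zu\n", size);

	/* TASKSTATS_NEEDS_PADDING reserves one more zero-length attribute,
	 * i.e. one extra aligned header placed before the aggregate, which
	 * is what shifts the stats payload onto an 8-byte boundary on
	 * architectures such as ia64. */
	size += nla_total_size(0);
	printf("reply size with padding:    %zu\n", size);
	return 0;
}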
diff --git a/kernel/timer.c b/kernel/timer.c
index 68a9ae7679b7..353b9227c2ec 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1252,6 +1252,12 @@ unsigned long get_next_timer_interrupt(unsigned long now)
 	struct tvec_base *base = __get_cpu_var(tvec_bases);
 	unsigned long expires;
 
+	/*
+	 * Pretend that there is no timer pending if the cpu is offline.
+	 * Possible pending timers will be migrated later to an active cpu.
+	 */
+	if (cpu_is_offline(smp_processor_id()))
+		return now + NEXT_TIMER_MAX_DELTA;
 	spin_lock(&base->lock);
 	if (time_before_eq(base->next_timer, base->timer_jiffies))
 		base->next_timer = __next_timer_interrupt(base);
@@ -1319,7 +1325,7 @@ void do_timer(unsigned long ticks)
 {
 	jiffies_64 += ticks;
 	update_wall_time();
-	calc_global_load();
+	calc_global_load(ticks);
 }
 
 #ifdef __ARCH_WANT_SYS_ALARM
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 9ed509a015d8..bd1c35a4fbcc 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3853,6 +3853,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 
 		/* Need to copy one event at a time */
 		do {
+			/* We need the size of one event, because
+			 * rb_advance_reader only advances by one event,
+			 * whereas rb_event_ts_length may include the size of
+			 * one or two events.
+			 * We have already ensured there's enough space if this
+			 * is a time extend. */
+			size = rb_event_length(event);
 			memcpy(bpage->data + pos, rpage->data + rpos, size);
 
 			len -= size;
@@ -3867,7 +3874,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 			event = rb_reader_event(cpu_buffer);
 			/* Always keep the time extend and data together */
 			size = rb_event_ts_length(event);
-		} while (len > size);
+		} while (len >= size);
 
 		/* update bpage */
 		local_set(&bpage->commit, pos);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c380612273bf..f8cf959bad45 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2338,11 +2338,19 @@ tracing_write_stub(struct file *filp, const char __user *ubuf,
 	return count;
 }
 
+static loff_t tracing_seek(struct file *file, loff_t offset, int origin)
+{
+	if (file->f_mode & FMODE_READ)
+		return seq_lseek(file, offset, origin);
+	else
+		return 0;
+}
+
 static const struct file_operations tracing_fops = {
 	.open		= tracing_open,
 	.read		= seq_read,
 	.write		= tracing_write_stub,
-	.llseek		= seq_lseek,
+	.llseek		= tracing_seek,
 	.release	= tracing_release,
 };
 
diff --git a/kernel/user.c b/kernel/user.c
index 2c7d8d5914b1..5c598ca781df 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -158,6 +158,7 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
 	spin_lock_irq(&uidhash_lock);
 	up = uid_hash_find(uid, hashent);
 	if (up) {
+		put_user_ns(ns);
 		key_put(new->uid_keyring);
 		key_put(new->session_keyring);
 		kmem_cache_free(uid_cachep, new);
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 6e3c41a4024c..5b082156cd21 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -364,7 +364,8 @@ static int watchdog_nmi_enable(int cpu)
 		goto out_save;
 	}
 
-	printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event);
+	printk(KERN_ERR "NMI watchdog disabled for cpu%i: unable to create perf event: %ld\n",
+	       cpu, PTR_ERR(event));
 	return PTR_ERR(event);
 
 	/* success path */
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 90db1bd1a978..e785b0f2aea5 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -661,7 +661,7 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu)
 {
 	struct worker *worker = kthread_data(task);
 
-	if (likely(!(worker->flags & WORKER_NOT_RUNNING)))
+	if (!(worker->flags & WORKER_NOT_RUNNING))
 		atomic_inc(get_gcwq_nr_running(cpu));
 }
 
@@ -687,7 +687,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task,
 	struct global_cwq *gcwq = get_gcwq(cpu);
 	atomic_t *nr_running = get_gcwq_nr_running(cpu);
 
-	if (unlikely(worker->flags & WORKER_NOT_RUNNING))
+	if (worker->flags & WORKER_NOT_RUNNING)
 		return NULL;
 
 	/* this can only happen on the local cpu */
@@ -3692,7 +3692,8 @@ static int __init init_workqueues(void)
 	system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0);
 	system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
 					    WQ_UNBOUND_MAX_ACTIVE);
-	BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq);
+	BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq ||
+	       !system_unbound_wq);
 	return 0;
 }
 early_initcall(init_workqueues);