Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cgroup.c | 13
-rw-r--r--  kernel/compat.c | 21
-rw-r--r--  kernel/debug/kdb/kdb_bp.c | 2
-rw-r--r--  kernel/debug/kdb/kdb_private.h | 7
-rw-r--r--  kernel/debug/kdb/kdb_support.c | 4
-rw-r--r--  kernel/exit.c | 5
-rw-r--r--  kernel/fork.c | 19
-rw-r--r--  kernel/gcov/fs.c | 244
-rw-r--r--  kernel/groups.c | 5
-rw-r--r--  kernel/hrtimer.c | 3
-rw-r--r--  kernel/hw_breakpoint.c | 3
-rw-r--r--  kernel/kfifo.c | 11
-rw-r--r--  kernel/kmod.c | 4
-rw-r--r--  kernel/module.c | 4
-rw-r--r--  kernel/mutex.c | 23
-rw-r--r--  kernel/perf_event.c | 32
-rw-r--r--  kernel/pm_qos_params.c | 16
-rw-r--r--  kernel/power/hibernate.c | 1
-rw-r--r--  kernel/power/snapshot.c | 86
-rw-r--r--  kernel/power/swap.c | 6
-rw-r--r--  kernel/sched.c | 24
-rw-r--r--  kernel/sched_fair.c | 15
-rw-r--r--  kernel/signal.c | 8
-rw-r--r--  kernel/smp.c | 17
-rw-r--r--  kernel/sys.c | 2
-rw-r--r--  kernel/sysctl.c | 7
-rw-r--r--  kernel/trace/ftrace.c | 19
-rw-r--r--  kernel/trace/ring_buffer.c | 5
-rw-r--r--  kernel/trace/trace.c | 11
-rw-r--r--  kernel/trace/trace_event_perf.c | 3
-rw-r--r--  kernel/trace/trace_events.c | 207
-rw-r--r--  kernel/trace/trace_functions_graph.c | 10
-rw-r--r--  kernel/trace/trace_kprobe.c | 43
-rw-r--r--  kernel/trace/trace_stack.c | 2
-rw-r--r--  kernel/watchdog.c | 20
-rw-r--r--  kernel/workqueue.c | 89

36 files changed, 698 insertions(+), 293 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 192f88c5b0f9..c9483d8f6140 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1791,19 +1791,20 @@ out:
1791} 1791}
1792 1792
1793/** 1793/**
1794 * cgroup_attach_task_current_cg - attach task 'tsk' to current task's cgroup 1794 * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
1795 * @from: attach to all cgroups of a given task
1795 * @tsk: the task to be attached 1796 * @tsk: the task to be attached
1796 */ 1797 */
1797int cgroup_attach_task_current_cg(struct task_struct *tsk) 1798int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
1798{ 1799{
1799 struct cgroupfs_root *root; 1800 struct cgroupfs_root *root;
1800 struct cgroup *cur_cg;
1801 int retval = 0; 1801 int retval = 0;
1802 1802
1803 cgroup_lock(); 1803 cgroup_lock();
1804 for_each_active_root(root) { 1804 for_each_active_root(root) {
1805 cur_cg = task_cgroup_from_root(current, root); 1805 struct cgroup *from_cg = task_cgroup_from_root(from, root);
1806 retval = cgroup_attach_task(cur_cg, tsk); 1806
1807 retval = cgroup_attach_task(from_cg, tsk);
1807 if (retval) 1808 if (retval)
1808 break; 1809 break;
1809 } 1810 }
@@ -1811,7 +1812,7 @@ int cgroup_attach_task_current_cg(struct task_struct *tsk)
1811 1812
1812 return retval; 1813 return retval;
1813} 1814}
1814EXPORT_SYMBOL_GPL(cgroup_attach_task_current_cg); 1815EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
1815 1816
1816/* 1817/*
1817 * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex 1818 * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex
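The hunk above renames cgroup_attach_task_current_cg() to cgroup_attach_task_all() and makes the source task an explicit parameter instead of implicitly using current. A minimal usage sketch, not part of this diff; foo_setup_worker() is an invented caller, e.g. a driver that spawns a worker thread and wants it placed in the same cgroups as its owner:

#include <linux/cgroup.h>
#include <linux/sched.h>

static int foo_setup_worker(struct task_struct *owner,
			    struct task_struct *worker)
{
	/*
	 * Previously: cgroup_attach_task_current_cg(worker), which could
	 * only copy current's cgroups.  The renamed helper takes the
	 * source task explicitly.
	 */
	return cgroup_attach_task_all(owner, worker);
}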
diff --git a/kernel/compat.c b/kernel/compat.c
index e167efce8423..c9e2ec0b34a8 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -1126,3 +1126,24 @@ compat_sys_sysinfo(struct compat_sysinfo __user *info)
1126 1126
1127 return 0; 1127 return 0;
1128} 1128}
1129
1130/*
1131 * Allocate user-space memory for the duration of a single system call,
1132 * in order to marshall parameters inside a compat thunk.
1133 */
1134void __user *compat_alloc_user_space(unsigned long len)
1135{
1136 void __user *ptr;
1137
1138 /* If len would occupy more than half of the entire compat space... */
1139 if (unlikely(len > (((compat_uptr_t)~0) >> 1)))
1140 return NULL;
1141
1142 ptr = arch_compat_alloc_user_space(len);
1143
1144 if (unlikely(!access_ok(VERIFY_WRITE, ptr, len)))
1145 return NULL;
1146
1147 return ptr;
1148}
1149EXPORT_SYMBOL_GPL(compat_alloc_user_space);
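compat_alloc_user_space() hands back scratch user-space memory that stays valid for the duration of the current system call, which is exactly what a compat thunk needs to rebuild a native argument block. A hedged sketch of such a thunk, not taken from this diff; the foo structures and the compat_sys_foo()/sys_foo() entry points are hypothetical names:

#include <linux/compat.h>
#include <linux/uaccess.h>

struct compat_foo_args {			/* 32-bit user layout (hypothetical) */
	compat_ulong_t	addr;
	compat_int_t	flags;
};

struct foo_args {				/* native layout (hypothetical) */
	unsigned long	addr;
	int		flags;
};

asmlinkage long compat_sys_foo(struct compat_foo_args __user *uargs32)
{
	struct compat_foo_args a32;
	struct foo_args __user *a64;

	if (copy_from_user(&a32, uargs32, sizeof(a32)))
		return -EFAULT;

	/* Scratch user memory, valid only until this syscall returns. */
	a64 = compat_alloc_user_space(sizeof(*a64));
	if (!a64 ||
	    put_user((unsigned long)a32.addr, &a64->addr) ||
	    put_user(a32.flags, &a64->flags))
		return -EFAULT;

	return sys_foo(a64);			/* hypothetical native entry point */
}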
diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c
index 75bd9b3ebbb7..20059ef4459a 100644
--- a/kernel/debug/kdb/kdb_bp.c
+++ b/kernel/debug/kdb/kdb_bp.c
@@ -274,7 +274,6 @@ static int kdb_bp(int argc, const char **argv)
274 int i, bpno; 274 int i, bpno;
275 kdb_bp_t *bp, *bp_check; 275 kdb_bp_t *bp, *bp_check;
276 int diag; 276 int diag;
277 int free;
278 char *symname = NULL; 277 char *symname = NULL;
279 long offset = 0ul; 278 long offset = 0ul;
280 int nextarg; 279 int nextarg;
@@ -305,7 +304,6 @@ static int kdb_bp(int argc, const char **argv)
305 /* 304 /*
306 * Find an empty bp structure to allocate 305 * Find an empty bp structure to allocate
307 */ 306 */
308 free = KDB_MAXBPT;
309 for (bpno = 0, bp = kdb_breakpoints; bpno < KDB_MAXBPT; bpno++, bp++) { 307 for (bpno = 0, bp = kdb_breakpoints; bpno < KDB_MAXBPT; bpno++, bp++) {
310 if (bp->bp_free) 308 if (bp->bp_free)
311 break; 309 break;
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index c438f545a321..be775f7e81e0 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -255,7 +255,14 @@ extern void kdb_ps1(const struct task_struct *p);
255extern void kdb_print_nameval(const char *name, unsigned long val); 255extern void kdb_print_nameval(const char *name, unsigned long val);
256extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); 256extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info);
257extern void kdb_meminfo_proc_show(void); 257extern void kdb_meminfo_proc_show(void);
258#ifdef CONFIG_KALLSYMS
258extern const char *kdb_walk_kallsyms(loff_t *pos); 259extern const char *kdb_walk_kallsyms(loff_t *pos);
260#else /* ! CONFIG_KALLSYMS */
261static inline const char *kdb_walk_kallsyms(loff_t *pos)
262{
263 return NULL;
264}
265#endif /* ! CONFIG_KALLSYMS */
259extern char *kdb_getstr(char *, size_t, char *); 266extern char *kdb_getstr(char *, size_t, char *);
260 267
261/* Defines for kdb_symbol_print */ 268/* Defines for kdb_symbol_print */
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
index 45344d5c53dd..6b2485dcb050 100644
--- a/kernel/debug/kdb/kdb_support.c
+++ b/kernel/debug/kdb/kdb_support.c
@@ -82,8 +82,8 @@ static char *kdb_name_table[100]; /* arbitrary size */
82int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) 82int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab)
83{ 83{
84 int ret = 0; 84 int ret = 0;
85 unsigned long symbolsize; 85 unsigned long symbolsize = 0;
86 unsigned long offset; 86 unsigned long offset = 0;
87#define knt1_size 128 /* must be >= kallsyms table size */ 87#define knt1_size 128 /* must be >= kallsyms table size */
88 char *knt1 = NULL; 88 char *knt1 = NULL;
89 89
diff --git a/kernel/exit.c b/kernel/exit.c
index 671ed56e0a49..03120229db28 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1386,8 +1386,7 @@ static int wait_task_stopped(struct wait_opts *wo,
1386 if (!unlikely(wo->wo_flags & WNOWAIT)) 1386 if (!unlikely(wo->wo_flags & WNOWAIT))
1387 *p_code = 0; 1387 *p_code = 0;
1388 1388
1389 /* don't need the RCU readlock here as we're holding a spinlock */ 1389 uid = task_uid(p);
1390 uid = __task_cred(p)->uid;
1391unlock_sig: 1390unlock_sig:
1392 spin_unlock_irq(&p->sighand->siglock); 1391 spin_unlock_irq(&p->sighand->siglock);
1393 if (!exit_code) 1392 if (!exit_code)
@@ -1460,7 +1459,7 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
1460 } 1459 }
1461 if (!unlikely(wo->wo_flags & WNOWAIT)) 1460 if (!unlikely(wo->wo_flags & WNOWAIT))
1462 p->signal->flags &= ~SIGNAL_STOP_CONTINUED; 1461 p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
1463 uid = __task_cred(p)->uid; 1462 uid = task_uid(p);
1464 spin_unlock_irq(&p->sighand->siglock); 1463 spin_unlock_irq(&p->sighand->siglock);
1465 1464
1466 pid = task_pid_vnr(p); 1465 pid = task_pid_vnr(p);
diff --git a/kernel/fork.c b/kernel/fork.c
index 98b450876f93..c445f8cc408d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -300,7 +300,7 @@ out:
300#ifdef CONFIG_MMU 300#ifdef CONFIG_MMU
301static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) 301static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
302{ 302{
303 struct vm_area_struct *mpnt, *tmp, **pprev; 303 struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
304 struct rb_node **rb_link, *rb_parent; 304 struct rb_node **rb_link, *rb_parent;
305 int retval; 305 int retval;
306 unsigned long charge; 306 unsigned long charge;
@@ -328,6 +328,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
328 if (retval) 328 if (retval)
329 goto out; 329 goto out;
330 330
331 prev = NULL;
331 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { 332 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
332 struct file *file; 333 struct file *file;
333 334
@@ -355,11 +356,11 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
355 if (IS_ERR(pol)) 356 if (IS_ERR(pol))
356 goto fail_nomem_policy; 357 goto fail_nomem_policy;
357 vma_set_policy(tmp, pol); 358 vma_set_policy(tmp, pol);
359 tmp->vm_mm = mm;
358 if (anon_vma_fork(tmp, mpnt)) 360 if (anon_vma_fork(tmp, mpnt))
359 goto fail_nomem_anon_vma_fork; 361 goto fail_nomem_anon_vma_fork;
360 tmp->vm_flags &= ~VM_LOCKED; 362 tmp->vm_flags &= ~VM_LOCKED;
361 tmp->vm_mm = mm; 363 tmp->vm_next = tmp->vm_prev = NULL;
362 tmp->vm_next = NULL;
363 file = tmp->vm_file; 364 file = tmp->vm_file;
364 if (file) { 365 if (file) {
365 struct inode *inode = file->f_path.dentry->d_inode; 366 struct inode *inode = file->f_path.dentry->d_inode;
@@ -392,6 +393,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
392 */ 393 */
393 *pprev = tmp; 394 *pprev = tmp;
394 pprev = &tmp->vm_next; 395 pprev = &tmp->vm_next;
396 tmp->vm_prev = prev;
397 prev = tmp;
395 398
396 __vma_link_rb(mm, tmp, rb_link, rb_parent); 399 __vma_link_rb(mm, tmp, rb_link, rb_parent);
397 rb_link = &tmp->vm_rb.rb_right; 400 rb_link = &tmp->vm_rb.rb_right;
@@ -752,13 +755,13 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
752 struct fs_struct *fs = current->fs; 755 struct fs_struct *fs = current->fs;
753 if (clone_flags & CLONE_FS) { 756 if (clone_flags & CLONE_FS) {
754 /* tsk->fs is already what we want */ 757 /* tsk->fs is already what we want */
755 write_lock(&fs->lock); 758 spin_lock(&fs->lock);
756 if (fs->in_exec) { 759 if (fs->in_exec) {
757 write_unlock(&fs->lock); 760 spin_unlock(&fs->lock);
758 return -EAGAIN; 761 return -EAGAIN;
759 } 762 }
760 fs->users++; 763 fs->users++;
761 write_unlock(&fs->lock); 764 spin_unlock(&fs->lock);
762 return 0; 765 return 0;
763 } 766 }
764 tsk->fs = copy_fs_struct(fs); 767 tsk->fs = copy_fs_struct(fs);
@@ -1676,13 +1679,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
1676 1679
1677 if (new_fs) { 1680 if (new_fs) {
1678 fs = current->fs; 1681 fs = current->fs;
1679 write_lock(&fs->lock); 1682 spin_lock(&fs->lock);
1680 current->fs = new_fs; 1683 current->fs = new_fs;
1681 if (--fs->users) 1684 if (--fs->users)
1682 new_fs = NULL; 1685 new_fs = NULL;
1683 else 1686 else
1684 new_fs = fs; 1687 new_fs = fs;
1685 write_unlock(&fs->lock); 1688 spin_unlock(&fs->lock);
1686 } 1689 }
1687 1690
1688 if (new_mm) { 1691 if (new_mm) {
diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c
index ef3c3f88a7a3..f83972b16564 100644
--- a/kernel/gcov/fs.c
+++ b/kernel/gcov/fs.c
@@ -33,10 +33,11 @@
33 * @children: child nodes 33 * @children: child nodes
34 * @all: list head for list of all nodes 34 * @all: list head for list of all nodes
35 * @parent: parent node 35 * @parent: parent node
36 * @info: associated profiling data structure if not a directory 36 * @loaded_info: array of pointers to profiling data sets for loaded object
37 * @ghost: when an object file containing profiling data is unloaded we keep a 37 * files.
38 * copy of the profiling data here to allow collecting coverage data 38 * @num_loaded: number of profiling data sets for loaded object files.
39 * for cleanup code. Such a node is called a "ghost". 39 * @unloaded_info: accumulated copy of profiling data sets for unloaded
40 * object files. Used only when gcov_persist=1.
40 * @dentry: main debugfs entry, either a directory or data file 41 * @dentry: main debugfs entry, either a directory or data file
41 * @links: associated symbolic links 42 * @links: associated symbolic links
42 * @name: data file basename 43 * @name: data file basename
@@ -51,10 +52,11 @@ struct gcov_node {
51 struct list_head children; 52 struct list_head children;
52 struct list_head all; 53 struct list_head all;
53 struct gcov_node *parent; 54 struct gcov_node *parent;
54 struct gcov_info *info; 55 struct gcov_info **loaded_info;
55 struct gcov_info *ghost; 56 struct gcov_info *unloaded_info;
56 struct dentry *dentry; 57 struct dentry *dentry;
57 struct dentry **links; 58 struct dentry **links;
59 int num_loaded;
58 char name[0]; 60 char name[0];
59}; 61};
60 62
@@ -136,16 +138,37 @@ static const struct seq_operations gcov_seq_ops = {
136}; 138};
137 139
138/* 140/*
139 * Return the profiling data set for a given node. This can either be the 141 * Return a profiling data set associated with the given node. This is
140 * original profiling data structure or a duplicate (also called "ghost") 142 * either a data set for a loaded object file or a data set copy in case
141 * in case the associated object file has been unloaded. 143 * all associated object files have been unloaded.
142 */ 144 */
143static struct gcov_info *get_node_info(struct gcov_node *node) 145static struct gcov_info *get_node_info(struct gcov_node *node)
144{ 146{
145 if (node->info) 147 if (node->num_loaded > 0)
146 return node->info; 148 return node->loaded_info[0];
147 149
148 return node->ghost; 150 return node->unloaded_info;
151}
152
153/*
154 * Return a newly allocated profiling data set which contains the sum of
155 * all profiling data associated with the given node.
156 */
157static struct gcov_info *get_accumulated_info(struct gcov_node *node)
158{
159 struct gcov_info *info;
160 int i = 0;
161
162 if (node->unloaded_info)
163 info = gcov_info_dup(node->unloaded_info);
164 else
165 info = gcov_info_dup(node->loaded_info[i++]);
166 if (!info)
167 return NULL;
168 for (; i < node->num_loaded; i++)
169 gcov_info_add(info, node->loaded_info[i]);
170
171 return info;
149} 172}
150 173
151/* 174/*
@@ -163,9 +186,10 @@ static int gcov_seq_open(struct inode *inode, struct file *file)
163 mutex_lock(&node_lock); 186 mutex_lock(&node_lock);
164 /* 187 /*
165 * Read from a profiling data copy to minimize reference tracking 188 * Read from a profiling data copy to minimize reference tracking
166 * complexity and concurrent access. 189 * complexity and concurrent access and to keep accumulating multiple
190 * profiling data sets associated with one node simple.
167 */ 191 */
168 info = gcov_info_dup(get_node_info(node)); 192 info = get_accumulated_info(node);
169 if (!info) 193 if (!info)
170 goto out_unlock; 194 goto out_unlock;
171 iter = gcov_iter_new(info); 195 iter = gcov_iter_new(info);
@@ -225,12 +249,25 @@ static struct gcov_node *get_node_by_name(const char *name)
225 return NULL; 249 return NULL;
226} 250}
227 251
252/*
253 * Reset all profiling data associated with the specified node.
254 */
255static void reset_node(struct gcov_node *node)
256{
257 int i;
258
259 if (node->unloaded_info)
260 gcov_info_reset(node->unloaded_info);
261 for (i = 0; i < node->num_loaded; i++)
262 gcov_info_reset(node->loaded_info[i]);
263}
264
228static void remove_node(struct gcov_node *node); 265static void remove_node(struct gcov_node *node);
229 266
230/* 267/*
231 * write() implementation for gcov data files. Reset profiling data for the 268 * write() implementation for gcov data files. Reset profiling data for the
232 * associated file. If the object file has been unloaded (i.e. this is 269 * corresponding file. If all associated object files have been unloaded,
233 * a "ghost" node), remove the debug fs node as well. 270 * remove the debug fs node as well.
234 */ 271 */
235static ssize_t gcov_seq_write(struct file *file, const char __user *addr, 272static ssize_t gcov_seq_write(struct file *file, const char __user *addr,
236 size_t len, loff_t *pos) 273 size_t len, loff_t *pos)
@@ -245,10 +282,10 @@ static ssize_t gcov_seq_write(struct file *file, const char __user *addr,
245 node = get_node_by_name(info->filename); 282 node = get_node_by_name(info->filename);
246 if (node) { 283 if (node) {
247 /* Reset counts or remove node for unloaded modules. */ 284 /* Reset counts or remove node for unloaded modules. */
248 if (node->ghost) 285 if (node->num_loaded == 0)
249 remove_node(node); 286 remove_node(node);
250 else 287 else
251 gcov_info_reset(node->info); 288 reset_node(node);
252 } 289 }
253 /* Reset counts for open file. */ 290 /* Reset counts for open file. */
254 gcov_info_reset(info); 291 gcov_info_reset(info);
@@ -378,7 +415,10 @@ static void init_node(struct gcov_node *node, struct gcov_info *info,
378 INIT_LIST_HEAD(&node->list); 415 INIT_LIST_HEAD(&node->list);
379 INIT_LIST_HEAD(&node->children); 416 INIT_LIST_HEAD(&node->children);
380 INIT_LIST_HEAD(&node->all); 417 INIT_LIST_HEAD(&node->all);
381 node->info = info; 418 if (node->loaded_info) {
419 node->loaded_info[0] = info;
420 node->num_loaded = 1;
421 }
382 node->parent = parent; 422 node->parent = parent;
383 if (name) 423 if (name)
384 strcpy(node->name, name); 424 strcpy(node->name, name);
@@ -394,9 +434,13 @@ static struct gcov_node *new_node(struct gcov_node *parent,
394 struct gcov_node *node; 434 struct gcov_node *node;
395 435
396 node = kzalloc(sizeof(struct gcov_node) + strlen(name) + 1, GFP_KERNEL); 436 node = kzalloc(sizeof(struct gcov_node) + strlen(name) + 1, GFP_KERNEL);
397 if (!node) { 437 if (!node)
398 pr_warning("out of memory\n"); 438 goto err_nomem;
399 return NULL; 439 if (info) {
440 node->loaded_info = kcalloc(1, sizeof(struct gcov_info *),
441 GFP_KERNEL);
442 if (!node->loaded_info)
443 goto err_nomem;
400 } 444 }
401 init_node(node, info, name, parent); 445 init_node(node, info, name, parent);
402 /* Differentiate between gcov data file nodes and directory nodes. */ 446 /* Differentiate between gcov data file nodes and directory nodes. */
@@ -416,6 +460,11 @@ static struct gcov_node *new_node(struct gcov_node *parent,
416 list_add(&node->all, &all_head); 460 list_add(&node->all, &all_head);
417 461
418 return node; 462 return node;
463
464err_nomem:
465 kfree(node);
466 pr_warning("out of memory\n");
467 return NULL;
419} 468}
420 469
421/* Remove symbolic links associated with node. */ 470/* Remove symbolic links associated with node. */
@@ -441,8 +490,9 @@ static void release_node(struct gcov_node *node)
441 list_del(&node->all); 490 list_del(&node->all);
442 debugfs_remove(node->dentry); 491 debugfs_remove(node->dentry);
443 remove_links(node); 492 remove_links(node);
444 if (node->ghost) 493 kfree(node->loaded_info);
445 gcov_info_free(node->ghost); 494 if (node->unloaded_info)
495 gcov_info_free(node->unloaded_info);
446 kfree(node); 496 kfree(node);
447} 497}
448 498
@@ -477,7 +527,7 @@ static struct gcov_node *get_child_by_name(struct gcov_node *parent,
477 527
478/* 528/*
479 * write() implementation for reset file. Reset all profiling data to zero 529 * write() implementation for reset file. Reset all profiling data to zero
480 * and remove ghost nodes. 530 * and remove nodes for which all associated object files are unloaded.
481 */ 531 */
482static ssize_t reset_write(struct file *file, const char __user *addr, 532static ssize_t reset_write(struct file *file, const char __user *addr,
483 size_t len, loff_t *pos) 533 size_t len, loff_t *pos)
@@ -487,8 +537,8 @@ static ssize_t reset_write(struct file *file, const char __user *addr,
487 mutex_lock(&node_lock); 537 mutex_lock(&node_lock);
488restart: 538restart:
489 list_for_each_entry(node, &all_head, all) { 539 list_for_each_entry(node, &all_head, all) {
490 if (node->info) 540 if (node->num_loaded > 0)
491 gcov_info_reset(node->info); 541 reset_node(node);
492 else if (list_empty(&node->children)) { 542 else if (list_empty(&node->children)) {
493 remove_node(node); 543 remove_node(node);
494 /* Several nodes may have gone - restart loop. */ 544 /* Several nodes may have gone - restart loop. */
@@ -564,37 +614,115 @@ err_remove:
564} 614}
565 615
566/* 616/*
567 * The profiling data set associated with this node is being unloaded. Store a 617 * Associate a profiling data set with an existing node. Needs to be called
568 * copy of the profiling data and turn this node into a "ghost". 618 * with node_lock held.
569 */ 619 */
570static int ghost_node(struct gcov_node *node) 620static void add_info(struct gcov_node *node, struct gcov_info *info)
571{ 621{
572 node->ghost = gcov_info_dup(node->info); 622 struct gcov_info **loaded_info;
573 if (!node->ghost) { 623 int num = node->num_loaded;
574 pr_warning("could not save data for '%s' (out of memory)\n", 624
575 node->info->filename); 625 /*
576 return -ENOMEM; 626 * Prepare new array. This is done first to simplify cleanup in
627 * case the new data set is incompatible, the node only contains
628 * unloaded data sets and there's not enough memory for the array.
629 */
630 loaded_info = kcalloc(num + 1, sizeof(struct gcov_info *), GFP_KERNEL);
631 if (!loaded_info) {
632 pr_warning("could not add '%s' (out of memory)\n",
633 info->filename);
634 return;
635 }
636 memcpy(loaded_info, node->loaded_info,
637 num * sizeof(struct gcov_info *));
638 loaded_info[num] = info;
639 /* Check if the new data set is compatible. */
640 if (num == 0) {
641 /*
642 * A module was unloaded, modified and reloaded. The new
643 * data set replaces the copy of the last one.
644 */
645 if (!gcov_info_is_compatible(node->unloaded_info, info)) {
646 pr_warning("discarding saved data for %s "
647 "(incompatible version)\n", info->filename);
648 gcov_info_free(node->unloaded_info);
649 node->unloaded_info = NULL;
650 }
651 } else {
652 /*
653 * Two different versions of the same object file are loaded.
654 * The initial one takes precedence.
655 */
656 if (!gcov_info_is_compatible(node->loaded_info[0], info)) {
657 pr_warning("could not add '%s' (incompatible "
658 "version)\n", info->filename);
659 kfree(loaded_info);
660 return;
661 }
577 } 662 }
578 node->info = NULL; 663 /* Overwrite previous array. */
664 kfree(node->loaded_info);
665 node->loaded_info = loaded_info;
666 node->num_loaded = num + 1;
667}
579 668
580 return 0; 669/*
670 * Return the index of a profiling data set associated with a node.
671 */
672static int get_info_index(struct gcov_node *node, struct gcov_info *info)
673{
674 int i;
675
676 for (i = 0; i < node->num_loaded; i++) {
677 if (node->loaded_info[i] == info)
678 return i;
679 }
680 return -ENOENT;
581} 681}
582 682
583/* 683/*
584 * Profiling data for this node has been loaded again. Add profiling data 684 * Save the data of a profiling data set which is being unloaded.
585 * from previous instantiation and turn this node into a regular node.
586 */ 685 */
587static void revive_node(struct gcov_node *node, struct gcov_info *info) 686static void save_info(struct gcov_node *node, struct gcov_info *info)
588{ 687{
589 if (gcov_info_is_compatible(node->ghost, info)) 688 if (node->unloaded_info)
590 gcov_info_add(info, node->ghost); 689 gcov_info_add(node->unloaded_info, info);
591 else { 690 else {
592 pr_warning("discarding saved data for '%s' (version changed)\n", 691 node->unloaded_info = gcov_info_dup(info);
692 if (!node->unloaded_info) {
693 pr_warning("could not save data for '%s' "
694 "(out of memory)\n", info->filename);
695 }
696 }
697}
698
699/*
700 * Disassociate a profiling data set from a node. Needs to be called with
701 * node_lock held.
702 */
703static void remove_info(struct gcov_node *node, struct gcov_info *info)
704{
705 int i;
706
707 i = get_info_index(node, info);
708 if (i < 0) {
709 pr_warning("could not remove '%s' (not found)\n",
593 info->filename); 710 info->filename);
711 return;
594 } 712 }
595 gcov_info_free(node->ghost); 713 if (gcov_persist)
596 node->ghost = NULL; 714 save_info(node, info);
597 node->info = info; 715 /* Shrink array. */
716 node->loaded_info[i] = node->loaded_info[node->num_loaded - 1];
717 node->num_loaded--;
718 if (node->num_loaded > 0)
719 return;
720 /* Last loaded data set was removed. */
721 kfree(node->loaded_info);
722 node->loaded_info = NULL;
723 node->num_loaded = 0;
724 if (!node->unloaded_info)
725 remove_node(node);
598} 726}
599 727
600/* 728/*
@@ -609,30 +737,18 @@ void gcov_event(enum gcov_action action, struct gcov_info *info)
609 node = get_node_by_name(info->filename); 737 node = get_node_by_name(info->filename);
610 switch (action) { 738 switch (action) {
611 case GCOV_ADD: 739 case GCOV_ADD:
612 /* Add new node or revive ghost. */ 740 if (node)
613 if (!node) { 741 add_info(node, info);
742 else
614 add_node(info); 743 add_node(info);
615 break;
616 }
617 if (gcov_persist)
618 revive_node(node, info);
619 else {
620 pr_warning("could not add '%s' (already exists)\n",
621 info->filename);
622 }
623 break; 744 break;
624 case GCOV_REMOVE: 745 case GCOV_REMOVE:
625 /* Remove node or turn into ghost. */ 746 if (node)
626 if (!node) { 747 remove_info(node, info);
748 else {
627 pr_warning("could not remove '%s' (not found)\n", 749 pr_warning("could not remove '%s' (not found)\n",
628 info->filename); 750 info->filename);
629 break;
630 } 751 }
631 if (gcov_persist) {
632 if (!ghost_node(node))
633 break;
634 }
635 remove_node(node);
636 break; 752 break;
637 } 753 }
638 mutex_unlock(&node_lock); 754 mutex_unlock(&node_lock);
diff --git a/kernel/groups.c b/kernel/groups.c
index 53b1916c9492..253dc0f35cf4 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -143,10 +143,9 @@ int groups_search(const struct group_info *group_info, gid_t grp)
143 right = group_info->ngroups; 143 right = group_info->ngroups;
144 while (left < right) { 144 while (left < right) {
145 unsigned int mid = (left+right)/2; 145 unsigned int mid = (left+right)/2;
146 int cmp = grp - GROUP_AT(group_info, mid); 146 if (grp > GROUP_AT(group_info, mid))
147 if (cmp > 0)
148 left = mid + 1; 147 left = mid + 1;
149 else if (cmp < 0) 148 else if (grp < GROUP_AT(group_info, mid))
150 right = mid; 149 right = mid;
151 else 150 else
152 return 1; 151 return 1;
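The groups_search() change replaces the subtraction-based comparison with direct comparisons because gid_t is unsigned: the difference of two gids does not fit in a signed int, so the old cmp could flip sign and steer the binary search the wrong way on typical two's-complement targets. A small stand-alone illustration with arbitrary values, not part of the diff:

#include <stdio.h>

typedef unsigned int demo_gid_t;	/* stands in for the kernel's gid_t */

int main(void)
{
	demo_gid_t grp = 0x80000001u;	/* gid being searched for */
	demo_gid_t mid = 1u;		/* gid at the binary-search probe */

	int cmp = grp - mid;		/* old style: 0x80000000 becomes negative */

	printf("cmp = %d        -> old code narrows to the left half (wrong)\n", cmp);
	printf("grp > mid = %d  -> new code narrows to the right half (correct)\n", grp > mid);
	return 0;
}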
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index ce669174f355..1decafbb6b1a 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1091,11 +1091,10 @@ EXPORT_SYMBOL_GPL(hrtimer_cancel);
1091 */ 1091 */
1092ktime_t hrtimer_get_remaining(const struct hrtimer *timer) 1092ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
1093{ 1093{
1094 struct hrtimer_clock_base *base;
1095 unsigned long flags; 1094 unsigned long flags;
1096 ktime_t rem; 1095 ktime_t rem;
1097 1096
1098 base = lock_hrtimer_base(timer, &flags); 1097 lock_hrtimer_base(timer, &flags);
1099 rem = hrtimer_expires_remaining(timer); 1098 rem = hrtimer_expires_remaining(timer);
1100 unlock_hrtimer_base(timer, &flags); 1099 unlock_hrtimer_base(timer, &flags);
1101 1100
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index d71a987fd2bf..c7c2aed9e2dc 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -433,7 +433,8 @@ register_user_hw_breakpoint(struct perf_event_attr *attr,
433 perf_overflow_handler_t triggered, 433 perf_overflow_handler_t triggered,
434 struct task_struct *tsk) 434 struct task_struct *tsk)
435{ 435{
436 return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered); 436 return perf_event_create_kernel_counter(attr, -1, task_pid_vnr(tsk),
437 triggered);
437} 438}
438EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); 439EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
439 440
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 4502604ecadf..01a0700e873f 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -365,8 +365,6 @@ static unsigned int setup_sgl(struct __kfifo *fifo, struct scatterlist *sgl,
365 n = setup_sgl_buf(sgl, fifo->data + off, nents, l); 365 n = setup_sgl_buf(sgl, fifo->data + off, nents, l);
366 n += setup_sgl_buf(sgl + n, fifo->data, nents - n, len - l); 366 n += setup_sgl_buf(sgl + n, fifo->data, nents - n, len - l);
367 367
368 if (n)
369 sg_mark_end(sgl + n - 1);
370 return n; 368 return n;
371} 369}
372 370
@@ -503,6 +501,15 @@ unsigned int __kfifo_out_r(struct __kfifo *fifo, void *buf,
503} 501}
504EXPORT_SYMBOL(__kfifo_out_r); 502EXPORT_SYMBOL(__kfifo_out_r);
505 503
504void __kfifo_skip_r(struct __kfifo *fifo, size_t recsize)
505{
506 unsigned int n;
507
508 n = __kfifo_peek_n(fifo, recsize);
509 fifo->out += n + recsize;
510}
511EXPORT_SYMBOL(__kfifo_skip_r);
512
506int __kfifo_from_user_r(struct __kfifo *fifo, const void __user *from, 513int __kfifo_from_user_r(struct __kfifo *fifo, const void __user *from,
507 unsigned long len, unsigned int *copied, size_t recsize) 514 unsigned long len, unsigned int *copied, size_t recsize)
508{ 515{
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 6e9b19667a8d..9cd0591c96a2 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -153,7 +153,9 @@ static int ____call_usermodehelper(void *data)
153 goto fail; 153 goto fail;
154 } 154 }
155 155
156 retval = kernel_execve(sub_info->path, sub_info->argv, sub_info->envp); 156 retval = kernel_execve(sub_info->path,
157 (const char *const *)sub_info->argv,
158 (const char *const *)sub_info->envp);
157 159
158 /* Exec failed? */ 160 /* Exec failed? */
159fail: 161fail:
diff --git a/kernel/module.c b/kernel/module.c
index d0b5f8db11b4..ccd641991842 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1537,6 +1537,7 @@ static int __unlink_module(void *_mod)
1537{ 1537{
1538 struct module *mod = _mod; 1538 struct module *mod = _mod;
1539 list_del(&mod->list); 1539 list_del(&mod->list);
1540 module_bug_cleanup(mod);
1540 return 0; 1541 return 0;
1541} 1542}
1542 1543
@@ -2625,6 +2626,7 @@ static struct module *load_module(void __user *umod,
2625 if (err < 0) 2626 if (err < 0)
2626 goto ddebug; 2627 goto ddebug;
2627 2628
2629 module_bug_finalize(info.hdr, info.sechdrs, mod);
2628 list_add_rcu(&mod->list, &modules); 2630 list_add_rcu(&mod->list, &modules);
2629 mutex_unlock(&module_mutex); 2631 mutex_unlock(&module_mutex);
2630 2632
@@ -2650,6 +2652,8 @@ static struct module *load_module(void __user *umod,
2650 mutex_lock(&module_mutex); 2652 mutex_lock(&module_mutex);
2651 /* Unlink carefully: kallsyms could be walking list. */ 2653 /* Unlink carefully: kallsyms could be walking list. */
2652 list_del_rcu(&mod->list); 2654 list_del_rcu(&mod->list);
2655 module_bug_cleanup(mod);
2656
2653 ddebug: 2657 ddebug:
2654 if (!mod->taints) 2658 if (!mod->taints)
2655 dynamic_debug_remove(info.debug); 2659 dynamic_debug_remove(info.debug);
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 4c0b7b3e6d2e..200407c1502f 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -36,15 +36,6 @@
36# include <asm/mutex.h> 36# include <asm/mutex.h>
37#endif 37#endif
38 38
39/***
40 * mutex_init - initialize the mutex
41 * @lock: the mutex to be initialized
42 * @key: the lock_class_key for the class; used by mutex lock debugging
43 *
44 * Initialize the mutex to unlocked state.
45 *
46 * It is not allowed to initialize an already locked mutex.
47 */
48void 39void
49__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) 40__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
50{ 41{
@@ -68,7 +59,7 @@ EXPORT_SYMBOL(__mutex_init);
68static __used noinline void __sched 59static __used noinline void __sched
69__mutex_lock_slowpath(atomic_t *lock_count); 60__mutex_lock_slowpath(atomic_t *lock_count);
70 61
71/*** 62/**
72 * mutex_lock - acquire the mutex 63 * mutex_lock - acquire the mutex
73 * @lock: the mutex to be acquired 64 * @lock: the mutex to be acquired
74 * 65 *
@@ -105,7 +96,7 @@ EXPORT_SYMBOL(mutex_lock);
105 96
106static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); 97static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
107 98
108/*** 99/**
109 * mutex_unlock - release the mutex 100 * mutex_unlock - release the mutex
110 * @lock: the mutex to be released 101 * @lock: the mutex to be released
111 * 102 *
@@ -364,8 +355,8 @@ __mutex_lock_killable_slowpath(atomic_t *lock_count);
364static noinline int __sched 355static noinline int __sched
365__mutex_lock_interruptible_slowpath(atomic_t *lock_count); 356__mutex_lock_interruptible_slowpath(atomic_t *lock_count);
366 357
367/*** 358/**
368 * mutex_lock_interruptible - acquire the mutex, interruptable 359 * mutex_lock_interruptible - acquire the mutex, interruptible
369 * @lock: the mutex to be acquired 360 * @lock: the mutex to be acquired
370 * 361 *
371 * Lock the mutex like mutex_lock(), and return 0 if the mutex has 362 * Lock the mutex like mutex_lock(), and return 0 if the mutex has
@@ -456,15 +447,15 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
456 return prev == 1; 447 return prev == 1;
457} 448}
458 449
459/*** 450/**
460 * mutex_trylock - try acquire the mutex, without waiting 451 * mutex_trylock - try to acquire the mutex, without waiting
461 * @lock: the mutex to be acquired 452 * @lock: the mutex to be acquired
462 * 453 *
463 * Try to acquire the mutex atomically. Returns 1 if the mutex 454 * Try to acquire the mutex atomically. Returns 1 if the mutex
464 * has been acquired successfully, and 0 on contention. 455 * has been acquired successfully, and 0 on contention.
465 * 456 *
466 * NOTE: this function follows the spin_trylock() convention, so 457 * NOTE: this function follows the spin_trylock() convention, so
467 * it is negated to the down_trylock() return values! Be careful 458 * it is negated from the down_trylock() return values! Be careful
468 * about this when converting semaphore users to mutexes. 459 * about this when converting semaphore users to mutexes.
469 * 460 *
470 * This function must not be used in interrupt context. The 461 * This function must not be used in interrupt context. The
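The kerneldoc cleanups above also correct the description of mutex_trylock(): like spin_trylock() it returns 1 when the lock was taken and 0 on contention, the opposite of down_trylock(). A minimal sketch of that convention; the lock and the work done under it are hypothetical:

#include <linux/mutex.h>

static DEFINE_MUTEX(foo_cache_lock);		/* hypothetical lock */

static void foo_try_refresh_cache(void)
{
	if (!mutex_trylock(&foo_cache_lock))
		return;		/* 0 means contended: we did NOT get the lock */

	/* ... refresh the (hypothetical) cache ... */

	mutex_unlock(&foo_cache_lock);
}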
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 403d1804b198..db5b56064687 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -402,11 +402,31 @@ static void perf_group_detach(struct perf_event *event)
402 } 402 }
403} 403}
404 404
405static inline int
406event_filter_match(struct perf_event *event)
407{
408 return event->cpu == -1 || event->cpu == smp_processor_id();
409}
410
405static void 411static void
406event_sched_out(struct perf_event *event, 412event_sched_out(struct perf_event *event,
407 struct perf_cpu_context *cpuctx, 413 struct perf_cpu_context *cpuctx,
408 struct perf_event_context *ctx) 414 struct perf_event_context *ctx)
409{ 415{
416 u64 delta;
417 /*
418 * An event which could not be activated because of
419 * filter mismatch still needs to have its timings
420 * maintained, otherwise bogus information is return
421 * via read() for time_enabled, time_running:
422 */
423 if (event->state == PERF_EVENT_STATE_INACTIVE
424 && !event_filter_match(event)) {
425 delta = ctx->time - event->tstamp_stopped;
426 event->tstamp_running += delta;
427 event->tstamp_stopped = ctx->time;
428 }
429
410 if (event->state != PERF_EVENT_STATE_ACTIVE) 430 if (event->state != PERF_EVENT_STATE_ACTIVE)
411 return; 431 return;
412 432
@@ -432,9 +452,7 @@ group_sched_out(struct perf_event *group_event,
432 struct perf_event_context *ctx) 452 struct perf_event_context *ctx)
433{ 453{
434 struct perf_event *event; 454 struct perf_event *event;
435 455 int state = group_event->state;
436 if (group_event->state != PERF_EVENT_STATE_ACTIVE)
437 return;
438 456
439 event_sched_out(group_event, cpuctx, ctx); 457 event_sched_out(group_event, cpuctx, ctx);
440 458
@@ -444,7 +462,7 @@ group_sched_out(struct perf_event *group_event,
444 list_for_each_entry(event, &group_event->sibling_list, group_entry) 462 list_for_each_entry(event, &group_event->sibling_list, group_entry)
445 event_sched_out(event, cpuctx, ctx); 463 event_sched_out(event, cpuctx, ctx);
446 464
447 if (group_event->attr.exclusive) 465 if (state == PERF_EVENT_STATE_ACTIVE && group_event->attr.exclusive)
448 cpuctx->exclusive = 0; 466 cpuctx->exclusive = 0;
449} 467}
450 468
@@ -5743,15 +5761,15 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
5743{ 5761{
5744 unsigned int cpu = (long)hcpu; 5762 unsigned int cpu = (long)hcpu;
5745 5763
5746 switch (action) { 5764 switch (action & ~CPU_TASKS_FROZEN) {
5747 5765
5748 case CPU_UP_PREPARE: 5766 case CPU_UP_PREPARE:
5749 case CPU_UP_PREPARE_FROZEN: 5767 case CPU_DOWN_FAILED:
5750 perf_event_init_cpu(cpu); 5768 perf_event_init_cpu(cpu);
5751 break; 5769 break;
5752 5770
5771 case CPU_UP_CANCELED:
5753 case CPU_DOWN_PREPARE: 5772 case CPU_DOWN_PREPARE:
5754 case CPU_DOWN_PREPARE_FROZEN:
5755 perf_event_exit_cpu(cpu); 5773 perf_event_exit_cpu(cpu);
5756 break; 5774 break;
5757 5775
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index 996a4dec5f96..645e541a45f6 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -212,15 +212,17 @@ EXPORT_SYMBOL_GPL(pm_qos_request_active);
212 212
213/** 213/**
214 * pm_qos_add_request - inserts new qos request into the list 214 * pm_qos_add_request - inserts new qos request into the list
215 * @pm_qos_class: identifies which list of qos request to us 215 * @dep: pointer to a preallocated handle
216 * @pm_qos_class: identifies which list of qos request to use
216 * @value: defines the qos request 217 * @value: defines the qos request
217 * 218 *
218 * This function inserts a new entry in the pm_qos_class list of requested qos 219 * This function inserts a new entry in the pm_qos_class list of requested qos
219 * performance characteristics. It recomputes the aggregate QoS expectations 220 * performance characteristics. It recomputes the aggregate QoS expectations
220 * for the pm_qos_class of parameters, and returns the pm_qos_request list 221 * for the pm_qos_class of parameters and initializes the pm_qos_request_list
221 * element as a handle for use in updating and removal. Call needs to save 222 * handle. Caller needs to save this handle for later use in updates and
222 * this handle for later use. 223 * removal.
223 */ 224 */
225
224void pm_qos_add_request(struct pm_qos_request_list *dep, 226void pm_qos_add_request(struct pm_qos_request_list *dep,
225 int pm_qos_class, s32 value) 227 int pm_qos_class, s32 value)
226{ 228{
@@ -348,7 +350,7 @@ static int pm_qos_power_open(struct inode *inode, struct file *filp)
348 350
349 pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); 351 pm_qos_class = find_pm_qos_object_by_minor(iminor(inode));
350 if (pm_qos_class >= 0) { 352 if (pm_qos_class >= 0) {
351 struct pm_qos_request_list *req = kzalloc(GFP_KERNEL, sizeof(*req)); 353 struct pm_qos_request_list *req = kzalloc(sizeof(*req), GFP_KERNEL);
352 if (!req) 354 if (!req)
353 return -ENOMEM; 355 return -ENOMEM;
354 356
@@ -387,10 +389,12 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
387 } else if (count == 11) { /* len('0x12345678/0') */ 389 } else if (count == 11) { /* len('0x12345678/0') */
388 if (copy_from_user(ascii_value, buf, 11)) 390 if (copy_from_user(ascii_value, buf, 11))
389 return -EFAULT; 391 return -EFAULT;
392 if (strlen(ascii_value) != 10)
393 return -EINVAL;
390 x = sscanf(ascii_value, "%x", &value); 394 x = sscanf(ascii_value, "%x", &value);
391 if (x != 1) 395 if (x != 1)
392 return -EINVAL; 396 return -EINVAL;
393 pr_debug(KERN_ERR "%s, %d, 0x%x\n", ascii_value, x, value); 397 pr_debug("%s, %d, 0x%x\n", ascii_value, x, value);
394 } else 398 } else
395 return -EINVAL; 399 return -EINVAL;
396 400
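The corrected kerneldoc spells out that pm_qos_add_request() initializes a caller-preallocated handle which the caller must keep around for later updates and removal. A rough sketch of that lifecycle; the driver name, the latency values and the update call site are assumptions, not taken from this diff:

#include <linux/pm_qos_params.h>

static struct pm_qos_request_list foo_qos_req;	/* caller-owned handle */

static void foo_start_streaming(void)
{
	/* Ask for at most 50 usec of CPU DMA latency while streaming. */
	pm_qos_add_request(&foo_qos_req, PM_QOS_CPU_DMA_LATENCY, 50);
}

static void foo_adjust(s32 new_latency_us)
{
	pm_qos_update_request(&foo_qos_req, new_latency_us);
}

static void foo_stop_streaming(void)
{
	pm_qos_remove_request(&foo_qos_req);
}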
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index c77963938bca..8dc31e02ae12 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -338,7 +338,6 @@ int hibernation_snapshot(int platform_mode)
338 goto Close; 338 goto Close;
339 339
340 suspend_console(); 340 suspend_console();
341 hibernation_freeze_swap();
342 saved_mask = clear_gfp_allowed_mask(GFP_IOFS); 341 saved_mask = clear_gfp_allowed_mask(GFP_IOFS);
343 error = dpm_suspend_start(PMSG_FREEZE); 342 error = dpm_suspend_start(PMSG_FREEZE);
344 if (error) 343 if (error)
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 5e7edfb05e66..d3f795f01bbc 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1086,7 +1086,6 @@ void swsusp_free(void)
1086 buffer = NULL; 1086 buffer = NULL;
1087 alloc_normal = 0; 1087 alloc_normal = 0;
1088 alloc_highmem = 0; 1088 alloc_highmem = 0;
1089 hibernation_thaw_swap();
1090} 1089}
1091 1090
1092/* Helper functions used for the shrinking of memory. */ 1091/* Helper functions used for the shrinking of memory. */
@@ -1122,9 +1121,19 @@ static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask)
1122 return nr_alloc; 1121 return nr_alloc;
1123} 1122}
1124 1123
1125static unsigned long preallocate_image_memory(unsigned long nr_pages) 1124static unsigned long preallocate_image_memory(unsigned long nr_pages,
1125 unsigned long avail_normal)
1126{ 1126{
1127 return preallocate_image_pages(nr_pages, GFP_IMAGE); 1127 unsigned long alloc;
1128
1129 if (avail_normal <= alloc_normal)
1130 return 0;
1131
1132 alloc = avail_normal - alloc_normal;
1133 if (nr_pages < alloc)
1134 alloc = nr_pages;
1135
1136 return preallocate_image_pages(alloc, GFP_IMAGE);
1128} 1137}
1129 1138
1130#ifdef CONFIG_HIGHMEM 1139#ifdef CONFIG_HIGHMEM
@@ -1170,15 +1179,22 @@ static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
1170 */ 1179 */
1171static void free_unnecessary_pages(void) 1180static void free_unnecessary_pages(void)
1172{ 1181{
1173 unsigned long save_highmem, to_free_normal, to_free_highmem; 1182 unsigned long save, to_free_normal, to_free_highmem;
1174 1183
1175 to_free_normal = alloc_normal - count_data_pages(); 1184 save = count_data_pages();
1176 save_highmem = count_highmem_pages(); 1185 if (alloc_normal >= save) {
1177 if (alloc_highmem > save_highmem) { 1186 to_free_normal = alloc_normal - save;
1178 to_free_highmem = alloc_highmem - save_highmem; 1187 save = 0;
1188 } else {
1189 to_free_normal = 0;
1190 save -= alloc_normal;
1191 }
1192 save += count_highmem_pages();
1193 if (alloc_highmem >= save) {
1194 to_free_highmem = alloc_highmem - save;
1179 } else { 1195 } else {
1180 to_free_highmem = 0; 1196 to_free_highmem = 0;
1181 to_free_normal -= save_highmem - alloc_highmem; 1197 to_free_normal -= save - alloc_highmem;
1182 } 1198 }
1183 1199
1184 memory_bm_position_reset(&copy_bm); 1200 memory_bm_position_reset(&copy_bm);
@@ -1259,7 +1275,7 @@ int hibernate_preallocate_memory(void)
1259{ 1275{
1260 struct zone *zone; 1276 struct zone *zone;
1261 unsigned long saveable, size, max_size, count, highmem, pages = 0; 1277 unsigned long saveable, size, max_size, count, highmem, pages = 0;
1262 unsigned long alloc, save_highmem, pages_highmem; 1278 unsigned long alloc, save_highmem, pages_highmem, avail_normal;
1263 struct timeval start, stop; 1279 struct timeval start, stop;
1264 int error; 1280 int error;
1265 1281
@@ -1296,6 +1312,7 @@ int hibernate_preallocate_memory(void)
1296 else 1312 else
1297 count += zone_page_state(zone, NR_FREE_PAGES); 1313 count += zone_page_state(zone, NR_FREE_PAGES);
1298 } 1314 }
1315 avail_normal = count;
1299 count += highmem; 1316 count += highmem;
1300 count -= totalreserve_pages; 1317 count -= totalreserve_pages;
1301 1318
@@ -1310,12 +1327,21 @@ int hibernate_preallocate_memory(void)
1310 */ 1327 */
1311 if (size >= saveable) { 1328 if (size >= saveable) {
1312 pages = preallocate_image_highmem(save_highmem); 1329 pages = preallocate_image_highmem(save_highmem);
1313 pages += preallocate_image_memory(saveable - pages); 1330 pages += preallocate_image_memory(saveable - pages, avail_normal);
1314 goto out; 1331 goto out;
1315 } 1332 }
1316 1333
1317 /* Estimate the minimum size of the image. */ 1334 /* Estimate the minimum size of the image. */
1318 pages = minimum_image_size(saveable); 1335 pages = minimum_image_size(saveable);
1336 /*
1337 * To avoid excessive pressure on the normal zone, leave room in it to
1338 * accommodate an image of the minimum size (unless it's already too
1339 * small, in which case don't preallocate pages from it at all).
1340 */
1341 if (avail_normal > pages)
1342 avail_normal -= pages;
1343 else
1344 avail_normal = 0;
1319 if (size < pages) 1345 if (size < pages)
1320 size = min_t(unsigned long, pages, max_size); 1346 size = min_t(unsigned long, pages, max_size);
1321 1347
@@ -1336,16 +1362,34 @@ int hibernate_preallocate_memory(void)
1336 */ 1362 */
1337 pages_highmem = preallocate_image_highmem(highmem / 2); 1363 pages_highmem = preallocate_image_highmem(highmem / 2);
1338 alloc = (count - max_size) - pages_highmem; 1364 alloc = (count - max_size) - pages_highmem;
1339 pages = preallocate_image_memory(alloc); 1365 pages = preallocate_image_memory(alloc, avail_normal);
1340 if (pages < alloc) 1366 if (pages < alloc) {
1341 goto err_out; 1367 /* We have exhausted non-highmem pages, try highmem. */
1342 size = max_size - size; 1368 alloc -= pages;
1343 alloc = size; 1369 pages += pages_highmem;
1344 size = preallocate_highmem_fraction(size, highmem, count); 1370 pages_highmem = preallocate_image_highmem(alloc);
1345 pages_highmem += size; 1371 if (pages_highmem < alloc)
1346 alloc -= size; 1372 goto err_out;
1347 pages += preallocate_image_memory(alloc); 1373 pages += pages_highmem;
1348 pages += pages_highmem; 1374 /*
1375 * size is the desired number of saveable pages to leave in
1376 * memory, so try to preallocate (all memory - size) pages.
1377 */
1378 alloc = (count - pages) - size;
1379 pages += preallocate_image_highmem(alloc);
1380 } else {
1381 /*
1382 * There are approximately max_size saveable pages at this point
1383 * and we want to reduce this number down to size.
1384 */
1385 alloc = max_size - size;
1386 size = preallocate_highmem_fraction(alloc, highmem, count);
1387 pages_highmem += size;
1388 alloc -= size;
1389 size = preallocate_image_memory(alloc, avail_normal);
1390 pages_highmem += preallocate_image_highmem(alloc - size);
1391 pages += pages_highmem + size;
1392 }
1349 1393
1350 /* 1394 /*
1351 * We only need as many page frames for the image as there are saveable 1395 * We only need as many page frames for the image as there are saveable
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 5d0059eed3e4..e6a5bdf61a37 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -136,10 +136,10 @@ sector_t alloc_swapdev_block(int swap)
136{ 136{
137 unsigned long offset; 137 unsigned long offset;
138 138
139 offset = swp_offset(get_swap_for_hibernation(swap)); 139 offset = swp_offset(get_swap_page_of_type(swap));
140 if (offset) { 140 if (offset) {
141 if (swsusp_extents_insert(offset)) 141 if (swsusp_extents_insert(offset))
142 swap_free_for_hibernation(swp_entry(swap, offset)); 142 swap_free(swp_entry(swap, offset));
143 else 143 else
144 return swapdev_block(swap, offset); 144 return swapdev_block(swap, offset);
145 } 145 }
@@ -163,7 +163,7 @@ void free_all_swap_pages(int swap)
163 ext = container_of(node, struct swsusp_extent, node); 163 ext = container_of(node, struct swsusp_extent, node);
164 rb_erase(node, &swsusp_extents); 164 rb_erase(node, &swsusp_extents);
165 for (offset = ext->start; offset <= ext->end; offset++) 165 for (offset = ext->start; offset <= ext->end; offset++)
166 swap_free_for_hibernation(swp_entry(swap, offset)); 166 swap_free(swp_entry(swap, offset));
167 167
168 kfree(ext); 168 kfree(ext);
169 } 169 }
diff --git a/kernel/sched.c b/kernel/sched.c
index 41541d79e3c8..dc85ceb90832 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1294,6 +1294,10 @@ static void resched_task(struct task_struct *p)
1294static void sched_rt_avg_update(struct rq *rq, u64 rt_delta) 1294static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
1295{ 1295{
1296} 1296}
1297
1298static void sched_avg_update(struct rq *rq)
1299{
1300}
1297#endif /* CONFIG_SMP */ 1301#endif /* CONFIG_SMP */
1298 1302
1299#if BITS_PER_LONG == 32 1303#if BITS_PER_LONG == 32
@@ -3182,6 +3186,8 @@ static void update_cpu_load(struct rq *this_rq)
3182 3186
3183 this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i; 3187 this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
3184 } 3188 }
3189
3190 sched_avg_update(this_rq);
3185} 3191}
3186 3192
3187static void update_cpu_load_active(struct rq *this_rq) 3193static void update_cpu_load_active(struct rq *this_rq)
@@ -3507,9 +3513,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
3507 rtime = nsecs_to_cputime(p->se.sum_exec_runtime); 3513 rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
3508 3514
3509 if (total) { 3515 if (total) {
3510 u64 temp; 3516 u64 temp = rtime;
3511 3517
3512 temp = (u64)(rtime * utime); 3518 temp *= utime;
3513 do_div(temp, total); 3519 do_div(temp, total);
3514 utime = (cputime_t)temp; 3520 utime = (cputime_t)temp;
3515 } else 3521 } else
@@ -3540,9 +3546,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
3540 rtime = nsecs_to_cputime(cputime.sum_exec_runtime); 3546 rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
3541 3547
3542 if (total) { 3548 if (total) {
3543 u64 temp; 3549 u64 temp = rtime;
3544 3550
3545 temp = (u64)(rtime * cputime.utime); 3551 temp *= cputime.utime;
3546 do_div(temp, total); 3552 do_div(temp, total);
3547 utime = (cputime_t)temp; 3553 utime = (cputime_t)temp;
3548 } else 3554 } else
@@ -3865,8 +3871,16 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
3865 /* 3871 /*
3866 * Owner changed, break to re-assess state. 3872 * Owner changed, break to re-assess state.
3867 */ 3873 */
3868 if (lock->owner != owner) 3874 if (lock->owner != owner) {
3875 /*
3876 * If the lock has switched to a different owner,
3877 * we likely have heavy contention. Return 0 to quit
3878 * optimistic spinning and not contend further:
3879 */
3880 if (lock->owner)
3881 return 0;
3869 break; 3882 break;
3883 }
3870 3884
3871 /* 3885 /*
3872 * Is that owner really running on that cpu? 3886 * Is that owner really running on that cpu?
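The task_times()/thread_group_times() hunks above widen the operands before multiplying: the old (u64)(rtime * utime) performed the multiplication in cputime_t precision and only then cast, so on configurations where cputime_t is 32 bits wide the product could wrap. A tiny stand-alone illustration with arbitrary values, not part of the diff:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t rtime = 200000, utime = 150000;	/* arbitrary large tick counts */

	uint64_t bad  = (uint64_t)(rtime * utime);	/* 32-bit multiply wraps first */
	uint64_t good = (uint64_t)rtime * utime;	/* widen first, as the fix does */

	printf("bad  = %llu\n", (unsigned long long)bad);	/* 4230196224 */
	printf("good = %llu\n", (unsigned long long)good);	/* 30000000000 */
	return 0;
}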
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 806d1b227a21..db3f674ca49d 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -54,13 +54,13 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling
54 * Minimal preemption granularity for CPU-bound tasks: 54 * Minimal preemption granularity for CPU-bound tasks:
55 * (default: 2 msec * (1 + ilog(ncpus)), units: nanoseconds) 55 * (default: 2 msec * (1 + ilog(ncpus)), units: nanoseconds)
56 */ 56 */
57unsigned int sysctl_sched_min_granularity = 2000000ULL; 57unsigned int sysctl_sched_min_granularity = 750000ULL;
58unsigned int normalized_sysctl_sched_min_granularity = 2000000ULL; 58unsigned int normalized_sysctl_sched_min_granularity = 750000ULL;
59 59
60/* 60/*
61 * is kept at sysctl_sched_latency / sysctl_sched_min_granularity 61 * is kept at sysctl_sched_latency / sysctl_sched_min_granularity
62 */ 62 */
63static unsigned int sched_nr_latency = 3; 63static unsigned int sched_nr_latency = 8;
64 64
65/* 65/*
66 * After fork, child runs first. If set to 0 (default) then 66 * After fork, child runs first. If set to 0 (default) then
@@ -1313,7 +1313,7 @@ static struct sched_group *
1313find_idlest_group(struct sched_domain *sd, struct task_struct *p, 1313find_idlest_group(struct sched_domain *sd, struct task_struct *p,
1314 int this_cpu, int load_idx) 1314 int this_cpu, int load_idx)
1315{ 1315{
1316 struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups; 1316 struct sched_group *idlest = NULL, *group = sd->groups;
1317 unsigned long min_load = ULONG_MAX, this_load = 0; 1317 unsigned long min_load = ULONG_MAX, this_load = 0;
1318 int imbalance = 100 + (sd->imbalance_pct-100)/2; 1318 int imbalance = 100 + (sd->imbalance_pct-100)/2;
1319 1319
@@ -1348,7 +1348,6 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
1348 1348
1349 if (local_group) { 1349 if (local_group) {
1350 this_load = avg_load; 1350 this_load = avg_load;
1351 this = group;
1352 } else if (avg_load < min_load) { 1351 } else if (avg_load < min_load) {
1353 min_load = avg_load; 1352 min_load = avg_load;
1354 idlest = group; 1353 idlest = group;
@@ -2268,8 +2267,6 @@ unsigned long scale_rt_power(int cpu)
2268 struct rq *rq = cpu_rq(cpu); 2267 struct rq *rq = cpu_rq(cpu);
2269 u64 total, available; 2268 u64 total, available;
2270 2269
2271 sched_avg_update(rq);
2272
2273 total = sched_avg_period() + (rq->clock - rq->age_stamp); 2270 total = sched_avg_period() + (rq->clock - rq->age_stamp);
2274 available = total - rq->rt_avg; 2271 available = total - rq->rt_avg;
2275 2272
@@ -3633,7 +3630,7 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
3633 if (time_before(now, nohz.next_balance)) 3630 if (time_before(now, nohz.next_balance))
3634 return 0; 3631 return 0;
3635 3632
3636 if (!rq->nr_running) 3633 if (rq->idle_at_tick)
3637 return 0; 3634 return 0;
3638 3635
3639 first_pick_cpu = atomic_read(&nohz.first_pick_cpu); 3636 first_pick_cpu = atomic_read(&nohz.first_pick_cpu);
@@ -3752,6 +3749,8 @@ static void task_fork_fair(struct task_struct *p)
3752 3749
3753 raw_spin_lock_irqsave(&rq->lock, flags); 3750 raw_spin_lock_irqsave(&rq->lock, flags);
3754 3751
3752 update_rq_clock(rq);
3753
3755 if (unlikely(task_cpu(p) != this_cpu)) 3754 if (unlikely(task_cpu(p) != this_cpu))
3756 __set_task_cpu(p, this_cpu); 3755 __set_task_cpu(p, this_cpu);
3757 3756
diff --git a/kernel/signal.c b/kernel/signal.c
index bded65187780..919562c3d6b7 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2215,6 +2215,14 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
2215#ifdef __ARCH_SI_TRAPNO 2215#ifdef __ARCH_SI_TRAPNO
2216 err |= __put_user(from->si_trapno, &to->si_trapno); 2216 err |= __put_user(from->si_trapno, &to->si_trapno);
2217#endif 2217#endif
2218#ifdef BUS_MCEERR_AO
2219 /*
2220 * Other callers might not initialize the si_lsb field,
2221 * so check explicitely for the right codes here.
2222 */
2223 if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)
2224 err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb);
2225#endif
2218 break; 2226 break;
2219 case __SI_CHLD: 2227 case __SI_CHLD:
2220 err |= __put_user(from->si_pid, &to->si_pid); 2228 err |= __put_user(from->si_pid, &to->si_pid);
diff --git a/kernel/smp.c b/kernel/smp.c
index 75c970c715d3..ed6aacfcb7ef 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -365,9 +365,10 @@ call:
365EXPORT_SYMBOL_GPL(smp_call_function_any); 365EXPORT_SYMBOL_GPL(smp_call_function_any);
366 366
367/** 367/**
368 * __smp_call_function_single(): Run a function on another CPU 368 * __smp_call_function_single(): Run a function on a specific CPU
369 * @cpu: The CPU to run on. 369 * @cpu: The CPU to run on.
370 * @data: Pre-allocated and setup data structure 370 * @data: Pre-allocated and setup data structure
371 * @wait: If true, wait until function has completed on specified CPU.
371 * 372 *
372 * Like smp_call_function_single(), but allow caller to pass in a 373 * Like smp_call_function_single(), but allow caller to pass in a
373 * pre-allocated data structure. Useful for embedding @data inside 374 * pre-allocated data structure. Useful for embedding @data inside
@@ -376,8 +377,10 @@ EXPORT_SYMBOL_GPL(smp_call_function_any);
376void __smp_call_function_single(int cpu, struct call_single_data *data, 377void __smp_call_function_single(int cpu, struct call_single_data *data,
377 int wait) 378 int wait)
378{ 379{
379 csd_lock(data); 380 unsigned int this_cpu;
381 unsigned long flags;
380 382
383 this_cpu = get_cpu();
381 /* 384 /*
382 * Can deadlock when called with interrupts disabled. 385 * Can deadlock when called with interrupts disabled.
383 * We allow cpu's that are not yet online though, as no one else can 386 * We allow cpu's that are not yet online though, as no one else can
@@ -387,7 +390,15 @@ void __smp_call_function_single(int cpu, struct call_single_data *data,
387 WARN_ON_ONCE(cpu_online(smp_processor_id()) && wait && irqs_disabled() 390 WARN_ON_ONCE(cpu_online(smp_processor_id()) && wait && irqs_disabled()
388 && !oops_in_progress); 391 && !oops_in_progress);
389 392
390 generic_exec_single(cpu, data, wait); 393 if (cpu == this_cpu) {
394 local_irq_save(flags);
395 data->func(data->info);
396 local_irq_restore(flags);
397 } else {
398 csd_lock(data);
399 generic_exec_single(cpu, data, wait);
400 }
401 put_cpu();
391} 402}
392 403
393/** 404/**
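Besides the kerneldoc fix, __smp_call_function_single() now handles cpu == this_cpu by running the function directly with interrupts disabled instead of locking the csd and going through the IPI path. A hedged usage sketch for the documented calling convention, with the pre-allocated call_single_data embedded in caller-owned state; all foo_* names are invented:

#include <linux/smp.h>
#include <linux/kernel.h>

struct foo_work {
	struct call_single_data csd;	/* embedded, pre-allocated by the caller */
	int			value;
};

static void foo_remote_func(void *info)
{
	struct foo_work *w = info;

	/* Runs from IPI context, or directly (IRQs off) when cpu == this cpu. */
	pr_info("foo: value=%d on cpu %d\n", w->value, smp_processor_id());
}

static void foo_kick_cpu(struct foo_work *w, int cpu)
{
	w->csd.func = foo_remote_func;
	w->csd.info = w;
	__smp_call_function_single(cpu, &w->csd, 0);	/* wait == 0: fire and forget */
}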
diff --git a/kernel/sys.c b/kernel/sys.c
index e9ad44489828..7f5a0cd296a9 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -931,6 +931,7 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
931 pgid = pid; 931 pgid = pid;
932 if (pgid < 0) 932 if (pgid < 0)
933 return -EINVAL; 933 return -EINVAL;
934 rcu_read_lock();
934 935
935 /* From this point forward we keep holding onto the tasklist lock 936 /* From this point forward we keep holding onto the tasklist lock
936 * so that our parent does not change from under us. -DaveM 937 * so that our parent does not change from under us. -DaveM
@@ -984,6 +985,7 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
984out: 985out:
985 /* All paths lead to here, thus we are safe. -DaveM */ 986 /* All paths lead to here, thus we are safe. -DaveM */
986 write_unlock_irq(&tasklist_lock); 987 write_unlock_irq(&tasklist_lock);
988 rcu_read_unlock();
987 return err; 989 return err;
988} 990}
989 991
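
The sys.c hunks bracket the pid lookups in setpgid() with rcu_read_lock()/rcu_read_unlock(): the read-side lock is taken after the argument checks and released on the single exit path, after the tasklist lock is dropped. A small standalone sketch of that "lock early, funnel every path through one unlock" shape; lock()/unlock() here are trivial counters standing in for the RCU primitives.

#include <stdio.h>

static int depth;
static void lock(void)   { depth++; }
static void unlock(void) { depth--; }

static int do_op(int arg)
{
        int err = 0;

        if (arg < 0)
                return -1;      /* argument checks happen before locking */

        lock();
        if (arg == 0) {
                err = -2;
                goto out;       /* every failure path still reaches out: */
        }
        printf("doing work with %d\n", arg);
out:
        unlock();               /* the one place the lock is dropped */
        return err;
}

int main(void)
{
        do_op(3);
        do_op(0);
        printf("lock depth after calls: %d\n", depth);  /* expect 0 */
        return 0;
}
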
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ca38e8e3e907..3a45c224770f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1713,10 +1713,7 @@ static __init int sysctl_init(void)
1713{ 1713{
1714 sysctl_set_parent(NULL, root_table); 1714 sysctl_set_parent(NULL, root_table);
1715#ifdef CONFIG_SYSCTL_SYSCALL_CHECK 1715#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1716 { 1716 sysctl_check_table(current->nsproxy, root_table);
1717 int err;
1718 err = sysctl_check_table(current->nsproxy, root_table);
1719 }
1720#endif 1717#endif
1721 return 0; 1718 return 0;
1722} 1719}
@@ -2488,7 +2485,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
2488 kbuf[left] = 0; 2485 kbuf[left] = 0;
2489 } 2486 }
2490 2487
2491 for (; left && vleft--; i++, min++, max++, first=0) { 2488 for (; left && vleft--; i++, first = 0) {
2492 unsigned long val; 2489 unsigned long val;
2493 2490
2494 if (write) { 2491 if (write) {
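
The second sysctl.c hunk stops stepping the min++ and max++ pointers on each loop iteration; the bound each element is checked against is a single value, so advancing the pointer past it would compare later elements against unrelated memory. A toy illustration of why the bound pointer must be reused rather than advanced (plain arrays, not the sysctl code):

#include <stdio.h>

#define N 4

int main(void)
{
        unsigned long vals[N] = { 1, 5, 9, 2 };
        unsigned long min_single = 3;      /* one bound for the whole vector */
        unsigned long *min = &min_single;
        int i, below = 0;

        /* correct: the same bound applies to every element */
        for (i = 0; i < N; i++) {
                if (vals[i] < *min)
                        below++;
        }
        printf("elements below the bound: %d\n", below);

        /* the removed pattern did "min++" each iteration, so elements after
         * the first were compared against whatever sits next to min_single */
        return 0;
}
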
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 0d88ce9b9fb8..fa7ece649fe1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -381,12 +381,19 @@ static int function_stat_show(struct seq_file *m, void *v)
381{ 381{
382 struct ftrace_profile *rec = v; 382 struct ftrace_profile *rec = v;
383 char str[KSYM_SYMBOL_LEN]; 383 char str[KSYM_SYMBOL_LEN];
384 int ret = 0;
384#ifdef CONFIG_FUNCTION_GRAPH_TRACER 385#ifdef CONFIG_FUNCTION_GRAPH_TRACER
385 static DEFINE_MUTEX(mutex);
386 static struct trace_seq s; 386 static struct trace_seq s;
387 unsigned long long avg; 387 unsigned long long avg;
388 unsigned long long stddev; 388 unsigned long long stddev;
389#endif 389#endif
390 mutex_lock(&ftrace_profile_lock);
391
392 /* we raced with function_profile_reset() */
393 if (unlikely(rec->counter == 0)) {
394 ret = -EBUSY;
395 goto out;
396 }
390 397
391 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 398 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
392 seq_printf(m, " %-30.30s %10lu", str, rec->counter); 399 seq_printf(m, " %-30.30s %10lu", str, rec->counter);
@@ -408,7 +415,6 @@ static int function_stat_show(struct seq_file *m, void *v)
408 do_div(stddev, (rec->counter - 1) * 1000); 415 do_div(stddev, (rec->counter - 1) * 1000);
409 } 416 }
410 417
411 mutex_lock(&mutex);
412 trace_seq_init(&s); 418 trace_seq_init(&s);
413 trace_print_graph_duration(rec->time, &s); 419 trace_print_graph_duration(rec->time, &s);
414 trace_seq_puts(&s, " "); 420 trace_seq_puts(&s, " ");
@@ -416,11 +422,12 @@ static int function_stat_show(struct seq_file *m, void *v)
416 trace_seq_puts(&s, " "); 422 trace_seq_puts(&s, " ");
417 trace_print_graph_duration(stddev, &s); 423 trace_print_graph_duration(stddev, &s);
418 trace_print_seq(m, &s); 424 trace_print_seq(m, &s);
419 mutex_unlock(&mutex);
420#endif 425#endif
421 seq_putc(m, '\n'); 426 seq_putc(m, '\n');
427out:
428 mutex_unlock(&ftrace_profile_lock);
422 429
423 return 0; 430 return ret;
424} 431}
425 432
426static void ftrace_profile_reset(struct ftrace_profile_stat *stat) 433static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
@@ -1503,6 +1510,8 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1503 if (*pos > 0) 1510 if (*pos > 0)
1504 return t_hash_start(m, pos); 1511 return t_hash_start(m, pos);
1505 iter->flags |= FTRACE_ITER_PRINTALL; 1512 iter->flags |= FTRACE_ITER_PRINTALL;
1513 /* reset in case of seek/pread */
1514 iter->flags &= ~FTRACE_ITER_HASH;
1506 return iter; 1515 return iter;
1507 } 1516 }
1508 1517
@@ -2409,7 +2418,7 @@ static const struct file_operations ftrace_filter_fops = {
2409 .open = ftrace_filter_open, 2418 .open = ftrace_filter_open,
2410 .read = seq_read, 2419 .read = seq_read,
2411 .write = ftrace_filter_write, 2420 .write = ftrace_filter_write,
2412 .llseek = ftrace_regex_lseek, 2421 .llseek = no_llseek,
2413 .release = ftrace_filter_release, 2422 .release = ftrace_filter_release,
2414}; 2423};
2415 2424
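
The first ftrace.c hunk makes function_stat_show() hold ftrace_profile_lock for its whole body and bail out with -EBUSY when rec->counter is zero, i.e. when it raced with function_profile_reset(); that also keeps the average computation from dividing by a zero counter. The local static mutex around the trace_seq is no longer needed. A plain-C sketch of the "lock, detect a concurrent reset, leave through one unlock point" shape (the lock is shown only as comments; struct profile_rec is illustrative):

#include <errno.h>
#include <stdio.h>

struct profile_rec { unsigned long counter; unsigned long time; };

static int show_rec(struct profile_rec *rec)
{
        int ret = 0;

        /* lock(profile_lock); */
        if (rec->counter == 0) {        /* raced with a reset */
                ret = -EBUSY;
                goto out;
        }
        printf("hits=%lu avg=%lu\n", rec->counter, rec->time / rec->counter);
out:
        /* unlock(profile_lock); */
        return ret;
}

int main(void)
{
        struct profile_rec live  = { 4, 400 };
        struct profile_rec reset = { 0, 0 };

        printf("live:  %d\n", show_rec(&live));
        printf("reset: %d\n", show_rec(&reset));
        return 0;
}
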
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3632ce87674f..492197e2f86c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2985,13 +2985,11 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
2985 2985
2986static void rb_advance_iter(struct ring_buffer_iter *iter) 2986static void rb_advance_iter(struct ring_buffer_iter *iter)
2987{ 2987{
2988 struct ring_buffer *buffer;
2989 struct ring_buffer_per_cpu *cpu_buffer; 2988 struct ring_buffer_per_cpu *cpu_buffer;
2990 struct ring_buffer_event *event; 2989 struct ring_buffer_event *event;
2991 unsigned length; 2990 unsigned length;
2992 2991
2993 cpu_buffer = iter->cpu_buffer; 2992 cpu_buffer = iter->cpu_buffer;
2994 buffer = cpu_buffer->buffer;
2995 2993
2996 /* 2994 /*
2997 * Check if we are at the end of the buffer. 2995 * Check if we are at the end of the buffer.
@@ -3846,6 +3844,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3846 rpos = reader->read; 3844 rpos = reader->read;
3847 pos += size; 3845 pos += size;
3848 3846
3847 if (rpos >= commit)
3848 break;
3849
3849 event = rb_reader_event(cpu_buffer); 3850 event = rb_reader_event(cpu_buffer);
3850 size = rb_event_length(event); 3851 size = rb_event_length(event);
3851 } while (len > size); 3852 } while (len > size);
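
The second ring_buffer.c hunk adds a guard so the copy loop in ring_buffer_read_page() stops once the read position reaches the commit offset, instead of interpreting bytes beyond the last committed event as another event. A toy standalone loop showing the same guard (sizes and the commit offset are made up):

#include <stdio.h>

int main(void)
{
        int lengths[] = { 3, 5, 2 };    /* pretend event sizes */
        int nevents = 3;
        int commit = 10;                /* valid data ends here */
        int rpos = 0, i, consumed = 0;

        for (i = 0; i < nevents; i++) {
                rpos += lengths[i];
                consumed++;
                if (rpos >= commit)     /* the added check */
                        break;
        }
        printf("consumed %d events, rpos=%d, commit=%d\n",
               consumed, rpos, commit);
        return 0;
}
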
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ba14a22be4cc..9ec59f541156 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3463,6 +3463,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3463 size_t cnt, loff_t *fpos) 3463 size_t cnt, loff_t *fpos)
3464{ 3464{
3465 char *buf; 3465 char *buf;
3466 size_t written;
3466 3467
3467 if (tracing_disabled) 3468 if (tracing_disabled)
3468 return -EINVAL; 3469 return -EINVAL;
@@ -3484,11 +3485,15 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3484 } else 3485 } else
3485 buf[cnt] = '\0'; 3486 buf[cnt] = '\0';
3486 3487
3487 cnt = mark_printk("%s", buf); 3488 written = mark_printk("%s", buf);
3488 kfree(buf); 3489 kfree(buf);
3489 *fpos += cnt; 3490 *fpos += written;
3490 3491
3491 return cnt; 3492 /* don't tell userspace we wrote more - it might confuse them */
3493 if (written > cnt)
3494 written = cnt;
3495
3496 return written;
3492} 3497}
3493 3498
3494static int tracing_clock_show(struct seq_file *m, void *v) 3499static int tracing_clock_show(struct seq_file *m, void *v)
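
The trace.c hunk makes tracing_mark_write() track how many bytes the internal mark_printk() actually produced, advance the file position by that amount, but never report more to userspace than the caller asked to write. A minimal sketch of the clamp; pretend_write() is a stand-in for the formatting call, which can emit more characters than the input.

#include <stdio.h>
#include <string.h>

static size_t pretend_write(const char *buf)
{
        /* the real path may add formatting, so it can exceed strlen(buf) */
        return strlen(buf) + 2;
}

int main(void)
{
        const char *buf = "hello";
        size_t cnt = strlen(buf);
        size_t written = pretend_write(buf);

        /* don't tell the caller we wrote more than it handed in */
        if (written > cnt)
                written = cnt;

        printf("requested %zu, reporting %zu\n", cnt, written);
        return 0;
}
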
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 000e6e85b445..31cc4cb0dbf2 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -91,6 +91,8 @@ int perf_trace_init(struct perf_event *p_event)
91 tp_event->class && tp_event->class->reg && 91 tp_event->class && tp_event->class->reg &&
92 try_module_get(tp_event->mod)) { 92 try_module_get(tp_event->mod)) {
93 ret = perf_trace_event_init(tp_event, p_event); 93 ret = perf_trace_event_init(tp_event, p_event);
94 if (ret)
95 module_put(tp_event->mod);
94 break; 96 break;
95 } 97 }
96 } 98 }
@@ -146,6 +148,7 @@ void perf_trace_destroy(struct perf_event *p_event)
146 } 148 }
147 } 149 }
148out: 150out:
151 module_put(tp_event->mod);
149 mutex_unlock(&event_mutex); 152 mutex_unlock(&event_mutex);
150} 153}
151 154
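
The trace_event_perf.c hunks balance the module reference taken with try_module_get(): the reference is dropped immediately when perf_trace_event_init() fails, and dropped again in perf_trace_destroy() for the success case. A minimal refcount-pairing sketch; get()/put() are toy counters, not try_module_get()/module_put().

#include <stdio.h>

static int refs;
static int get(void)  { refs++; return 1; }
static void put(void) { refs--; }

static int init_event(int fail)
{
        if (!get())
                return -1;
        if (fail) {
                put();          /* the added error-path release */
                return -1;
        }
        return 0;
}

static void destroy_event(void)
{
        put();                  /* the added release in destroy */
}

int main(void)
{
        if (init_event(0) == 0)
                destroy_event();
        init_event(1);
        printf("outstanding refs: %d\n", refs);  /* expect 0 */
        return 0;
}
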
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 09b4fa6e4d3b..4c758f146328 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -598,88 +598,165 @@ out:
598 return ret; 598 return ret;
599} 599}
600 600
601static void print_event_fields(struct trace_seq *s, struct list_head *head) 601enum {
602 FORMAT_HEADER = 1,
603 FORMAT_PRINTFMT = 2,
604};
605
606static void *f_next(struct seq_file *m, void *v, loff_t *pos)
602{ 607{
608 struct ftrace_event_call *call = m->private;
603 struct ftrace_event_field *field; 609 struct ftrace_event_field *field;
610 struct list_head *head;
604 611
605 list_for_each_entry_reverse(field, head, link) { 612 (*pos)++;
606 /*
607 * Smartly shows the array type(except dynamic array).
608 * Normal:
609 * field:TYPE VAR
610 * If TYPE := TYPE[LEN], it is shown:
611 * field:TYPE VAR[LEN]
612 */
613 const char *array_descriptor = strchr(field->type, '[');
614 613
615 if (!strncmp(field->type, "__data_loc", 10)) 614 switch ((unsigned long)v) {
616 array_descriptor = NULL; 615 case FORMAT_HEADER:
616 head = &ftrace_common_fields;
617 617
618 if (!array_descriptor) { 618 if (unlikely(list_empty(head)))
619 trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;" 619 return NULL;
620 "\tsize:%u;\tsigned:%d;\n", 620
621 field->type, field->name, field->offset, 621 field = list_entry(head->prev, struct ftrace_event_field, link);
622 field->size, !!field->is_signed); 622 return field;
623 } else { 623
624 trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;" 624 case FORMAT_PRINTFMT:
625 "\tsize:%u;\tsigned:%d;\n", 625 /* all done */
626 (int)(array_descriptor - field->type), 626 return NULL;
627 field->type, field->name, 627 }
628 array_descriptor, field->offset, 628
629 field->size, !!field->is_signed); 629 head = trace_get_fields(call);
630 } 630
631 /*
632 * To separate common fields from event fields, the
633 * LSB is set on the first event field. Clear it in case.
634 */
635 v = (void *)((unsigned long)v & ~1L);
636
637 field = v;
638 /*
639 * If this is a common field, and at the end of the list, then
640 * continue with main list.
641 */
642 if (field->link.prev == &ftrace_common_fields) {
643 if (unlikely(list_empty(head)))
644 return NULL;
645 field = list_entry(head->prev, struct ftrace_event_field, link);
646 /* Set the LSB to notify f_show to print an extra newline */
647 field = (struct ftrace_event_field *)
648 ((unsigned long)field | 1);
649 return field;
631 } 650 }
651
652 /* If we are done tell f_show to print the format */
653 if (field->link.prev == head)
654 return (void *)FORMAT_PRINTFMT;
655
656 field = list_entry(field->link.prev, struct ftrace_event_field, link);
657
658 return field;
632} 659}
633 660
634static ssize_t 661static void *f_start(struct seq_file *m, loff_t *pos)
635event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
636 loff_t *ppos)
637{ 662{
638 struct ftrace_event_call *call = filp->private_data; 663 loff_t l = 0;
639 struct list_head *head; 664 void *p;
640 struct trace_seq *s;
641 char *buf;
642 int r;
643 665
644 if (*ppos) 666 /* Start by showing the header */
667 if (!*pos)
668 return (void *)FORMAT_HEADER;
669
670 p = (void *)FORMAT_HEADER;
671 do {
672 p = f_next(m, p, &l);
673 } while (p && l < *pos);
674
675 return p;
676}
677
678static int f_show(struct seq_file *m, void *v)
679{
680 struct ftrace_event_call *call = m->private;
681 struct ftrace_event_field *field;
682 const char *array_descriptor;
683
684 switch ((unsigned long)v) {
685 case FORMAT_HEADER:
686 seq_printf(m, "name: %s\n", call->name);
687 seq_printf(m, "ID: %d\n", call->event.type);
688 seq_printf(m, "format:\n");
645 return 0; 689 return 0;
646 690
647 s = kmalloc(sizeof(*s), GFP_KERNEL); 691 case FORMAT_PRINTFMT:
648 if (!s) 692 seq_printf(m, "\nprint fmt: %s\n",
649 return -ENOMEM; 693 call->print_fmt);
694 return 0;
695 }
650 696
651 trace_seq_init(s); 697 /*
698 * To separate common fields from event fields, the
699 * LSB is set on the first event field. Clear it and
700 * print a newline if it is set.
701 */
702 if ((unsigned long)v & 1) {
703 seq_putc(m, '\n');
704 v = (void *)((unsigned long)v & ~1L);
705 }
652 706
653 trace_seq_printf(s, "name: %s\n", call->name); 707 field = v;
654 trace_seq_printf(s, "ID: %d\n", call->event.type);
655 trace_seq_printf(s, "format:\n");
656 708
657 /* print common fields */ 709 /*
658 print_event_fields(s, &ftrace_common_fields); 710 * Smartly shows the array type(except dynamic array).
711 * Normal:
712 * field:TYPE VAR
713 * If TYPE := TYPE[LEN], it is shown:
714 * field:TYPE VAR[LEN]
715 */
716 array_descriptor = strchr(field->type, '[');
659 717
660 trace_seq_putc(s, '\n'); 718 if (!strncmp(field->type, "__data_loc", 10))
719 array_descriptor = NULL;
661 720
662 /* print event specific fields */ 721 if (!array_descriptor)
663 head = trace_get_fields(call); 722 seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
664 print_event_fields(s, head); 723 field->type, field->name, field->offset,
724 field->size, !!field->is_signed);
725 else
726 seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
727 (int)(array_descriptor - field->type),
728 field->type, field->name,
729 array_descriptor, field->offset,
730 field->size, !!field->is_signed);
665 731
666 r = trace_seq_printf(s, "\nprint fmt: %s\n", call->print_fmt); 732 return 0;
733}
667 734
668 if (!r) { 735static void f_stop(struct seq_file *m, void *p)
669 /* 736{
670 * ug! The format output is bigger than a PAGE!! 737}
671 */
672 buf = "FORMAT TOO BIG\n";
673 r = simple_read_from_buffer(ubuf, cnt, ppos,
674 buf, strlen(buf));
675 goto out;
676 }
677 738
678 r = simple_read_from_buffer(ubuf, cnt, ppos, 739static const struct seq_operations trace_format_seq_ops = {
679 s->buffer, s->len); 740 .start = f_start,
680 out: 741 .next = f_next,
681 kfree(s); 742 .stop = f_stop,
682 return r; 743 .show = f_show,
744};
745
746static int trace_format_open(struct inode *inode, struct file *file)
747{
748 struct ftrace_event_call *call = inode->i_private;
749 struct seq_file *m;
750 int ret;
751
752 ret = seq_open(file, &trace_format_seq_ops);
753 if (ret < 0)
754 return ret;
755
756 m = file->private_data;
757 m->private = call;
758
759 return 0;
683} 760}
684 761
685static ssize_t 762static ssize_t
@@ -877,8 +954,10 @@ static const struct file_operations ftrace_enable_fops = {
877}; 954};
878 955
879static const struct file_operations ftrace_event_format_fops = { 956static const struct file_operations ftrace_event_format_fops = {
880 .open = tracing_open_generic, 957 .open = trace_format_open,
881 .read = event_format_read, 958 .read = seq_read,
959 .llseek = seq_lseek,
960 .release = seq_release,
882}; 961};
883 962
884static const struct file_operations ftrace_event_id_fops = { 963static const struct file_operations ftrace_event_id_fops = {
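
The trace_events.c change replaces the one-shot event_format_read() buffer, which could overflow a page and print "FORMAT TOO BIG", with a seq_file iterator: f_start()/f_next() walk a header sentinel, then the common fields, then the event's own fields, then a final print-fmt sentinel, and f_show() emits one piece at a time. A compact userspace analogue of that start/next/show state walk follows; the state names, field names and output strings are illustrative only.

#include <stdio.h>

static const char *common[] = { "common_type", "common_flags", "common_pid" };
static const char *fields[] = { "prev_comm", "prev_pid", "next_pid" };

enum { ST_HEADER, ST_COMMON, ST_FIELDS, ST_FOOTER, ST_DONE };

struct iter { int state; int idx; };

static int iter_next(struct iter *it)
{
        int n_common = (int)(sizeof(common) / sizeof(common[0]));
        int n_fields = (int)(sizeof(fields) / sizeof(fields[0]));

        switch (it->state) {
        case ST_HEADER:
                it->state = ST_COMMON;
                it->idx = 0;
                return 1;
        case ST_COMMON:
                if (++it->idx < n_common)
                        return 1;
                it->state = ST_FIELDS;
                it->idx = 0;
                return 1;
        case ST_FIELDS:
                if (++it->idx < n_fields)
                        return 1;
                it->state = ST_FOOTER;
                return 1;
        default:
                it->state = ST_DONE;
                return 0;
        }
}

static void iter_show(const struct iter *it)
{
        switch (it->state) {
        case ST_HEADER:
                printf("name: example_event\nformat:\n");
                break;
        case ST_COMMON:
                printf("\tfield:%s;\n", common[it->idx]);
                break;
        case ST_FIELDS:
                printf("\tfield:%s;\n", fields[it->idx]);
                break;
        case ST_FOOTER:
                printf("\nprint fmt: \"...\"\n");
                break;
        }
}

int main(void)
{
        struct iter it = { ST_HEADER, 0 };

        do {
                iter_show(&it);
        } while (iter_next(&it));
        return 0;
}

Because each record is produced on demand, the output can be arbitrarily long and seekable, which is why the file_operations below switch to seq_read/seq_lseek/seq_release.
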
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 6bff23625781..6f233698518e 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -507,7 +507,15 @@ get_return_for_leaf(struct trace_iterator *iter,
507 * if the output fails. 507 * if the output fails.
508 */ 508 */
509 data->ent = *curr; 509 data->ent = *curr;
510 data->ret = *next; 510 /*
511 * If the next event is not a return type, then
512 * we only care about what type it is. Otherwise we can
513 * safely copy the entire event.
514 */
515 if (next->ent.type == TRACE_GRAPH_RET)
516 data->ret = *next;
517 else
518 data->ret.ent.type = next->ent.type;
511 } 519 }
512 } 520 }
513 521
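
The trace_functions_graph.c hunk only copies the whole next event when its type tag is TRACE_GRAPH_RET; for any other type just the tag is recorded, since the rest of the payload may not match the expected layout. A toy sketch of that discriminated copy (the tagged struct is illustrative, not the ftrace entry layout):

#include <stdio.h>
#include <string.h>

enum { TYPE_RET = 1, TYPE_OTHER = 2 };

struct entry { int type; long payload; };

static void capture(struct entry *dst, const struct entry *next)
{
        if (next->type == TYPE_RET) {
                *dst = *next;           /* safe: payload has the right shape */
        } else {
                memset(dst, 0, sizeof(*dst));
                dst->type = next->type; /* only the tag is meaningful */
        }
}

int main(void)
{
        struct entry ret = { TYPE_RET, 42 }, other = { TYPE_OTHER, -1 }, d;

        capture(&d, &ret);
        printf("ret:   type=%d payload=%ld\n", d.type, d.payload);
        capture(&d, &other);
        printf("other: type=%d payload=%ld\n", d.type, d.payload);
        return 0;
}
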
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 8b27c9849b42..544301d29dee 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -514,8 +514,8 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
514static int kretprobe_dispatcher(struct kretprobe_instance *ri, 514static int kretprobe_dispatcher(struct kretprobe_instance *ri,
515 struct pt_regs *regs); 515 struct pt_regs *regs);
516 516
517/* Check the name is good for event/group */ 517/* Check the name is good for event/group/fields */
518static int check_event_name(const char *name) 518static int is_good_name(const char *name)
519{ 519{
520 if (!isalpha(*name) && *name != '_') 520 if (!isalpha(*name) && *name != '_')
521 return 0; 521 return 0;
@@ -557,7 +557,7 @@ static struct trace_probe *alloc_trace_probe(const char *group,
557 else 557 else
558 tp->rp.kp.pre_handler = kprobe_dispatcher; 558 tp->rp.kp.pre_handler = kprobe_dispatcher;
559 559
560 if (!event || !check_event_name(event)) { 560 if (!event || !is_good_name(event)) {
561 ret = -EINVAL; 561 ret = -EINVAL;
562 goto error; 562 goto error;
563 } 563 }
@@ -567,7 +567,7 @@ static struct trace_probe *alloc_trace_probe(const char *group,
567 if (!tp->call.name) 567 if (!tp->call.name)
568 goto error; 568 goto error;
569 569
570 if (!group || !check_event_name(group)) { 570 if (!group || !is_good_name(group)) {
571 ret = -EINVAL; 571 ret = -EINVAL;
572 goto error; 572 goto error;
573 } 573 }
@@ -883,7 +883,7 @@ static int create_trace_probe(int argc, char **argv)
883 int i, ret = 0; 883 int i, ret = 0;
884 int is_return = 0, is_delete = 0; 884 int is_return = 0, is_delete = 0;
885 char *symbol = NULL, *event = NULL, *group = NULL; 885 char *symbol = NULL, *event = NULL, *group = NULL;
886 char *arg, *tmp; 886 char *arg;
887 unsigned long offset = 0; 887 unsigned long offset = 0;
888 void *addr = NULL; 888 void *addr = NULL;
889 char buf[MAX_EVENT_NAME_LEN]; 889 char buf[MAX_EVENT_NAME_LEN];
@@ -992,26 +992,36 @@ static int create_trace_probe(int argc, char **argv)
992 /* parse arguments */ 992 /* parse arguments */
993 ret = 0; 993 ret = 0;
994 for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { 994 for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
995 /* Increment count for freeing args in error case */
996 tp->nr_args++;
997
995 /* Parse argument name */ 998 /* Parse argument name */
996 arg = strchr(argv[i], '='); 999 arg = strchr(argv[i], '=');
997 if (arg) 1000 if (arg) {
998 *arg++ = '\0'; 1001 *arg++ = '\0';
999 else 1002 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
1003 } else {
1000 arg = argv[i]; 1004 arg = argv[i];
1005 /* If argument name is omitted, set "argN" */
1006 snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
1007 tp->args[i].name = kstrdup(buf, GFP_KERNEL);
1008 }
1001 1009
1002 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
1003 if (!tp->args[i].name) { 1010 if (!tp->args[i].name) {
1004 pr_info("Failed to allocate argument%d name '%s'.\n", 1011 pr_info("Failed to allocate argument[%d] name.\n", i);
1005 i, argv[i]);
1006 ret = -ENOMEM; 1012 ret = -ENOMEM;
1007 goto error; 1013 goto error;
1008 } 1014 }
1009 tmp = strchr(tp->args[i].name, ':'); 1015
1010 if (tmp) 1016 if (!is_good_name(tp->args[i].name)) {
1011 *tmp = '_'; /* convert : to _ */ 1017 pr_info("Invalid argument[%d] name: %s\n",
1018 i, tp->args[i].name);
1019 ret = -EINVAL;
1020 goto error;
1021 }
1012 1022
1013 if (conflict_field_name(tp->args[i].name, tp->args, i)) { 1023 if (conflict_field_name(tp->args[i].name, tp->args, i)) {
1014 pr_info("Argument%d name '%s' conflicts with " 1024 pr_info("Argument[%d] name '%s' conflicts with "
1015 "another field.\n", i, argv[i]); 1025 "another field.\n", i, argv[i]);
1016 ret = -EINVAL; 1026 ret = -EINVAL;
1017 goto error; 1027 goto error;
@@ -1020,12 +1030,9 @@ static int create_trace_probe(int argc, char **argv)
1020 /* Parse fetch argument */ 1030 /* Parse fetch argument */
1021 ret = parse_probe_arg(arg, tp, &tp->args[i], is_return); 1031 ret = parse_probe_arg(arg, tp, &tp->args[i], is_return);
1022 if (ret) { 1032 if (ret) {
1023 pr_info("Parse error at argument%d. (%d)\n", i, ret); 1033 pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
1024 kfree(tp->args[i].name);
1025 goto error; 1034 goto error;
1026 } 1035 }
1027
1028 tp->nr_args++;
1029 } 1036 }
1030 1037
1031 ret = register_trace_probe(tp); 1038 ret = register_trace_probe(tp);
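
The trace_kprobe.c hunks count each argument as soon as parsing of it starts (so the error path frees partially parsed arguments), generate a default "argN" name when the "name=" part is omitted, and validate names with is_good_name() instead of silently rewriting ':' to '_'. A standalone sketch of that name handling; is_good_name() mirrors the identifier check in the hunk, while parse_arg() and the sample tokens are illustrative only.

#include <ctype.h>
#include <stdio.h>
#include <string.h>

static int is_good_name(const char *name)
{
        if (!isalpha((unsigned char)*name) && *name != '_')
                return 0;
        while (*++name != '\0') {
                if (!isalpha((unsigned char)*name) && *name != '_' &&
                    !isdigit((unsigned char)*name))
                        return 0;
        }
        return 1;
}

static void parse_arg(int i, char *tok)
{
        char namebuf[32];
        char *eq = strchr(tok, '=');
        const char *name, *value;

        if (eq) {
                *eq = '\0';
                name = tok;
                value = eq + 1;
        } else {
                /* if the argument name is omitted, use "argN" */
                snprintf(namebuf, sizeof(namebuf), "arg%d", i + 1);
                name = namebuf;
                value = tok;
        }
        printf("arg[%d] name=%s value=%s valid=%d\n",
               i, name, value, is_good_name(name));
}

int main(void)
{
        char a[] = "count=%ax", b[] = "+8($stack)", c[] = "1bad=%bx";

        parse_arg(0, a);
        parse_arg(1, b);
        parse_arg(2, c);
        return 0;
}
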
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 056468eae7cf..a6b7e0e0f3eb 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -249,7 +249,7 @@ static int trace_lookup_stack(struct seq_file *m, long i)
249{ 249{
250 unsigned long addr = stack_dump_trace[i]; 250 unsigned long addr = stack_dump_trace[i];
251 251
252 return seq_printf(m, "%pF\n", (void *)addr); 252 return seq_printf(m, "%pS\n", (void *)addr);
253} 253}
254 254
255static void print_disabled(struct seq_file *m) 255static void print_disabled(struct seq_file *m)
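
The trace_stack.c change swaps %pF for %pS: %pF expects a function-descriptor pointer (which matters on architectures such as ia64 and ppc64), while %pS symbolizes a plain text address, which is what a saved stack entry is. As a rough userspace analogue of "turn a code address into a symbol name", glibc's dladdr() can be used; build with something like cc -rdynamic prog.c -ldl so the executable's own symbols are visible.

#define _GNU_SOURCE
#include <dlfcn.h>
#include <stdio.h>

void target(void) { }

int main(void)
{
        void *addr = (void *)target;
        Dl_info info;

        if (dladdr(addr, &info) && info.dli_sname)
                printf("%p resolves to %s\n", addr, info.dli_sname);
        else
                printf("%p: no symbol found\n", addr);
        return 0;
}
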
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 613bc1f04610..7f9c3c52ecc1 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -122,7 +122,7 @@ static void __touch_watchdog(void)
122 122
123void touch_softlockup_watchdog(void) 123void touch_softlockup_watchdog(void)
124{ 124{
125 __get_cpu_var(watchdog_touch_ts) = 0; 125 __raw_get_cpu_var(watchdog_touch_ts) = 0;
126} 126}
127EXPORT_SYMBOL(touch_softlockup_watchdog); 127EXPORT_SYMBOL(touch_softlockup_watchdog);
128 128
@@ -142,7 +142,14 @@ void touch_all_softlockup_watchdogs(void)
142#ifdef CONFIG_HARDLOCKUP_DETECTOR 142#ifdef CONFIG_HARDLOCKUP_DETECTOR
143void touch_nmi_watchdog(void) 143void touch_nmi_watchdog(void)
144{ 144{
145 __get_cpu_var(watchdog_nmi_touch) = true; 145 if (watchdog_enabled) {
146 unsigned cpu;
147
148 for_each_present_cpu(cpu) {
149 if (per_cpu(watchdog_nmi_touch, cpu) != true)
150 per_cpu(watchdog_nmi_touch, cpu) = true;
151 }
152 }
146 touch_softlockup_watchdog(); 153 touch_softlockup_watchdog();
147} 154}
148EXPORT_SYMBOL(touch_nmi_watchdog); 155EXPORT_SYMBOL(touch_nmi_watchdog);
@@ -206,6 +213,9 @@ void watchdog_overflow_callback(struct perf_event *event, int nmi,
206 struct perf_sample_data *data, 213 struct perf_sample_data *data,
207 struct pt_regs *regs) 214 struct pt_regs *regs)
208{ 215{
216 /* Ensure the watchdog never gets throttled */
217 event->hw.interrupts = 0;
218
209 if (__get_cpu_var(watchdog_nmi_touch) == true) { 219 if (__get_cpu_var(watchdog_nmi_touch) == true) {
210 __get_cpu_var(watchdog_nmi_touch) = false; 220 __get_cpu_var(watchdog_nmi_touch) = false;
211 return; 221 return;
@@ -430,6 +440,9 @@ static int watchdog_enable(int cpu)
430 wake_up_process(p); 440 wake_up_process(p);
431 } 441 }
432 442
443 /* if any cpu succeeds, watchdog is considered enabled for the system */
444 watchdog_enabled = 1;
445
433 return 0; 446 return 0;
434} 447}
435 448
@@ -452,9 +465,6 @@ static void watchdog_disable(int cpu)
452 per_cpu(softlockup_watchdog, cpu) = NULL; 465 per_cpu(softlockup_watchdog, cpu) = NULL;
453 kthread_stop(p); 466 kthread_stop(p);
454 } 467 }
455
456 /* if any cpu succeeds, watchdog is considered enabled for the system */
457 watchdog_enabled = 1;
458} 468}
459 469
460static void watchdog_enable_all_cpus(void) 470static void watchdog_enable_all_cpus(void)
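
The watchdog.c hunks make touch_nmi_watchdog() mark the per-CPU watchdog_nmi_touch flag for every present CPU (and only when the watchdog is enabled at all), keep the perf event from being throttled, and move the point where watchdog_enabled is set from the disable path to the enable path. A plain-C sketch of the "touch all per-CPU flags" loop; ordinary arrays stand in for per-CPU variables.

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

static bool watchdog_enabled = true;
static bool nmi_touch[NR_CPUS];

static void touch_nmi_watchdog(void)
{
        int cpu;

        if (!watchdog_enabled)
                return;
        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                /* test first so already-touched flags are not rewritten */
                if (!nmi_touch[cpu])
                        nmi_touch[cpu] = true;
        }
}

int main(void)
{
        int cpu;

        touch_nmi_watchdog();
        for (cpu = 0; cpu < NR_CPUS; cpu++)
                printf("cpu%d touched=%d\n", cpu, nmi_touch[cpu]);
        return 0;
}
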
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2994a0e3a61c..f77afd939229 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1,19 +1,26 @@
1/* 1/*
2 * linux/kernel/workqueue.c 2 * kernel/workqueue.c - generic async execution with shared worker pool
3 * 3 *
4 * Generic mechanism for defining kernel helper threads for running 4 * Copyright (C) 2002 Ingo Molnar
5 * arbitrary tasks in process context.
6 * 5 *
7 * Started by Ingo Molnar, Copyright (C) 2002 6 * Derived from the taskqueue/keventd code by:
7 * David Woodhouse <dwmw2@infradead.org>
8 * Andrew Morton
9 * Kai Petzke <wpp@marie.physik.tu-berlin.de>
10 * Theodore Ts'o <tytso@mit.edu>
8 * 11 *
9 * Derived from the taskqueue/keventd code by: 12 * Made to use alloc_percpu by Christoph Lameter.
10 * 13 *
11 * David Woodhouse <dwmw2@infradead.org> 14 * Copyright (C) 2010 SUSE Linux Products GmbH
12 * Andrew Morton 15 * Copyright (C) 2010 Tejun Heo <tj@kernel.org>
13 * Kai Petzke <wpp@marie.physik.tu-berlin.de>
14 * Theodore Ts'o <tytso@mit.edu>
15 * 16 *
16 * Made to use alloc_percpu by Christoph Lameter. 17 * This is the generic async execution mechanism. Work items as are
18 * executed in process context. The worker pool is shared and
19 * automatically managed. There is one worker pool for each CPU and
20 * one extra for works which are better served by workers which are
21 * not bound to any specific CPU.
22 *
23 * Please read Documentation/workqueue.txt for details.
17 */ 24 */
18 25
19#include <linux/module.h> 26#include <linux/module.h>
@@ -35,6 +42,9 @@
35#include <linux/lockdep.h> 42#include <linux/lockdep.h>
36#include <linux/idr.h> 43#include <linux/idr.h>
37 44
45#define CREATE_TRACE_POINTS
46#include <trace/events/workqueue.h>
47
38#include "workqueue_sched.h" 48#include "workqueue_sched.h"
39 49
40enum { 50enum {
@@ -87,7 +97,8 @@ enum {
87/* 97/*
88 * Structure fields follow one of the following exclusion rules. 98 * Structure fields follow one of the following exclusion rules.
89 * 99 *
90 * I: Set during initialization and read-only afterwards. 100 * I: Modifiable by initialization/destruction paths and read-only for
101 * everyone else.
91 * 102 *
92 * P: Preemption protected. Disabling preemption is enough and should 103 * P: Preemption protected. Disabling preemption is enough and should
93 * only be modified and accessed from the local cpu. 104 * only be modified and accessed from the local cpu.
@@ -195,7 +206,7 @@ typedef cpumask_var_t mayday_mask_t;
195 cpumask_test_and_set_cpu((cpu), (mask)) 206 cpumask_test_and_set_cpu((cpu), (mask))
196#define mayday_clear_cpu(cpu, mask) cpumask_clear_cpu((cpu), (mask)) 207#define mayday_clear_cpu(cpu, mask) cpumask_clear_cpu((cpu), (mask))
197#define for_each_mayday_cpu(cpu, mask) for_each_cpu((cpu), (mask)) 208#define for_each_mayday_cpu(cpu, mask) for_each_cpu((cpu), (mask))
198#define alloc_mayday_mask(maskp, gfp) alloc_cpumask_var((maskp), (gfp)) 209#define alloc_mayday_mask(maskp, gfp) zalloc_cpumask_var((maskp), (gfp))
199#define free_mayday_mask(mask) free_cpumask_var((mask)) 210#define free_mayday_mask(mask) free_cpumask_var((mask))
200#else 211#else
201typedef unsigned long mayday_mask_t; 212typedef unsigned long mayday_mask_t;
@@ -940,10 +951,14 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
940 struct global_cwq *gcwq; 951 struct global_cwq *gcwq;
941 struct cpu_workqueue_struct *cwq; 952 struct cpu_workqueue_struct *cwq;
942 struct list_head *worklist; 953 struct list_head *worklist;
954 unsigned int work_flags;
943 unsigned long flags; 955 unsigned long flags;
944 956
945 debug_work_activate(work); 957 debug_work_activate(work);
946 958
959 if (WARN_ON_ONCE(wq->flags & WQ_DYING))
960 return;
961
947 /* determine gcwq to use */ 962 /* determine gcwq to use */
948 if (!(wq->flags & WQ_UNBOUND)) { 963 if (!(wq->flags & WQ_UNBOUND)) {
949 struct global_cwq *last_gcwq; 964 struct global_cwq *last_gcwq;
@@ -986,14 +1001,17 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
986 BUG_ON(!list_empty(&work->entry)); 1001 BUG_ON(!list_empty(&work->entry));
987 1002
988 cwq->nr_in_flight[cwq->work_color]++; 1003 cwq->nr_in_flight[cwq->work_color]++;
1004 work_flags = work_color_to_flags(cwq->work_color);
989 1005
990 if (likely(cwq->nr_active < cwq->max_active)) { 1006 if (likely(cwq->nr_active < cwq->max_active)) {
991 cwq->nr_active++; 1007 cwq->nr_active++;
992 worklist = gcwq_determine_ins_pos(gcwq, cwq); 1008 worklist = gcwq_determine_ins_pos(gcwq, cwq);
993 } else 1009 } else {
1010 work_flags |= WORK_STRUCT_DELAYED;
994 worklist = &cwq->delayed_works; 1011 worklist = &cwq->delayed_works;
1012 }
995 1013
996 insert_work(cwq, work, worklist, work_color_to_flags(cwq->work_color)); 1014 insert_work(cwq, work, worklist, work_flags);
997 1015
998 spin_unlock_irqrestore(&gcwq->lock, flags); 1016 spin_unlock_irqrestore(&gcwq->lock, flags);
999} 1017}
@@ -1212,6 +1230,7 @@ static void worker_leave_idle(struct worker *worker)
1212 * bound), %false if offline. 1230 * bound), %false if offline.
1213 */ 1231 */
1214static bool worker_maybe_bind_and_lock(struct worker *worker) 1232static bool worker_maybe_bind_and_lock(struct worker *worker)
1233__acquires(&gcwq->lock)
1215{ 1234{
1216 struct global_cwq *gcwq = worker->gcwq; 1235 struct global_cwq *gcwq = worker->gcwq;
1217 struct task_struct *task = worker->task; 1236 struct task_struct *task = worker->task;
@@ -1485,6 +1504,8 @@ static void gcwq_mayday_timeout(unsigned long __gcwq)
1485 * otherwise. 1504 * otherwise.
1486 */ 1505 */
1487static bool maybe_create_worker(struct global_cwq *gcwq) 1506static bool maybe_create_worker(struct global_cwq *gcwq)
1507__releases(&gcwq->lock)
1508__acquires(&gcwq->lock)
1488{ 1509{
1489 if (!need_to_create_worker(gcwq)) 1510 if (!need_to_create_worker(gcwq))
1490 return false; 1511 return false;
@@ -1659,6 +1680,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
1659 struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq); 1680 struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq);
1660 1681
1661 move_linked_works(work, pos, NULL); 1682 move_linked_works(work, pos, NULL);
1683 __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
1662 cwq->nr_active++; 1684 cwq->nr_active++;
1663} 1685}
1664 1686
@@ -1666,6 +1688,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
1666 * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight 1688 * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
1667 * @cwq: cwq of interest 1689 * @cwq: cwq of interest
1668 * @color: color of work which left the queue 1690 * @color: color of work which left the queue
1691 * @delayed: for a delayed work
1669 * 1692 *
1670 * A work either has completed or is removed from pending queue, 1693 * A work either has completed or is removed from pending queue,
1671 * decrement nr_in_flight of its cwq and handle workqueue flushing. 1694 * decrement nr_in_flight of its cwq and handle workqueue flushing.
@@ -1673,19 +1696,22 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
1673 * CONTEXT: 1696 * CONTEXT:
1674 * spin_lock_irq(gcwq->lock). 1697 * spin_lock_irq(gcwq->lock).
1675 */ 1698 */
1676static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) 1699static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color,
1700 bool delayed)
1677{ 1701{
1678 /* ignore uncolored works */ 1702 /* ignore uncolored works */
1679 if (color == WORK_NO_COLOR) 1703 if (color == WORK_NO_COLOR)
1680 return; 1704 return;
1681 1705
1682 cwq->nr_in_flight[color]--; 1706 cwq->nr_in_flight[color]--;
1683 cwq->nr_active--;
1684 1707
1685 if (!list_empty(&cwq->delayed_works)) { 1708 if (!delayed) {
1686 /* one down, submit a delayed one */ 1709 cwq->nr_active--;
1687 if (cwq->nr_active < cwq->max_active) 1710 if (!list_empty(&cwq->delayed_works)) {
1688 cwq_activate_first_delayed(cwq); 1711 /* one down, submit a delayed one */
1712 if (cwq->nr_active < cwq->max_active)
1713 cwq_activate_first_delayed(cwq);
1714 }
1689 } 1715 }
1690 1716
1691 /* is flush in progress and are we at the flushing tip? */ 1717 /* is flush in progress and are we at the flushing tip? */
@@ -1722,6 +1748,8 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
1722 * spin_lock_irq(gcwq->lock) which is released and regrabbed. 1748 * spin_lock_irq(gcwq->lock) which is released and regrabbed.
1723 */ 1749 */
1724static void process_one_work(struct worker *worker, struct work_struct *work) 1750static void process_one_work(struct worker *worker, struct work_struct *work)
1751__releases(&gcwq->lock)
1752__acquires(&gcwq->lock)
1725{ 1753{
1726 struct cpu_workqueue_struct *cwq = get_work_cwq(work); 1754 struct cpu_workqueue_struct *cwq = get_work_cwq(work);
1727 struct global_cwq *gcwq = cwq->gcwq; 1755 struct global_cwq *gcwq = cwq->gcwq;
@@ -1790,7 +1818,13 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
1790 work_clear_pending(work); 1818 work_clear_pending(work);
1791 lock_map_acquire(&cwq->wq->lockdep_map); 1819 lock_map_acquire(&cwq->wq->lockdep_map);
1792 lock_map_acquire(&lockdep_map); 1820 lock_map_acquire(&lockdep_map);
1821 trace_workqueue_execute_start(work);
1793 f(work); 1822 f(work);
1823 /*
1824 * While we must be careful to not use "work" after this, the trace
1825 * point will only record its address.
1826 */
1827 trace_workqueue_execute_end(work);
1794 lock_map_release(&lockdep_map); 1828 lock_map_release(&lockdep_map);
1795 lock_map_release(&cwq->wq->lockdep_map); 1829 lock_map_release(&cwq->wq->lockdep_map);
1796 1830
@@ -1814,7 +1848,7 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
1814 hlist_del_init(&worker->hentry); 1848 hlist_del_init(&worker->hentry);
1815 worker->current_work = NULL; 1849 worker->current_work = NULL;
1816 worker->current_cwq = NULL; 1850 worker->current_cwq = NULL;
1817 cwq_dec_nr_in_flight(cwq, work_color); 1851 cwq_dec_nr_in_flight(cwq, work_color, false);
1818} 1852}
1819 1853
1820/** 1854/**
@@ -2379,7 +2413,8 @@ static int try_to_grab_pending(struct work_struct *work)
2379 debug_work_deactivate(work); 2413 debug_work_deactivate(work);
2380 list_del_init(&work->entry); 2414 list_del_init(&work->entry);
2381 cwq_dec_nr_in_flight(get_work_cwq(work), 2415 cwq_dec_nr_in_flight(get_work_cwq(work),
2382 get_work_color(work)); 2416 get_work_color(work),
2417 *work_data_bits(work) & WORK_STRUCT_DELAYED);
2383 ret = 1; 2418 ret = 1;
2384 } 2419 }
2385 } 2420 }
@@ -2782,7 +2817,6 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name,
2782 if (IS_ERR(rescuer->task)) 2817 if (IS_ERR(rescuer->task))
2783 goto err; 2818 goto err;
2784 2819
2785 wq->rescuer = rescuer;
2786 rescuer->task->flags |= PF_THREAD_BOUND; 2820 rescuer->task->flags |= PF_THREAD_BOUND;
2787 wake_up_process(rescuer->task); 2821 wake_up_process(rescuer->task);
2788 } 2822 }
@@ -2824,6 +2858,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
2824{ 2858{
2825 unsigned int cpu; 2859 unsigned int cpu;
2826 2860
2861 wq->flags |= WQ_DYING;
2827 flush_workqueue(wq); 2862 flush_workqueue(wq);
2828 2863
2829 /* 2864 /*
@@ -2848,6 +2883,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
2848 if (wq->flags & WQ_RESCUER) { 2883 if (wq->flags & WQ_RESCUER) {
2849 kthread_stop(wq->rescuer->task); 2884 kthread_stop(wq->rescuer->task);
2850 free_mayday_mask(wq->mayday_mask); 2885 free_mayday_mask(wq->mayday_mask);
2886 kfree(wq->rescuer);
2851 } 2887 }
2852 2888
2853 free_cwqs(wq); 2889 free_cwqs(wq);
@@ -3230,6 +3266,8 @@ static int __cpuinit trustee_thread(void *__gcwq)
3230 * multiple times. To be used by cpu_callback. 3266 * multiple times. To be used by cpu_callback.
3231 */ 3267 */
3232static void __cpuinit wait_trustee_state(struct global_cwq *gcwq, int state) 3268static void __cpuinit wait_trustee_state(struct global_cwq *gcwq, int state)
3269__releases(&gcwq->lock)
3270__acquires(&gcwq->lock)
3233{ 3271{
3234 if (!(gcwq->trustee_state == state || 3272 if (!(gcwq->trustee_state == state ||
3235 gcwq->trustee_state == TRUSTEE_DONE)) { 3273 gcwq->trustee_state == TRUSTEE_DONE)) {
@@ -3536,8 +3574,7 @@ static int __init init_workqueues(void)
3536 spin_lock_init(&gcwq->lock); 3574 spin_lock_init(&gcwq->lock);
3537 INIT_LIST_HEAD(&gcwq->worklist); 3575 INIT_LIST_HEAD(&gcwq->worklist);
3538 gcwq->cpu = cpu; 3576 gcwq->cpu = cpu;
3539 if (cpu == WORK_CPU_UNBOUND) 3577 gcwq->flags |= GCWQ_DISASSOCIATED;
3540 gcwq->flags |= GCWQ_DISASSOCIATED;
3541 3578
3542 INIT_LIST_HEAD(&gcwq->idle_list); 3579 INIT_LIST_HEAD(&gcwq->idle_list);
3543 for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) 3580 for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)
@@ -3561,6 +3598,8 @@ static int __init init_workqueues(void)
3561 struct global_cwq *gcwq = get_gcwq(cpu); 3598 struct global_cwq *gcwq = get_gcwq(cpu);
3562 struct worker *worker; 3599 struct worker *worker;
3563 3600
3601 if (cpu != WORK_CPU_UNBOUND)
3602 gcwq->flags &= ~GCWQ_DISASSOCIATED;
3564 worker = create_worker(gcwq, true); 3603 worker = create_worker(gcwq, true);
3565 BUG_ON(!worker); 3604 BUG_ON(!worker);
3566 spin_lock_irq(&gcwq->lock); 3605 spin_lock_irq(&gcwq->lock);