Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit.c                 |  2
-rw-r--r--  kernel/audit_tree.c            |  1
-rw-r--r--  kernel/cgroup.c                | 31
-rw-r--r--  kernel/cpu.c                   | 55
-rw-r--r--  kernel/exit.c                  |  2
-rw-r--r--  kernel/irq/irqdomain.c         |  9
-rw-r--r--  kernel/printk.c                | 91
-rw-r--r--  kernel/range.c                 |  8
-rw-r--r--  kernel/rcutree.c               | 21
-rw-r--r--  kernel/rcutree.h               |  2
-rw-r--r--  kernel/softirq.c               | 13
-rw-r--r--  kernel/sys.c                   | 29
-rw-r--r--  kernel/time/ntp.c              |  1
-rw-r--r--  kernel/time/tick-broadcast.c   |  8
-rw-r--r--  kernel/time/timekeeping.c      |  8
-rw-r--r--  kernel/trace/ftrace.c          | 18
-rw-r--r--  kernel/trace/ring_buffer.c     |  3
-rw-r--r--  kernel/trace/trace.c           | 27
-rw-r--r--  kernel/trace/trace.h           |  2
-rw-r--r--  kernel/trace/trace_selftest.c  |  2
20 files changed, 206 insertions, 127 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index 21c7fa615bd3..91e53d04b6a9 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1056,7 +1056,7 @@ static inline void audit_get_stamp(struct audit_context *ctx,
 static void wait_for_auditd(unsigned long sleep_time)
 {
 	DECLARE_WAITQUEUE(wait, current);
-	set_current_state(TASK_INTERRUPTIBLE);
+	set_current_state(TASK_UNINTERRUPTIBLE);
 	add_wait_queue(&audit_backlog_wait, &wait);
 
 	if (audit_backlog_limit &&
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index a291aa23fb3f..43c307dc9453 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -658,6 +658,7 @@ int audit_add_tree_rule(struct audit_krule *rule)
 	struct vfsmount *mnt;
 	int err;
 
+	rule->tree = NULL;
 	list_for_each_entry(tree, &tree_list, list) {
 		if (!strcmp(seed->pathname, tree->pathname)) {
 			put_tree(seed);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2a9926275f80..a7c9e6ddb979 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1686,11 +1686,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 	 */
 	cgroup_drop_root(opts.new_root);
 
-	if (((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) &&
-	    root->flags != opts.flags) {
-		pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n");
-		ret = -EINVAL;
-		goto drop_new_super;
+	if (root->flags != opts.flags) {
+		if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) {
+			pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n");
+			ret = -EINVAL;
+			goto drop_new_super;
+		} else {
+			pr_warning("cgroup: new mount options do not match the existing superblock, will be ignored\n");
+		}
 	}
 
 	/* no subsys rebinding, so refcounts don't change */
@@ -2699,13 +2702,14 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
 		goto out;
 	}
 
+	cfe->type = (void *)cft;
+	cfe->dentry = dentry;
+	dentry->d_fsdata = cfe;
+	simple_xattrs_init(&cfe->xattrs);
+
 	mode = cgroup_file_mode(cft);
 	error = cgroup_create_file(dentry, mode | S_IFREG, cgrp->root->sb);
 	if (!error) {
-		cfe->type = (void *)cft;
-		cfe->dentry = dentry;
-		dentry->d_fsdata = cfe;
-		simple_xattrs_init(&cfe->xattrs);
 		list_add_tail(&cfe->node, &parent->files);
 		cfe = NULL;
 	}
@@ -2953,11 +2957,8 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
 	/* if first iteration, pretend we just visited @cgroup */
-	if (!pos) {
-		if (list_empty(&cgroup->children))
-			return NULL;
+	if (!pos)
 		pos = cgroup;
-	}
 
 	/* visit the first child if exists */
 	next = list_first_or_null_rcu(&pos->children, struct cgroup, sibling);
@@ -2965,14 +2966,14 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
 		return next;
 
 	/* no child, visit my or the closest ancestor's next sibling */
-	do {
+	while (pos != cgroup) {
 		next = list_entry_rcu(pos->sibling.next, struct cgroup,
 				      sibling);
 		if (&next->sibling != &pos->parent->children)
 			return next;
 
 		pos = pos->parent;
-	} while (pos != cgroup);
+	}
 
 	return NULL;
 }
diff --git a/kernel/cpu.c b/kernel/cpu.c
index b5e4ab2d427e..198a38883e64 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -133,6 +133,27 @@ static void cpu_hotplug_done(void)
 	mutex_unlock(&cpu_hotplug.lock);
 }
 
+/*
+ * Wait for currently running CPU hotplug operations to complete (if any) and
+ * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
+ * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
+ * hotplug path before performing hotplug operations. So acquiring that lock
+ * guarantees mutual exclusion from any currently running hotplug operations.
+ */
+void cpu_hotplug_disable(void)
+{
+	cpu_maps_update_begin();
+	cpu_hotplug_disabled = 1;
+	cpu_maps_update_done();
+}
+
+void cpu_hotplug_enable(void)
+{
+	cpu_maps_update_begin();
+	cpu_hotplug_disabled = 0;
+	cpu_maps_update_done();
+}
+
 #else	/* #if CONFIG_HOTPLUG_CPU */
 static void cpu_hotplug_begin(void) {}
 static void cpu_hotplug_done(void) {}
@@ -541,36 +562,6 @@ static int __init alloc_frozen_cpus(void)
 core_initcall(alloc_frozen_cpus);
 
 /*
- * Prevent regular CPU hotplug from racing with the freezer, by disabling CPU
- * hotplug when tasks are about to be frozen. Also, don't allow the freezer
- * to continue until any currently running CPU hotplug operation gets
- * completed.
- * To modify the 'cpu_hotplug_disabled' flag, we need to acquire the
- * 'cpu_add_remove_lock'. And this same lock is also taken by the regular
- * CPU hotplug path and released only after it is complete. Thus, we
- * (and hence the freezer) will block here until any currently running CPU
- * hotplug operation gets completed.
- */
-void cpu_hotplug_disable_before_freeze(void)
-{
-	cpu_maps_update_begin();
-	cpu_hotplug_disabled = 1;
-	cpu_maps_update_done();
-}
-
-
-/*
- * When tasks have been thawed, re-enable regular CPU hotplug (which had been
- * disabled while beginning to freeze tasks).
- */
-void cpu_hotplug_enable_after_thaw(void)
-{
-	cpu_maps_update_begin();
-	cpu_hotplug_disabled = 0;
-	cpu_maps_update_done();
-}
-
-/*
  * When callbacks for CPU hotplug notifications are being executed, we must
  * ensure that the state of the system with respect to the tasks being frozen
  * or not, as reported by the notification, remains unchanged *throughout the
@@ -589,12 +580,12 @@ cpu_hotplug_pm_callback(struct notifier_block *nb,
 
 	case PM_SUSPEND_PREPARE:
 	case PM_HIBERNATION_PREPARE:
-		cpu_hotplug_disable_before_freeze();
+		cpu_hotplug_disable();
 		break;
 
 	case PM_POST_SUSPEND:
 	case PM_POST_HIBERNATION:
-		cpu_hotplug_enable_after_thaw();
+		cpu_hotplug_enable();
 		break;
 
 	default:
diff --git a/kernel/exit.c b/kernel/exit.c
index af2eb3cbd499..7bb73f9d09db 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -649,7 +649,6 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	 * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
 	 */
 	forget_original_parent(tsk);
-	exit_task_namespaces(tsk);
 
 	write_lock_irq(&tasklist_lock);
 	if (group_dead)
@@ -795,6 +794,7 @@ void do_exit(long code)
 	exit_shm(tsk);
 	exit_files(tsk);
 	exit_fs(tsk);
+	exit_task_namespaces(tsk);
 	exit_task_work(tsk);
 	check_stack_usage();
 	exit_thread();
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 5a83dde8ca0c..54a4d5223238 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -143,7 +143,10 @@ static unsigned int irq_domain_legacy_revmap(struct irq_domain *domain,
  * irq_domain_add_simple() - Allocate and register a simple irq_domain.
  * @of_node: pointer to interrupt controller's device tree node.
  * @size: total number of irqs in mapping
- * @first_irq: first number of irq block assigned to the domain
+ * @first_irq: first number of irq block assigned to the domain,
+ *	pass zero to assign irqs on-the-fly. This will result in a
+ *	linear IRQ domain so it is important to use irq_create_mapping()
+ *	for each used IRQ, especially when SPARSE_IRQ is enabled.
  * @ops: map/unmap domain callbacks
  * @host_data: Controller private data pointer
  *
@@ -191,6 +194,7 @@ struct irq_domain *irq_domain_add_simple(struct device_node *of_node,
 	/* A linear domain is the default */
 	return irq_domain_add_linear(of_node, size, ops, host_data);
 }
+EXPORT_SYMBOL_GPL(irq_domain_add_simple);
 
 /**
  * irq_domain_add_legacy() - Allocate and register a legacy revmap irq_domain.
@@ -397,11 +401,12 @@ static void irq_domain_disassociate_many(struct irq_domain *domain,
 	while (count--) {
 		int irq = irq_base + count;
 		struct irq_data *irq_data = irq_get_irq_data(irq);
-		irq_hw_number_t hwirq = irq_data->hwirq;
+		irq_hw_number_t hwirq;
 
 		if (WARN_ON(!irq_data || irq_data->domain != domain))
 			continue;
 
+		hwirq = irq_data->hwirq;
 		irq_set_status_flags(irq, IRQ_NOREQUEST);
 
 		/* remove chip and handler */
diff --git a/kernel/printk.c b/kernel/printk.c
index fa36e1494420..8212c1aef125 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -363,6 +363,53 @@ static void log_store(int facility, int level,
 	log_next_seq++;
 }
 
+#ifdef CONFIG_SECURITY_DMESG_RESTRICT
+int dmesg_restrict = 1;
+#else
+int dmesg_restrict;
+#endif
+
+static int syslog_action_restricted(int type)
+{
+	if (dmesg_restrict)
+		return 1;
+	/*
+	 * Unless restricted, we allow "read all" and "get buffer size"
+	 * for everybody.
+	 */
+	return type != SYSLOG_ACTION_READ_ALL &&
+	       type != SYSLOG_ACTION_SIZE_BUFFER;
+}
+
+static int check_syslog_permissions(int type, bool from_file)
+{
+	/*
+	 * If this is from /proc/kmsg and we've already opened it, then we've
+	 * already done the capabilities checks at open time.
+	 */
+	if (from_file && type != SYSLOG_ACTION_OPEN)
+		return 0;
+
+	if (syslog_action_restricted(type)) {
+		if (capable(CAP_SYSLOG))
+			return 0;
+		/*
+		 * For historical reasons, accept CAP_SYS_ADMIN too, with
+		 * a warning.
+		 */
+		if (capable(CAP_SYS_ADMIN)) {
+			pr_warn_once("%s (%d): Attempt to access syslog with "
+				     "CAP_SYS_ADMIN but no CAP_SYSLOG "
+				     "(deprecated).\n",
+				     current->comm, task_pid_nr(current));
+			return 0;
+		}
+		return -EPERM;
+	}
+	return security_syslog(type);
+}
+
+
 /* /dev/kmsg - userspace message inject/listen interface */
 struct devkmsg_user {
 	u64 seq;
@@ -620,7 +667,8 @@ static int devkmsg_open(struct inode *inode, struct file *file)
 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
 		return 0;
 
-	err = security_syslog(SYSLOG_ACTION_READ_ALL);
+	err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL,
+				       SYSLOG_FROM_READER);
 	if (err)
 		return err;
 
@@ -813,45 +861,6 @@ static inline void boot_delay_msec(int level)
 }
 #endif
 
-#ifdef CONFIG_SECURITY_DMESG_RESTRICT
-int dmesg_restrict = 1;
-#else
-int dmesg_restrict;
-#endif
-
-static int syslog_action_restricted(int type)
-{
-	if (dmesg_restrict)
-		return 1;
-	/* Unless restricted, we allow "read all" and "get buffer size" for everybody */
-	return type != SYSLOG_ACTION_READ_ALL && type != SYSLOG_ACTION_SIZE_BUFFER;
-}
-
-static int check_syslog_permissions(int type, bool from_file)
-{
-	/*
-	 * If this is from /proc/kmsg and we've already opened it, then we've
-	 * already done the capabilities checks at open time.
-	 */
-	if (from_file && type != SYSLOG_ACTION_OPEN)
-		return 0;
-
-	if (syslog_action_restricted(type)) {
-		if (capable(CAP_SYSLOG))
-			return 0;
-		/* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */
-		if (capable(CAP_SYS_ADMIN)) {
-			printk_once(KERN_WARNING "%s (%d): "
-				"Attempt to access syslog with CAP_SYS_ADMIN "
-				"but no CAP_SYSLOG (deprecated).\n",
-				current->comm, task_pid_nr(current));
-			return 0;
-		}
-		return -EPERM;
-	}
-	return 0;
-}
-
 #if defined(CONFIG_PRINTK_TIME)
 static bool printk_time = 1;
 #else
@@ -1249,7 +1258,7 @@ out:
 
 SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
 {
-	return do_syslog(type, buf, len, SYSLOG_FROM_CALL);
+	return do_syslog(type, buf, len, SYSLOG_FROM_READER);
 }
 
 /*
diff --git a/kernel/range.c b/kernel/range.c
index 071b0ab455cb..eb911dbce267 100644
--- a/kernel/range.c
+++ b/kernel/range.c
@@ -48,9 +48,11 @@ int add_range_with_merge(struct range *range, int az, int nr_range,
 		final_start = min(range[i].start, start);
 		final_end = max(range[i].end, end);
 
-		range[i].start = final_start;
-		range[i].end = final_end;
-		return nr_range;
+		/* clear it and add it back for further merge */
+		range[i].start = 0;
+		range[i].end = 0;
+		return add_range_with_merge(range, az, nr_range,
+					    final_start, final_end);
 	}
 
 	/* Need to add it: */
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 16ea67925015..35380019f0fc 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1451,9 +1451,9 @@ static int rcu_gp_init(struct rcu_state *rsp)
 					    rnp->grphi, rnp->qsmask);
 		raw_spin_unlock_irq(&rnp->lock);
 #ifdef CONFIG_PROVE_RCU_DELAY
-		if ((prandom_u32() % (rcu_num_nodes * 8)) == 0 &&
+		if ((prandom_u32() % (rcu_num_nodes + 1)) == 0 &&
 		    system_state == SYSTEM_RUNNING)
-			schedule_timeout_uninterruptible(2);
+			udelay(200);
 #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
 		cond_resched();
 	}
@@ -1613,6 +1613,14 @@ static int __noreturn rcu_gp_kthread(void *arg)
 	}
 }
 
+static void rsp_wakeup(struct irq_work *work)
+{
+	struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work);
+
+	/* Wake up rcu_gp_kthread() to start the grace period. */
+	wake_up(&rsp->gp_wq);
+}
+
 /*
  * Start a new RCU grace period if warranted, re-initializing the hierarchy
  * in preparation for detecting the next grace period. The caller must hold
@@ -1637,8 +1645,12 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
 	}
 	rsp->gp_flags = RCU_GP_FLAG_INIT;
 
-	/* Wake up rcu_gp_kthread() to start the grace period. */
-	wake_up(&rsp->gp_wq);
+	/*
+	 * We can't do wakeups while holding the rnp->lock, as that
+	 * could cause possible deadlocks with the rq->lock. Deter
+	 * the wakeup to interrupt context.
+	 */
+	irq_work_queue(&rsp->wakeup_work);
 }
 
 /*
@@ -3235,6 +3247,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 
 	rsp->rda = rda;
 	init_waitqueue_head(&rsp->gp_wq);
+	init_irq_work(&rsp->wakeup_work, rsp_wakeup);
 	rnp = rsp->level[rcu_num_lvls - 1];
 	for_each_possible_cpu(i) {
 		while (i > rnp->grphi)
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index da77a8f57ff9..4df503470e42 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -27,6 +27,7 @@
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 #include <linux/seqlock.h>
+#include <linux/irq_work.h>
 
 /*
  * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
@@ -442,6 +443,7 @@ struct rcu_state {
 	char *name;				/* Name of structure. */
 	char abbr;				/* Abbreviated name. */
 	struct list_head flavors;		/* List of RCU flavors. */
+	struct irq_work wakeup_work;		/* Postponed wakeups */
 };
 
 /* Values for rcu_state structure's gp_flags field. */
diff --git a/kernel/softirq.c b/kernel/softirq.c
index b5197dcb0dad..3d6833f125d3 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -195,8 +195,12 @@ void local_bh_enable_ip(unsigned long ip)
 EXPORT_SYMBOL(local_bh_enable_ip);
 
 /*
- * We restart softirq processing for at most 2 ms,
- * and if need_resched() is not set.
+ * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
+ * but break the loop if need_resched() is set or after 2 ms.
+ * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
+ * certain cases, such as stop_machine(), jiffies may cease to
+ * increment and so we need the MAX_SOFTIRQ_RESTART limit as
+ * well to make sure we eventually return from this method.
  *
  * These limits have been established via experimentation.
  * The two things to balance is latency against fairness -
@@ -204,6 +208,7 @@ EXPORT_SYMBOL(local_bh_enable_ip);
  * should not be able to lock up the box.
  */
 #define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
+#define MAX_SOFTIRQ_RESTART 10
 
 asmlinkage void __do_softirq(void)
 {
@@ -212,6 +217,7 @@ asmlinkage void __do_softirq(void)
 	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
 	int cpu;
 	unsigned long old_flags = current->flags;
+	int max_restart = MAX_SOFTIRQ_RESTART;
 
 	/*
 	 * Mask out PF_MEMALLOC s current task context is borrowed for the
@@ -265,7 +271,8 @@ restart:
 
 	pending = local_softirq_pending();
 	if (pending) {
-		if (time_before(jiffies, end) && !need_resched())
+		if (time_before(jiffies, end) && !need_resched() &&
+		    --max_restart)
 			goto restart;
 
 		wakeup_softirqd();
diff --git a/kernel/sys.c b/kernel/sys.c
index b95d3c72ba21..2bbd9a73b54c 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -362,6 +362,29 @@ int unregister_reboot_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_reboot_notifier);
 
+/* Add backwards compatibility for stable trees. */
+#ifndef PF_NO_SETAFFINITY
+#define PF_NO_SETAFFINITY		PF_THREAD_BOUND
+#endif
+
+static void migrate_to_reboot_cpu(void)
+{
+	/* The boot cpu is always logical cpu 0 */
+	int cpu = 0;
+
+	cpu_hotplug_disable();
+
+	/* Make certain the cpu I'm about to reboot on is online */
+	if (!cpu_online(cpu))
+		cpu = cpumask_first(cpu_online_mask);
+
+	/* Prevent races with other tasks migrating this task */
+	current->flags |= PF_NO_SETAFFINITY;
+
+	/* Make certain I only run on the appropriate processor */
+	set_cpus_allowed_ptr(current, cpumask_of(cpu));
+}
+
 /**
  * kernel_restart - reboot the system
  * @cmd: pointer to buffer containing command to execute for restart
@@ -373,7 +396,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier);
 void kernel_restart(char *cmd)
 {
 	kernel_restart_prepare(cmd);
-	disable_nonboot_cpus();
+	migrate_to_reboot_cpu();
 	syscore_shutdown();
 	if (!cmd)
 		printk(KERN_EMERG "Restarting system.\n");
@@ -400,7 +423,7 @@ static void kernel_shutdown_prepare(enum system_states state)
 void kernel_halt(void)
 {
 	kernel_shutdown_prepare(SYSTEM_HALT);
-	disable_nonboot_cpus();
+	migrate_to_reboot_cpu();
 	syscore_shutdown();
 	printk(KERN_EMERG "System halted.\n");
 	kmsg_dump(KMSG_DUMP_HALT);
@@ -419,7 +442,7 @@ void kernel_power_off(void)
 	kernel_shutdown_prepare(SYSTEM_POWER_OFF);
 	if (pm_power_off_prepare)
 		pm_power_off_prepare();
-	disable_nonboot_cpus();
+	migrate_to_reboot_cpu();
 	syscore_shutdown();
 	printk(KERN_EMERG "Power down.\n");
 	kmsg_dump(KMSG_DUMP_POWEROFF);
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 12ff13a838c6..8f5b3b98577b 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -874,7 +874,6 @@ static void hardpps_update_phase(long error)
 void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
 {
 	struct pps_normtime pts_norm, freq_norm;
-	unsigned long flags;
 
 	pts_norm = pps_normalize_ts(*phase_ts);
 
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 24938d577669..0c739423b0f9 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -511,6 +511,12 @@ again:
 		}
 	}
 
+	/*
+	 * Remove the current cpu from the pending mask. The event is
+	 * delivered immediately in tick_do_broadcast() !
+	 */
+	cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);
+
 	/* Take care of enforced broadcast requests */
 	cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
 	cpumask_clear(tick_broadcast_force_mask);
@@ -575,8 +581,8 @@ void tick_broadcast_oneshot_control(unsigned long reason)
 
 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 	if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
-		WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
 		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
+			WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
 			clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
 			/*
 			 * We only reprogram the broadcast timer if we
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 98cd470bbe49..baeeb5c87cf1 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -975,6 +975,14 @@ static int timekeeping_suspend(void)
 
 	read_persistent_clock(&timekeeping_suspend_time);
 
+	/*
+	 * On some systems the persistent_clock can not be detected at
+	 * timekeeping_init by its return value, so if we see a valid
+	 * value returned, update the persistent_clock_exists flag.
+	 */
+	if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec)
+		persistent_clock_exist = true;
+
 	raw_spin_lock_irqsave(&timekeeper_lock, flags);
 	write_seqcount_begin(&timekeeper_seq);
 	timekeeping_forward_now(tk);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b549b0f5b977..6c508ff33c62 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -120,22 +120,22 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
 
 /*
  * Traverse the ftrace_global_list, invoking all entries. The reason that we
- * can use rcu_dereference_raw() is that elements removed from this list
+ * can use rcu_dereference_raw_notrace() is that elements removed from this list
  * are simply leaked, so there is no need to interact with a grace-period
- * mechanism. The rcu_dereference_raw() calls are needed to handle
+ * mechanism. The rcu_dereference_raw_notrace() calls are needed to handle
  * concurrent insertions into the ftrace_global_list.
  *
  * Silly Alpha and silly pointer-speculation compiler optimizations!
  */
 #define do_for_each_ftrace_op(op, list)			\
-	op = rcu_dereference_raw(list);			\
+	op = rcu_dereference_raw_notrace(list);		\
 	do
 
 /*
  * Optimized for just a single item in the list (as that is the normal case).
  */
 #define while_for_each_ftrace_op(op)			\
-	while (likely(op = rcu_dereference_raw((op)->next)) &&	\
+	while (likely(op = rcu_dereference_raw_notrace((op)->next)) &&	\
 	       unlikely((op) != &ftrace_list_end))
 
 static inline void ftrace_ops_init(struct ftrace_ops *ops)
@@ -779,7 +779,7 @@ ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)
 	if (hlist_empty(hhd))
 		return NULL;
 
-	hlist_for_each_entry_rcu(rec, hhd, node) {
+	hlist_for_each_entry_rcu_notrace(rec, hhd, node) {
 		if (rec->ip == ip)
 			return rec;
 	}
@@ -1165,7 +1165,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
 
 	hhd = &hash->buckets[key];
 
-	hlist_for_each_entry_rcu(entry, hhd, hlist) {
+	hlist_for_each_entry_rcu_notrace(entry, hhd, hlist) {
 		if (entry->ip == ip)
 			return entry;
 	}
@@ -1422,8 +1422,8 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
 	struct ftrace_hash *notrace_hash;
 	int ret;
 
-	filter_hash = rcu_dereference_raw(ops->filter_hash);
-	notrace_hash = rcu_dereference_raw(ops->notrace_hash);
+	filter_hash = rcu_dereference_raw_notrace(ops->filter_hash);
+	notrace_hash = rcu_dereference_raw_notrace(ops->notrace_hash);
 
 	if ((ftrace_hash_empty(filter_hash) ||
 	     ftrace_lookup_ip(filter_hash, ip)) &&
@@ -2920,7 +2920,7 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
 	 * on the hash. rcu_read_lock is too dangerous here.
 	 */
 	preempt_disable_notrace();
-	hlist_for_each_entry_rcu(entry, hhd, node) {
+	hlist_for_each_entry_rcu_notrace(entry, hhd, node) {
 		if (entry->ip == ip)
 			entry->ops->func(ip, parent_ip, &entry->data);
 	}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b59aea2c48c2..e444ff88f0a4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -620,6 +620,9 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
 	if (cpu == RING_BUFFER_ALL_CPUS)
 		work = &buffer->irq_work;
 	else {
+		if (!cpumask_test_cpu(cpu, buffer->cpumask))
+			return -EINVAL;
+
 		cpu_buffer = buffer->buffers[cpu];
 		work = &cpu_buffer->irq_work;
 	}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ae6fa2d1cdf7..e71a8be4a6ee 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -652,8 +652,6 @@ static struct {
 	ARCH_TRACE_CLOCKS
 };
 
-int trace_clock_id;
-
 /*
  * trace_parser_get_init - gets the buffer for trace parser
  */
@@ -843,7 +841,15 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
 	max_data->pid = tsk->pid;
-	max_data->uid = task_uid(tsk);
+	/*
+	 * If tsk == current, then use current_uid(), as that does not use
+	 * RCU. The irq tracer can be called out of RCU scope.
+	 */
+	if (tsk == current)
+		max_data->uid = current_uid();
+	else
+		max_data->uid = task_uid(tsk);
+
 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
 	max_data->policy = tsk->policy;
 	max_data->rt_priority = tsk->rt_priority;
@@ -2818,7 +2824,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
 
 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
-	if (trace_clocks[trace_clock_id].in_ns)
+	if (trace_clocks[tr->clock_id].in_ns)
 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
 
 	/* stop the trace while dumping if we are not opening "snapshot" */
@@ -3817,7 +3823,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
 
 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
-	if (trace_clocks[trace_clock_id].in_ns)
+	if (trace_clocks[tr->clock_id].in_ns)
 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
 
 	iter->cpu_file = tc->cpu;
@@ -5087,7 +5093,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
 	trace_seq_printf(s, "bytes: %ld\n", cnt);
 
-	if (trace_clocks[trace_clock_id].in_ns) {
+	if (trace_clocks[tr->clock_id].in_ns) {
 		/* local or global for trace_clock */
 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
 		usec_rem = do_div(t, USEC_PER_SEC);
@@ -6216,10 +6222,15 @@ __init static int tracer_alloc_buffers(void)
 
 	trace_init_cmdlines();
 
-	register_tracer(&nop_trace);
-
+	/*
+	 * register_tracer() might reference current_trace, so it
+	 * needs to be set before we register anything. This is
+	 * just a bootstrap of current_trace anyway.
+	 */
 	global_trace.current_trace = &nop_trace;
 
+	register_tracer(&nop_trace);
+
 	/* All seems OK, enable tracing */
 	tracing_disabled = 0;
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 711ca7d3e7f1..20572ed88c5c 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -700,8 +700,6 @@ enum print_line_t print_trace_line(struct trace_iterator *iter);
 
 extern unsigned long trace_flags;
 
-extern int trace_clock_id;
-
 /* Standard output formatting function used for function return traces */
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 55e2cf66967b..2901e3b88590 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -1159,7 +1159,7 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
 	/* stop the tracing. */
 	tracing_stop();
 	/* check the trace buffer */
-	ret = trace_test_buffer(tr, &count);
+	ret = trace_test_buffer(&tr->trace_buffer, &count);
 	trace->reset(tr);
 	tracing_start();
 