6 files changed, 250 insertions, 21 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index 2eeea9a14240..10c4930c2bbf 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -170,7 +170,9 @@ void audit_panic(const char *message)
                        printk(KERN_ERR "audit: %s\n", message);
                break;
        case AUDIT_FAIL_PANIC:
-                panic("audit: %s\n", message);
+                /* test audit_pid since printk is always lossy, why bother? */
+                if (audit_pid)
+                        panic("audit: %s\n", message);
                break;
        }
 }
@@ -352,6 +354,7 @@ static int kauditd_thread(void *dummy)
                                if (err < 0) {
                                        BUG_ON(err != -ECONNREFUSED); /* Shoudn't happen */
                                        printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid);
+                                        audit_log_lost("auditd dissapeared\n");
                                        audit_pid = 0;
                                }
                        } else {
@@ -1350,17 +1353,19 @@ void audit_log_end(struct audit_buffer *ab)
        if (!audit_rate_check()) {
                audit_log_lost("rate limit exceeded");
        } else {
+                struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
                if (audit_pid) {
-                        struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
                        nlh->nlmsg_len = ab->skb->len - NLMSG_SPACE(0);
                        skb_queue_tail(&audit_skb_queue, ab->skb);
                        ab->skb = NULL;
                        wake_up_interruptible(&kauditd_wait);
-                } else if (printk_ratelimit()) {
+                } else if (nlh->nlmsg_type != AUDIT_EOE) {
-                        struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
+                        if (printk_ratelimit()) {
-                        printk(KERN_NOTICE "type=%d %s\n", nlh->nlmsg_type, ab->skb->data + NLMSG_SPACE(0));
+                                printk(KERN_NOTICE "type=%d %s\n",
-                } else {
+                                        nlh->nlmsg_type,
-                        audit_log_lost("printk limit exceeded\n");
+                                        ab->skb->data + NLMSG_SPACE(0));
+                        } else
+                                audit_log_lost("printk limit exceeded\n");
                }
        }
        audit_buffer_free(ab);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 2087d6de67ea..782262e4107d 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1070,7 +1070,7 @@ static int audit_log_single_execve_arg(struct audit_context *context,
                 * so we can be sure nothing was lost.
                 */
                if ((i == 0) && (too_long))
-                        audit_log_format(*ab, "a%d_len=%ld ", arg_num,
+                        audit_log_format(*ab, "a%d_len=%zu ", arg_num,
                                         has_cntl ? 2*len : len);
                /*
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 987cfb7ade89..e9517014b57c 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -23,6 +23,10 @@
 *              to Suparna Bhattacharya for pushing me completely away
 *              from atomic instructions on the read side.
 *
+ *  - Added handling of Dynamic Ticks
+ *      Copyright 2007 - Paul E. Mckenney <paulmck@us.ibm.com>
+ *                     - Steven Rostedt <srostedt@redhat.com>
+ *
 * Papers:  http://www.rdrop.com/users/paulmck/RCU
 *
 * Design Document: http://lwn.net/Articles/253651/
@@ -409,6 +413,212 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp)
        }
 }
+#ifdef CONFIG_NO_HZ
+DEFINE_PER_CPU(long, dynticks_progress_counter) = 1;
+static DEFINE_PER_CPU(long, rcu_dyntick_snapshot);
+static DEFINE_PER_CPU(int, rcu_update_flag);
+/**
+ * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
+ *
+ * If the CPU was idle with dynamic ticks active, this updates the
+ * dynticks_progress_counter to let the RCU handling know that the
+ * CPU is active.
+ */
+void rcu_irq_enter(void)
+{
+        int cpu = smp_processor_id();
+        if (per_cpu(rcu_update_flag, cpu))
+                per_cpu(rcu_update_flag, cpu)++;
+        /*
+         * Only update if we are coming from a stopped ticks mode
+         * (dynticks_progress_counter is even).
+         */
+        if (!in_interrupt() &&
+            (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) {
+                /*
+                 * The following might seem like we could have a race
+                 * with NMI/SMIs. But this really isn't a problem.
+                 * Here we do a read/modify/write, and the race happens
+                 * when an NMI/SMI comes in after the read and before
+                 * the write. But NMI/SMIs will increment this counter
+                 * twice before returning, so the zero bit will not
+                 * be corrupted by the NMI/SMI which is the most important
+                 * part.
+                 *
+                 * The only thing is that we would bring back the counter
+                 * to a position that it was in during the NMI/SMI.
+                 * But the zero bit would be set, so the rest of the
+                 * counter would again be ignored.
+                 *
+                 * On return from the IRQ, the counter may have the zero
+                 * bit be 0 and the counter the same as the return from
+                 * the NMI/SMI. If the state machine was so unlucky to
+                 * see that, it still doesn't matter, since all
+                 * RCU read-side critical sections on this CPU would
+                 * have already completed.
+                 */
+                per_cpu(dynticks_progress_counter, cpu)++;
+                /*
+                 * The following memory barrier ensures that any
+                 * rcu_read_lock() primitives in the irq handler
+                 * are seen by other CPUs to follow the above
+                 * increment to dynticks_progress_counter. This is
+                 * required in order for other CPUs to correctly
+                 * determine when it is safe to advance the RCU
+                 * grace-period state machine.
+                 */
+                smp_mb(); /* see above block comment. */
+                /*
+                 * Since we can't determine the dynamic tick mode from
+                 * the dynticks_progress_counter after this routine,
+                 * we use a second flag to acknowledge that we came
+                 * from an idle state with ticks stopped.
+                 */
+                per_cpu(rcu_update_flag, cpu)++;
+                /*
+                 * If we take an NMI/SMI now, they will also increment
+                 * the rcu_update_flag, and will not update the
+                 * dynticks_progress_counter on exit. That is for
+                 * this IRQ to do.
+                 */
+        }
+}
+/**
+ * rcu_irq_exit - Called from exiting Hard irq context.
+ *
+ * If the CPU was idle with dynamic ticks active, update the
+ * dynticks_progress_counter to let the RCU handling be
+ * aware that the CPU is going back to idle with no ticks.
+ */
+void rcu_irq_exit(void)
+{
+        int cpu = smp_processor_id();
+        /*
+         * rcu_update_flag is set if we interrupted the CPU
+         * when it was idle with ticks stopped.
+         * Once this occurs, we keep track of interrupt nesting
+         * because a NMI/SMI could also come in, and we still
+         * only want the IRQ that started the increment of the
+         * dynticks_progress_counter to be the one that modifies
+         * it on exit.
+         */
+        if (per_cpu(rcu_update_flag, cpu)) {
+                if (--per_cpu(rcu_update_flag, cpu))
+                        return;
+                /* This must match the interrupt nesting */
+                WARN_ON(in_interrupt());
+                /*
+                 * If an NMI/SMI happens now we are still
+                 * protected by the dynticks_progress_counter being odd.
+                 */
+                /*
+                 * The following memory barrier ensures that any
+                 * rcu_read_unlock() primitives in the irq handler
+                 * are seen by other CPUs to precede the following
+                 * increment to dynticks_progress_counter. This
+                 * is required in order for other CPUs to determine
+                 * when it is safe to advance the RCU grace-period
+                 * state machine.
+                 */
+                smp_mb(); /* see above block comment. */
+                per_cpu(dynticks_progress_counter, cpu)++;
+                WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1);
+        }
+}
+static void dyntick_save_progress_counter(int cpu)
+{
+        per_cpu(rcu_dyntick_snapshot, cpu) =
+                per_cpu(dynticks_progress_counter, cpu);
+}
+static inline int
+rcu_try_flip_waitack_needed(int cpu)
+{
+        long curr;
+        long snap;
+        curr = per_cpu(dynticks_progress_counter, cpu);
+        snap = per_cpu(rcu_dyntick_snapshot, cpu);
+        smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
+        /*
+         * If the CPU remained in dynticks mode for the entire time
+         * and didn't take any interrupts, NMIs, SMIs, or whatever,
+         * then it cannot be in the middle of an rcu_read_lock(), so
+         * the next rcu_read_lock() it executes must use the new value
+         * of the counter.  So we can safely pretend that this CPU
+         * already acknowledged the counter.
+         */
+        if ((curr == snap) && ((curr & 0x1) == 0))
+                return 0;
+        /*
+         * If the CPU passed through or entered a dynticks idle phase with
+         * no active irq handlers, then, as above, we can safely pretend
+         * that this CPU already acknowledged the counter.
+         */
+        if ((curr - snap) > 2 || (snap & 0x1) == 0)
+                return 0;
+        /* We need this CPU to explicitly acknowledge the counter flip. */
+        return 1;
+}
+static inline int
+rcu_try_flip_waitmb_needed(int cpu)
+{
+        long curr;
+        long snap;
+        curr = per_cpu(dynticks_progress_counter, cpu);
+        snap = per_cpu(rcu_dyntick_snapshot, cpu);
+        smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
+        /*
+         * If the CPU remained in dynticks mode for the entire time
+         * and didn't take any interrupts, NMIs, SMIs, or whatever,
+         * then it cannot have executed an RCU read-side critical section
+         * during that time, so there is no need for it to execute a
+         * memory barrier.
+         */
+        if ((curr == snap) && ((curr & 0x1) == 0))
+                return 0;
+        /*
+         * If the CPU either entered or exited an outermost interrupt,
+         * SMI, NMI, or whatever handler, then we know that it executed
+         * a memory barrier when doing so.  So we don't need another one.
+         */
+        if (curr != snap)
+                return 0;
+        /* We need the CPU to execute a memory barrier. */
+        return 1;
+}
+#else /* !CONFIG_NO_HZ */
+# define dyntick_save_progress_counter(cpu)     do { } while (0)
+# define rcu_try_flip_waitack_needed(cpu)       (1)
+# define rcu_try_flip_waitmb_needed(cpu)        (1)
+#endif /* CONFIG_NO_HZ */
 /*
 * Get here when RCU is idle.  Decide whether we need to
 * move out of idle state, and return non-zero if so.
@@ -447,8 +657,10 @@ rcu_try_flip_idle(void)
        /* Now ask each CPU for acknowledgement of the flip. */
-        for_each_cpu_mask(cpu, rcu_cpu_online_map)
+        for_each_cpu_mask(cpu, rcu_cpu_online_map) {
                per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
+                dyntick_save_progress_counter(cpu);
+        }
        return 1;
 }
@@ -464,7 +676,8 @@ rcu_try_flip_waitack(void)
        RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
        for_each_cpu_mask(cpu, rcu_cpu_online_map)
-                if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
+                if (rcu_try_flip_waitack_needed(cpu) &&
+                    per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
                        RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
                        return 0;
                }
@@ -509,8 +722,10 @@ rcu_try_flip_waitzero(void)
        smp_mb();  /*  ^^^^^^^^^^^^ */
        /* Call for a memory barrier from each CPU. */
-        for_each_cpu_mask(cpu, rcu_cpu_online_map)
+        for_each_cpu_mask(cpu, rcu_cpu_online_map) {
                per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
+                dyntick_save_progress_counter(cpu);
+        }
        RCU_TRACE_ME(rcupreempt_trace_try_flip_z2);
        return 1;
@@ -528,7 +743,8 @@ rcu_try_flip_waitmb(void)
        RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
        for_each_cpu_mask(cpu, rcu_cpu_online_map)
-                if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
+                if (rcu_try_flip_waitmb_needed(cpu) &&
+                    per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
                        RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
                        return 0;
                }
@@ -702,8 +918,9 @@ void rcu_offline_cpu(int cpu)
         * fix.
         */
+        local_irq_save(flags);
        rdp = RCU_DATA_ME();
-        spin_lock_irqsave(&rdp->lock, flags);
+        spin_lock(&rdp->lock);
        *rdp->nexttail = list;
        if (list)
                rdp->nexttail = tail;
@@ -735,9 +952,11 @@ static void rcu_process_callbacks(struct softirq_action *unused)
 {
        unsigned long flags;
        struct rcu_head *next, *list;
-        struct rcu_data *rdp = RCU_DATA_ME();
+        struct rcu_data *rdp;
-        spin_lock_irqsave(&rdp->lock, flags);
+        local_irq_save(flags);
+        rdp = RCU_DATA_ME();
+        spin_lock(&rdp->lock);
        list = rdp->donelist;
        if (list == NULL) {
                spin_unlock_irqrestore(&rdp->lock, flags);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 5b3aea5f471e..31e9f2a47928 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -313,6 +313,7 @@ void irq_exit(void)
        /* Make sure that timer wheel updates are propagated */
        if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
                tick_nohz_stop_sched_tick();
+        rcu_irq_exit();
 #endif
        preempt_enable_no_resched();
 }
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 7c2da88db4ed..01b6522fd92b 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -216,26 +216,27 @@ static int watchdog(void *__bind_cpu)
        /* initialize timestamp */
        touch_softlockup_watchdog();
+        set_current_state(TASK_INTERRUPTIBLE);
        /*
         * Run briefly once per second to reset the softlockup timestamp.
         * If this gets delayed for more than 60 seconds then the
         * debug-printout triggers in softlockup_tick().
         */
        while (!kthread_should_stop()) {
-                set_current_state(TASK_INTERRUPTIBLE);
                touch_softlockup_watchdog();
                schedule();
                if (kthread_should_stop())
                        break;
-                if (this_cpu != check_cpu)
+                if (this_cpu == check_cpu) {
-                        continue;
+                        if (sysctl_hung_task_timeout_secs)
+                                check_hung_uninterruptible_tasks(this_cpu);
-                if (sysctl_hung_task_timeout_secs)
+                }
-                        check_hung_uninterruptible_tasks(this_cpu);
+                set_current_state(TASK_INTERRUPTIBLE);
        }
+        __set_current_state(TASK_RUNNING);
        return 0;
 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index fa9bb73dbdb4..2968298f8f36 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -282,6 +282,7 @@ void tick_nohz_stop_sched_tick(void)
                        ts->idle_tick = ts->sched_timer.expires;
                        ts->tick_stopped = 1;
                        ts->idle_jiffies = last_jiffies;
+                        rcu_enter_nohz();
                }
                /*
@@ -375,6 +376,8 @@ void tick_nohz_restart_sched_tick(void)
                return;
        }
+        rcu_exit_nohz();
        /* Update jiffies first */
        select_nohz_load_balancer(0);
        now = ktime_get();

diff --git a/kernel/audit.c b/kernel/audit.c index 2eeea9a14240..10c4930c2bbf 100644 --- a/kernel/audit.c +++ b/kernel/audit.c
@@ -170,7 +170,9 @@ void audit_panic(const char *message)
170	printk(KERN_ERR "audit: %s\n", message);	170	printk(KERN_ERR "audit: %s\n", message);
171	break;	171	break;
172	case AUDIT_FAIL_PANIC:	172	case AUDIT_FAIL_PANIC:
173	panic("audit: %s\n", message);	173	/* test audit_pid since printk is always lossy, why bother? */
		174	if (audit_pid)
		175	panic("audit: %s\n", message);
174	break;	176	break;
175	}	177	}
176	}	178	}
@@ -352,6 +354,7 @@ static int kauditd_thread(void *dummy)
352	if (err < 0) {	354	if (err < 0) {
353	BUG_ON(err != -ECONNREFUSED); /* Shoudn't happen */	355	BUG_ON(err != -ECONNREFUSED); /* Shoudn't happen */
354	printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid);	356	printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid);
		357	audit_log_lost("auditd dissapeared\n");
355	audit_pid = 0;	358	audit_pid = 0;
356	}	359	}
357	} else {	360	} else {
@@ -1350,17 +1353,19 @@ void audit_log_end(struct audit_buffer *ab)
1350	if (!audit_rate_check()) {	1353	if (!audit_rate_check()) {
1351	audit_log_lost("rate limit exceeded");	1354	audit_log_lost("rate limit exceeded");
1352	} else {	1355	} else {
		1356	struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
1353	if (audit_pid) {	1357	if (audit_pid) {
1354	struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
1355	nlh->nlmsg_len = ab->skb->len - NLMSG_SPACE(0);	1358	nlh->nlmsg_len = ab->skb->len - NLMSG_SPACE(0);
1356	skb_queue_tail(&audit_skb_queue, ab->skb);	1359	skb_queue_tail(&audit_skb_queue, ab->skb);
1357	ab->skb = NULL;	1360	ab->skb = NULL;
1358	wake_up_interruptible(&kauditd_wait);	1361	wake_up_interruptible(&kauditd_wait);
1359	} else if (printk_ratelimit()) {	1362	} else if (nlh->nlmsg_type != AUDIT_EOE) {
1360	struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);	1363	if (printk_ratelimit()) {
1361	printk(KERN_NOTICE "type=%d %s\n", nlh->nlmsg_type, ab->skb->data + NLMSG_SPACE(0));	1364	printk(KERN_NOTICE "type=%d %s\n",
1362	} else {	1365	nlh->nlmsg_type,
1363	audit_log_lost("printk limit exceeded\n");	1366	ab->skb->data + NLMSG_SPACE(0));
		1367	} else
		1368	audit_log_lost("printk limit exceeded\n");
1364	}	1369	}
1365	}	1370	}
1366	audit_buffer_free(ab);	1371	audit_buffer_free(ab);


diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 2087d6de67ea..782262e4107d 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c
@@ -1070,7 +1070,7 @@ static int audit_log_single_execve_arg(struct audit_context *context,
1070	* so we can be sure nothing was lost.	1070	* so we can be sure nothing was lost.
1071	*/	1071	*/
1072	if ((i == 0) && (too_long))	1072	if ((i == 0) && (too_long))
1073	audit_log_format(*ab, "a%d_len=%ld ", arg_num,	1073	audit_log_format(*ab, "a%d_len=%zu ", arg_num,
1074	has_cntl ? 2*len : len);	1074	has_cntl ? 2*len : len);
1075		1075
1076	/*	1076	/*


diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 987cfb7ade89..e9517014b57c 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c
@@ -23,6 +23,10 @@
23	* to Suparna Bhattacharya for pushing me completely away	23	* to Suparna Bhattacharya for pushing me completely away
24	* from atomic instructions on the read side.	24	* from atomic instructions on the read side.
25	*	25	*
		26	* - Added handling of Dynamic Ticks
		27	* Copyright 2007 - Paul E. Mckenney <paulmck@us.ibm.com>
		28	* - Steven Rostedt <srostedt@redhat.com>
		29	*
26	* Papers: http://www.rdrop.com/users/paulmck/RCU	30	* Papers: http://www.rdrop.com/users/paulmck/RCU
27	*	31	*
28	* Design Document: http://lwn.net/Articles/253651/	32	* Design Document: http://lwn.net/Articles/253651/
@@ -409,6 +413,212 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp)
409	}	413	}
410	}	414	}
411		415
		416	#ifdef CONFIG_NO_HZ
		417
		418	DEFINE_PER_CPU(long, dynticks_progress_counter) = 1;
		419	static DEFINE_PER_CPU(long, rcu_dyntick_snapshot);
		420	static DEFINE_PER_CPU(int, rcu_update_flag);
		421
		422	/**
		423	* rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
		424	*
		425	* If the CPU was idle with dynamic ticks active, this updates the
		426	* dynticks_progress_counter to let the RCU handling know that the
		427	* CPU is active.
		428	*/
		429	void rcu_irq_enter(void)
		430	{
		431	int cpu = smp_processor_id();
		432
		433	if (per_cpu(rcu_update_flag, cpu))
		434	per_cpu(rcu_update_flag, cpu)++;
		435
		436	/*
		437	* Only update if we are coming from a stopped ticks mode
		438	* (dynticks_progress_counter is even).
		439	*/
		440	if (!in_interrupt() &&
		441	(per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) {
		442	/*
		443	* The following might seem like we could have a race
		444	* with NMI/SMIs. But this really isn't a problem.
		445	* Here we do a read/modify/write, and the race happens
		446	* when an NMI/SMI comes in after the read and before
		447	* the write. But NMI/SMIs will increment this counter
		448	* twice before returning, so the zero bit will not
		449	* be corrupted by the NMI/SMI which is the most important
		450	* part.
		451	*
		452	* The only thing is that we would bring back the counter
		453	* to a postion that it was in during the NMI/SMI.
		454	* But the zero bit would be set, so the rest of the
		455	* counter would again be ignored.
		456	*
		457	* On return from the IRQ, the counter may have the zero
		458	* bit be 0 and the counter the same as the return from
		459	* the NMI/SMI. If the state machine was so unlucky to
		460	* see that, it still doesn't matter, since all
		461	* RCU read-side critical sections on this CPU would
		462	* have already completed.
		463	*/
		464	per_cpu(dynticks_progress_counter, cpu)++;
		465	/*
		466	* The following memory barrier ensures that any
		467	* rcu_read_lock() primitives in the irq handler
		468	* are seen by other CPUs to follow the above
		469	* increment to dynticks_progress_counter. This is
		470	* required in order for other CPUs to correctly
		471	* determine when it is safe to advance the RCU
		472	* grace-period state machine.
		473	*/
		474	smp_mb(); /* see above block comment. */
		475	/*
		476	* Since we can't determine the dynamic tick mode from
		477	* the dynticks_progress_counter after this routine,
		478	* we use a second flag to acknowledge that we came
		479	* from an idle state with ticks stopped.
		480	*/
		481	per_cpu(rcu_update_flag, cpu)++;
		482	/*
		483	* If we take an NMI/SMI now, they will also increment
		484	* the rcu_update_flag, and will not update the
		485	* dynticks_progress_counter on exit. That is for
		486	* this IRQ to do.
		487	*/
		488	}
		489	}
		490
		491	/**
		492	* rcu_irq_exit - Called from exiting Hard irq context.
		493	*
		494	* If the CPU was idle with dynamic ticks active, update the
		495	* dynticks_progress_counter to let the RCU handling be
		496	* aware that the CPU is going back to idle with no ticks.
		497	*/
		498	void rcu_irq_exit(void)
		499	{
		500	int cpu = smp_processor_id();
		501
		502	/*
		503	* rcu_update_flag is set if we interrupted the CPU
		504	* when it was idle with ticks stopped.
		505	* Once this occurs, we keep track of interrupt nesting
		506	* because a NMI/SMI could also come in, and we still
		507	* only want the IRQ that started the increment of the
		508	* dynticks_progress_counter to be the one that modifies
		509	* it on exit.
		510	*/
		511	if (per_cpu(rcu_update_flag, cpu)) {
		512	if (--per_cpu(rcu_update_flag, cpu))
		513	return;
		514
		515	/* This must match the interrupt nesting */
		516	WARN_ON(in_interrupt());
		517
		518	/*
		519	* If an NMI/SMI happens now we are still
		520	* protected by the dynticks_progress_counter being odd.
		521	*/
		522
		523	/*
		524	* The following memory barrier ensures that any
		525	* rcu_read_unlock() primitives in the irq handler
		526	* are seen by other CPUs to precede the following
		527	* increment to dynticks_progress_counter. This
		528	* is required in order for other CPUs to determine
		529	* when it is safe to advance the RCU grace-period
		530	* state machine.
		531	*/
		532	smp_mb(); /* see above block comment. */
		533	per_cpu(dynticks_progress_counter, cpu)++;
		534	WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1);
		535	}
		536	}
		537
		538	static void dyntick_save_progress_counter(int cpu)
		539	{
		540	per_cpu(rcu_dyntick_snapshot, cpu) =
		541	per_cpu(dynticks_progress_counter, cpu);
		542	}
		543
		544	static inline int
		545	rcu_try_flip_waitack_needed(int cpu)
		546	{
		547	long curr;
		548	long snap;
		549
		550	curr = per_cpu(dynticks_progress_counter, cpu);
		551	snap = per_cpu(rcu_dyntick_snapshot, cpu);
		552	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
		553
		554	/*
		555	* If the CPU remained in dynticks mode for the entire time
		556	* and didn't take any interrupts, NMIs, SMIs, or whatever,
		557	* then it cannot be in the middle of an rcu_read_lock(), so
		558	* the next rcu_read_lock() it executes must use the new value
		559	* of the counter. So we can safely pretend that this CPU
		560	* already acknowledged the counter.
		561	*/
		562
		563	if ((curr == snap) && ((curr & 0x1) == 0))
		564	return 0;
		565
		566	/*
		567	* If the CPU passed through or entered a dynticks idle phase with
		568	* no active irq handlers, then, as above, we can safely pretend
		569	* that this CPU already acknowledged the counter.
		570	*/
		571
		572	if ((curr - snap) > 2 || (snap & 0x1) == 0)
		573	return 0;
		574
		575	/* We need this CPU to explicitly acknowledge the counter flip. */
		576
		577	return 1;
		578	}
		579
		580	static inline int
		581	rcu_try_flip_waitmb_needed(int cpu)
		582	{
		583	long curr;
		584	long snap;
		585
		586	curr = per_cpu(dynticks_progress_counter, cpu);
		587	snap = per_cpu(rcu_dyntick_snapshot, cpu);
		588	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
		589
		590	/*
		591	* If the CPU remained in dynticks mode for the entire time
		592	* and didn't take any interrupts, NMIs, SMIs, or whatever,
		593	* then it cannot have executed an RCU read-side critical section
		594	* during that time, so there is no need for it to execute a
		595	* memory barrier.
		596	*/
		597
		598	if ((curr == snap) && ((curr & 0x1) == 0))
		599	return 0;
		600
		601	/*
		602	* If the CPU either entered or exited an outermost interrupt,
		603	* SMI, NMI, or whatever handler, then we know that it executed
		604	* a memory barrier when doing so. So we don't need another one.
		605	*/
		606	if (curr != snap)
		607	return 0;
		608
		609	/* We need the CPU to execute a memory barrier. */
		610
		611	return 1;
		612	}
		613
		614	#else /* !CONFIG_NO_HZ */
		615
		616	# define dyntick_save_progress_counter(cpu) do { } while (0)
		617	# define rcu_try_flip_waitack_needed(cpu) (1)
		618	# define rcu_try_flip_waitmb_needed(cpu) (1)
		619
		620	#endif /* CONFIG_NO_HZ */
		621
412	/*	622	/*
413	* Get here when RCU is idle. Decide whether we need to	623	* Get here when RCU is idle. Decide whether we need to
414	* move out of idle state, and return non-zero if so.	624	* move out of idle state, and return non-zero if so.
@@ -447,8 +657,10 @@ rcu_try_flip_idle(void)
447		657
448	/* Now ask each CPU for acknowledgement of the flip. */	658	/* Now ask each CPU for acknowledgement of the flip. */
449		659
450	for_each_cpu_mask(cpu, rcu_cpu_online_map)	660	for_each_cpu_mask(cpu, rcu_cpu_online_map) {
451	per_cpu(rcu_flip_flag, cpu) = rcu_flipped;	661	per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
		662	dyntick_save_progress_counter(cpu);
		663	}
452		664
453	return 1;	665	return 1;
454	}	666	}
@@ -464,7 +676,8 @@ rcu_try_flip_waitack(void)
464		676
465	RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);	677	RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
466	for_each_cpu_mask(cpu, rcu_cpu_online_map)	678	for_each_cpu_mask(cpu, rcu_cpu_online_map)
467	if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {	679	if (rcu_try_flip_waitack_needed(cpu) &&
		680	per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
468	RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);	681	RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
469	return 0;	682	return 0;
470	}	683	}
@@ -509,8 +722,10 @@ rcu_try_flip_waitzero(void)
509	smp_mb(); /* ^^^^^^^^^^^^ */	722	smp_mb(); /* ^^^^^^^^^^^^ */
510		723
511	/* Call for a memory barrier from each CPU. */	724	/* Call for a memory barrier from each CPU. */
512	for_each_cpu_mask(cpu, rcu_cpu_online_map)	725	for_each_cpu_mask(cpu, rcu_cpu_online_map) {
513	per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;	726	per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
		727	dyntick_save_progress_counter(cpu);
		728	}
514		729
515	RCU_TRACE_ME(rcupreempt_trace_try_flip_z2);	730	RCU_TRACE_ME(rcupreempt_trace_try_flip_z2);
516	return 1;	731	return 1;
@@ -528,7 +743,8 @@ rcu_try_flip_waitmb(void)
528		743
529	RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);	744	RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
530	for_each_cpu_mask(cpu, rcu_cpu_online_map)	745	for_each_cpu_mask(cpu, rcu_cpu_online_map)
531	if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {	746	if (rcu_try_flip_waitmb_needed(cpu) &&
		747	per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
532	RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);	748	RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
533	return 0;	749	return 0;
534	}	750	}
@@ -702,8 +918,9 @@ void rcu_offline_cpu(int cpu)
702	* fix.	918	* fix.
703	*/	919	*/
704		920
		921	local_irq_save(flags);
705	rdp = RCU_DATA_ME();	922	rdp = RCU_DATA_ME();
706	spin_lock_irqsave(&rdp->lock, flags);	923	spin_lock(&rdp->lock);
707	*rdp->nexttail = list;	924	*rdp->nexttail = list;
708	if (list)	925	if (list)
709	rdp->nexttail = tail;	926	rdp->nexttail = tail;
@@ -735,9 +952,11 @@ static void rcu_process_callbacks(struct softirq_action *unused)
735	{	952	{
736	unsigned long flags;	953	unsigned long flags;
737	struct rcu_head *next, *list;	954	struct rcu_head *next, *list;
738	struct rcu_data *rdp = RCU_DATA_ME();	955	struct rcu_data *rdp;
739		956
740	spin_lock_irqsave(&rdp->lock, flags);	957	local_irq_save(flags);
		958	rdp = RCU_DATA_ME();
		959	spin_lock(&rdp->lock);
741	list = rdp->donelist;	960	list = rdp->donelist;
742	if (list == NULL) {	961	if (list == NULL) {
743	spin_unlock_irqrestore(&rdp->lock, flags);	962	spin_unlock_irqrestore(&rdp->lock, flags);


diff --git a/kernel/softirq.c b/kernel/softirq.c index 5b3aea5f471e..31e9f2a47928 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c
@@ -313,6 +313,7 @@ void irq_exit(void)
313	/* Make sure that timer wheel updates are propagated */	313	/* Make sure that timer wheel updates are propagated */
314	if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())	314	if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
315	tick_nohz_stop_sched_tick();	315	tick_nohz_stop_sched_tick();
		316	rcu_irq_exit();
316	#endif	317	#endif
317	preempt_enable_no_resched();	318	preempt_enable_no_resched();
318	}	319	}


diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 7c2da88db4ed..01b6522fd92b 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c
@@ -216,26 +216,27 @@ static int watchdog(void *__bind_cpu)
216	/* initialize timestamp */	216	/* initialize timestamp */
217	touch_softlockup_watchdog();	217	touch_softlockup_watchdog();
218		218
		219	set_current_state(TASK_INTERRUPTIBLE);
219	/*	220	/*
220	* Run briefly once per second to reset the softlockup timestamp.	221	* Run briefly once per second to reset the softlockup timestamp.
221	* If this gets delayed for more than 60 seconds then the	222	* If this gets delayed for more than 60 seconds then the
222	* debug-printout triggers in softlockup_tick().	223	* debug-printout triggers in softlockup_tick().
223	*/	224	*/
224	while (!kthread_should_stop()) {	225	while (!kthread_should_stop()) {
225	set_current_state(TASK_INTERRUPTIBLE);
226	touch_softlockup_watchdog();	226	touch_softlockup_watchdog();
227	schedule();	227	schedule();
228		228
229	if (kthread_should_stop())	229	if (kthread_should_stop())
230	break;	230	break;
231		231
232	if (this_cpu != check_cpu)	232	if (this_cpu == check_cpu) {
233	continue;	233	if (sysctl_hung_task_timeout_secs)
234		234	check_hung_uninterruptible_tasks(this_cpu);
235	if (sysctl_hung_task_timeout_secs)	235	}
236	check_hung_uninterruptible_tasks(this_cpu);
237		236
		237	set_current_state(TASK_INTERRUPTIBLE);
238	}	238	}
		239	__set_current_state(TASK_RUNNING);
239		240
240	return 0;	241	return 0;
241	}	242	}


diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index fa9bb73dbdb4..2968298f8f36 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c
@@ -282,6 +282,7 @@ void tick_nohz_stop_sched_tick(void)
282	ts->idle_tick = ts->sched_timer.expires;	282	ts->idle_tick = ts->sched_timer.expires;
283	ts->tick_stopped = 1;	283	ts->tick_stopped = 1;
284	ts->idle_jiffies = last_jiffies;	284	ts->idle_jiffies = last_jiffies;
		285	rcu_enter_nohz();
285	}	286	}
286		287
287	/*	288	/*
@@ -375,6 +376,8 @@ void tick_nohz_restart_sched_tick(void)
375	return;	376	return;
376	}	377	}
377		378
		379	rcu_exit_nohz();
		380
378	/* Update jiffies first */	381	/* Update jiffies first */
379	select_nohz_load_balancer(0);	382	select_nohz_load_balancer(0);
380	now = ktime_get();	383	now = ktime_get();