Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r--   kernel/rcutree.c   156
1 files changed, 75 insertions, 81 deletions

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index ccdc04c47981..d0ddfea6579d 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -67,9 +67,6 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
         .gpnum = -300, \
         .completed = -300, \
         .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \
-        .orphan_cbs_list = NULL, \
-        .orphan_cbs_tail = &structname.orphan_cbs_list, \
-        .orphan_qlen = 0, \
         .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \
         .n_force_qs = 0, \
         .n_force_qs_ngp = 0, \
@@ -620,9 +617,17 @@ static void __init check_cpu_stall_init(void)
 static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
 {
         if (rdp->gpnum != rnp->gpnum) {
-                rdp->qs_pending = 1;
-                rdp->passed_quiesc = 0;
+                /*
+                 * If the current grace period is waiting for this CPU,
+                 * set up to detect a quiescent state, otherwise don't
+                 * go looking for one.
+                 */
                 rdp->gpnum = rnp->gpnum;
+                if (rnp->qsmask & rdp->grpmask) {
+                        rdp->qs_pending = 1;
+                        rdp->passed_quiesc = 0;
+                } else
+                        rdp->qs_pending = 0;
         }
 }
 
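
In the rcu_node tree, each CPU owns one bit (rdp->grpmask) in its leaf node's ->qsmask, and the hunk above makes a CPU look for a quiescent state only when the current grace period is actually waiting on that bit. A standalone sketch of this bookkeeping, not part of the patch, using simplified stand-in types rather than the kernel's:

#include <stdio.h>

/* Simplified stand-ins for the rcu_node/rcu_data bitmask bookkeeping. */
struct leaf_node {
        unsigned long qsmask;   /* CPUs this grace period still waits on */
};

struct cpu_data {
        unsigned long grpmask;  /* this CPU's bit in its leaf node */
        int qs_pending;         /* should this CPU look for a QS? */
};

/* Simplified mirror of the decision made in __note_new_gpnum() above. */
static void note_new_gp(struct leaf_node *rnp, struct cpu_data *rdp)
{
        rdp->qs_pending = (rnp->qsmask & rdp->grpmask) != 0;
}

/* A CPU's quiescent state is reported by clearing its bit in ->qsmask. */
static void report_qs(struct leaf_node *rnp, struct cpu_data *rdp)
{
        rnp->qsmask &= ~rdp->grpmask;
        rdp->qs_pending = 0;
}

int main(void)
{
        struct leaf_node rnp = { .qsmask = 0x5 };       /* waiting on CPUs 0 and 2 */
        struct cpu_data cpu1 = { .grpmask = 1UL << 1 };
        struct cpu_data cpu2 = { .grpmask = 1UL << 2 };

        note_new_gp(&rnp, &cpu1);
        note_new_gp(&rnp, &cpu2);
        printf("cpu1 qs_pending=%d cpu2 qs_pending=%d\n",
               cpu1.qs_pending, cpu2.qs_pending);       /* 0 and 1 */

        report_qs(&rnp, &cpu2);
        printf("qsmask now 0x%lx\n", rnp.qsmask);       /* 0x1 */
        return 0;
}
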
@@ -681,6 +686,24 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat
 
                 /* Remember that we saw this grace-period completion. */
                 rdp->completed = rnp->completed;
+
+                /*
+                 * If we were in an extended quiescent state, we may have
+                 * missed some grace periods that other CPUs handled on
+                 * our behalf. Catch up with this state to avoid noting
+                 * spurious new grace periods.  If another grace period
+                 * has started, then rnp->gpnum will have advanced, so
+                 * we will detect this later on.
+                 */
+                if (ULONG_CMP_LT(rdp->gpnum, rdp->completed))
+                        rdp->gpnum = rdp->completed;
+
+                /*
+                 * If RCU does not need a quiescent state from this CPU,
+                 * then make sure that this CPU doesn't go looking for one.
+                 */
+                if ((rnp->qsmask & rdp->grpmask) == 0)
+                        rdp->qs_pending = 0;
         }
 }
 
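
The catch-up above uses ULONG_CMP_LT() because grace-period numbers are free-running unsigned counters that eventually wrap, so a plain "<" would misorder them across the wrap. A minimal standalone sketch, not part of the patch; the macro body follows the kernel's wraparound-safe idiom and the sample values are made up:

#include <limits.h>
#include <stdio.h>

/* Wraparound-safe "a is before b" test for free-running unsigned counters. */
#define ULONG_CMP_LT(a, b)      (ULONG_MAX / 2 < (a) - (b))

int main(void)
{
        unsigned long completed = 5;            /* counter has wrapped past 0 */
        unsigned long gpnum = ULONG_MAX - 2;    /* CPU slept across the wrap */

        /* Plain "<" says gpnum is not behind completed, so no catch-up would happen. */
        printf("plain <      : %d\n", gpnum < completed);                      /* 0 */

        /* Modular comparison still sees gpnum trailing completed (here by 8). */
        printf("ULONG_CMP_LT : %d\n", (int)ULONG_CMP_LT(gpnum, completed));    /* 1 */

        if (ULONG_CMP_LT(gpnum, completed))
                gpnum = completed;              /* same catch-up as the hunk above */
        printf("gpnum        : %lu\n", gpnum);  /* 5 */
        return 0;
}
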
@@ -984,53 +1007,31 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
 #ifdef CONFIG_HOTPLUG_CPU
 
 /*
- * Move a dying CPU's RCU callbacks to the ->orphan_cbs_list for the
- * specified flavor of RCU.  The callbacks will be adopted by the next
- * _rcu_barrier() invocation or by the CPU_DEAD notifier, whichever
- * comes first.  Because this is invoked from the CPU_DYING notifier,
- * irqs are already disabled.
+ * Move a dying CPU's RCU callbacks to an online CPU's callback list.
+ * Synchronization is not required because this function executes
+ * in stop_machine() context.
  */
-static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
+static void rcu_send_cbs_to_online(struct rcu_state *rsp)
 {
         int i;
+        /* The dying CPU has already been cleared in cpu_online_mask. */
+        int receive_cpu = cpumask_any(cpu_online_mask);
         struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
+        struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu);
 
         if (rdp->nxtlist == NULL)
                 return;  /* irqs disabled, so comparison is stable. */
-        raw_spin_lock(&rsp->onofflock);  /* irqs already disabled. */
-        *rsp->orphan_cbs_tail = rdp->nxtlist;
-        rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL];
+
+        *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
+        receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+        receive_rdp->qlen += rdp->qlen;
+        receive_rdp->n_cbs_adopted += rdp->qlen;
+        rdp->n_cbs_orphaned += rdp->qlen;
+
         rdp->nxtlist = NULL;
         for (i = 0; i < RCU_NEXT_SIZE; i++)
                 rdp->nxttail[i] = &rdp->nxtlist;
-        rsp->orphan_qlen += rdp->qlen;
-        rdp->n_cbs_orphaned += rdp->qlen;
         rdp->qlen = 0;
-        raw_spin_unlock(&rsp->onofflock);  /* irqs remain disabled. */
-}
-
-/*
- * Adopt previously orphaned RCU callbacks.
- */
-static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
-{
-        unsigned long flags;
-        struct rcu_data *rdp;
-
-        raw_spin_lock_irqsave(&rsp->onofflock, flags);
-        rdp = this_cpu_ptr(rsp->rda);
-        if (rsp->orphan_cbs_list == NULL) {
-                raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
-                return;
-        }
-        *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list;
-        rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail;
-        rdp->qlen += rsp->orphan_qlen;
-        rdp->n_cbs_adopted += rsp->orphan_qlen;
-        rsp->orphan_cbs_list = NULL;
-        rsp->orphan_cbs_tail = &rsp->orphan_cbs_list;
-        rsp->orphan_qlen = 0;
-        raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
 }
 
 /*
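
The splice in rcu_send_cbs_to_online() is the classic O(1) tail-pointer append: the donor's whole list is grafted onto the receiver by writing through the receiver's tail pointer, after which the receiver adopts the donor's tail. A standalone sketch of the idiom, not the kernel's types; the real rcu_data keeps an array of tail pointers (nxttail[RCU_NEXT_SIZE]), collapsed here to a single tail:

#include <stddef.h>
#include <stdio.h>

struct cb {
        struct cb *next;
        int id;
};

struct cblist {
        struct cb *head;
        struct cb **tail;       /* points at head, or at the last element's ->next */
        long qlen;
};

static void cblist_init(struct cblist *l)
{
        l->head = NULL;
        l->tail = &l->head;
        l->qlen = 0;
}

static void cblist_enqueue(struct cblist *l, struct cb *c)
{
        c->next = NULL;
        *l->tail = c;           /* link after the current last element */
        l->tail = &c->next;
        l->qlen++;
}

/* Move every callback from @from onto the end of @to in O(1). */
static void cblist_splice(struct cblist *to, struct cblist *from)
{
        if (from->head == NULL)
                return;
        *to->tail = from->head; /* graft the donor list onto the receiver's tail */
        to->tail = from->tail;  /* the receiver's new tail is the donor's tail */
        to->qlen += from->qlen;
        cblist_init(from);      /* donor is now empty */
}

int main(void)
{
        struct cb a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };
        struct cblist dying, online;
        struct cb *p;

        cblist_init(&dying);
        cblist_init(&online);
        cblist_enqueue(&online, &a);
        cblist_enqueue(&dying, &b);
        cblist_enqueue(&dying, &c);

        cblist_splice(&online, &dying); /* online now holds 1, 2, 3 */
        for (p = online.head; p; p = p->next)
                printf("%d\n", p->id);
        return 0;
}
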
@@ -1081,8 +1082,6 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
         raw_spin_unlock_irqrestore(&rnp->lock, flags);
         if (need_report & RCU_OFL_TASKS_EXP_GP)
                 rcu_report_exp_rnp(rsp, rnp);
-
-        rcu_adopt_orphan_cbs(rsp);
 }
 
 /*
@@ -1100,11 +1099,7 @@ static void rcu_offline_cpu(int cpu)
 
 #else /* #ifdef CONFIG_HOTPLUG_CPU */
 
-static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
-{
-}
-
-static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
+static void rcu_send_cbs_to_online(struct rcu_state *rsp)
 {
 }
 
@@ -1440,22 +1435,11 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
          */
         local_irq_save(flags);
         rdp = this_cpu_ptr(rsp->rda);
-        rcu_process_gp_end(rsp, rdp);
-        check_for_new_grace_period(rsp, rdp);
 
         /* Add the callback to our list. */
         *rdp->nxttail[RCU_NEXT_TAIL] = head;
         rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
 
-        /* Start a new grace period if one not already started. */
-        if (!rcu_gp_in_progress(rsp)) {
-                unsigned long nestflag;
-                struct rcu_node *rnp_root = rcu_get_root(rsp);
-
-                raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
-                rcu_start_gp(rsp, nestflag);  /* releases rnp_root->lock. */
-        }
-
         /*
          * Force the grace period if too many callbacks or too long waiting.
          * Enforce hysteresis, and don't invoke force_quiescent_state()
@@ -1464,12 +1448,27 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
          * is the only one waiting for a grace period to complete.
          */
         if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
-                rdp->blimit = LONG_MAX;
-                if (rsp->n_force_qs == rdp->n_force_qs_snap &&
-                    *rdp->nxttail[RCU_DONE_TAIL] != head)
-                        force_quiescent_state(rsp, 0);
-                rdp->n_force_qs_snap = rsp->n_force_qs;
-                rdp->qlen_last_fqs_check = rdp->qlen;
+
+                /* Are we ignoring a completed grace period? */
+                rcu_process_gp_end(rsp, rdp);
+                check_for_new_grace_period(rsp, rdp);
+
+                /* Start a new grace period if one not already started. */
+                if (!rcu_gp_in_progress(rsp)) {
+                        unsigned long nestflag;
+                        struct rcu_node *rnp_root = rcu_get_root(rsp);
+
+                        raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
+                        rcu_start_gp(rsp, nestflag);  /* releases rnp_root->lock */
+                } else {
+                        /* Give the grace period a kick. */
+                        rdp->blimit = LONG_MAX;
+                        if (rsp->n_force_qs == rdp->n_force_qs_snap &&
+                            *rdp->nxttail[RCU_DONE_TAIL] != head)
+                                force_quiescent_state(rsp, 0);
+                        rdp->n_force_qs_snap = rsp->n_force_qs;
+                        rdp->qlen_last_fqs_check = rdp->qlen;
+                }
         } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
                 force_quiescent_state(rsp, 1);
         local_irq_restore(flags);
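
Moving the grace-period bookkeeping under the qhimark test means the expensive path runs only when the per-CPU queue has grown by qhimark callbacks since the last check, rather than on every call_rcu() invocation. A standalone sketch of that hysteresis, with illustrative names and values rather than the kernel's:

#include <stdbool.h>
#include <stdio.h>

#define QHIMARK 10000L

static long qlen;                       /* callbacks currently queued */
static long qlen_last_fqs_check;        /* queue length at the last slow path */

/* Returns true when enqueueing one more callback should take the slow path. */
static bool should_take_slowpath(void)
{
        if (++qlen > qlen_last_fqs_check + QHIMARK) {
                qlen_last_fqs_check = qlen;     /* re-arm the hysteresis */
                return true;
        }
        return false;
}

int main(void)
{
        long i, slowpaths = 0;

        for (i = 0; i < 35000; i++)
                if (should_take_slowpath())
                        slowpaths++;
        /* Only a handful of slow paths across 35000 enqueues. */
        printf("slowpaths = %ld\n", slowpaths);
        return 0;
}
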
@@ -1699,13 +1698,12 @@ static void _rcu_barrier(struct rcu_state *rsp,
          * decrement rcu_barrier_cpu_count -- otherwise the first CPU
          * might complete its grace period before all of the other CPUs
          * did their increment, causing this function to return too
-         * early.
+         * early.  Note that on_each_cpu() disables irqs, which prevents
+         * any CPUs from coming online or going offline until each online
+         * CPU has queued its RCU-barrier callback.
          */
         atomic_set(&rcu_barrier_cpu_count, 1);
-        preempt_disable(); /* stop CPU_DYING from filling orphan_cbs_list */
-        rcu_adopt_orphan_cbs(rsp);
         on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1);
-        preempt_enable(); /* CPU_DYING can again fill orphan_cbs_list */
         if (atomic_dec_and_test(&rcu_barrier_cpu_count))
                 complete(&rcu_barrier_completion);
         wait_for_completion(&rcu_barrier_completion);
@@ -1831,18 +1829,13 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
         case CPU_DYING:
         case CPU_DYING_FROZEN:
                 /*
-                 * preempt_disable() in _rcu_barrier() prevents stop_machine(),
-                 * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);"
-                 * returns, all online cpus have queued rcu_barrier_func().
-                 * The dying CPU clears its cpu_online_mask bit and
-                 * moves all of its RCU callbacks to ->orphan_cbs_list
-                 * in the context of stop_machine(), so subsequent calls
-                 * to _rcu_barrier() will adopt these callbacks and only
-                 * then queue rcu_barrier_func() on all remaining CPUs.
+                 * The whole machine is "stopped" except this CPU, so we can
+                 * touch any data without introducing corruption. We send the
+                 * dying CPU's callbacks to an arbitrarily chosen online CPU.
                  */
-                rcu_send_cbs_to_orphanage(&rcu_bh_state);
-                rcu_send_cbs_to_orphanage(&rcu_sched_state);
-                rcu_preempt_send_cbs_to_orphanage();
+                rcu_send_cbs_to_online(&rcu_bh_state);
+                rcu_send_cbs_to_online(&rcu_sched_state);
+                rcu_preempt_send_cbs_to_online();
                 break;
         case CPU_DEAD:
         case CPU_DEAD_FROZEN:
@@ -1880,8 +1873,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
 {
         int i;
 
-        for (i = NUM_RCU_LVLS - 1; i >= 0; i--)
+        for (i = NUM_RCU_LVLS - 1; i > 0; i--)
                 rsp->levelspread[i] = CONFIG_RCU_FANOUT;
+        rsp->levelspread[0] = RCU_FANOUT_LEAF;
 }
 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
 static void __init rcu_init_levelspread(struct rcu_state *rsp)
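
In the CONFIG_RCU_FANOUT_EXACT case above, one level of the rcu_node tree now gets the RCU_FANOUT_LEAF spread while the remaining levels keep CONFIG_RCU_FANOUT, and the number of CPUs the tree can span is roughly the product of the per-level spreads. A standalone sketch with made-up constants (64 and 16 are stand-ins, not the kernel's configuration):

#include <stdio.h>

/* Stand-in values; the real ones come from Kconfig and rcutree.h. */
#define NUM_RCU_LVLS    3
#define RCU_FANOUT      64      /* stand-in for CONFIG_RCU_FANOUT */
#define RCU_FANOUT_LEAF 16      /* smaller leaf-level fanout */

int main(void)
{
        int levelspread[NUM_RCU_LVLS];
        long capacity = 1;
        int i;

        /* Mirrors the patched CONFIG_RCU_FANOUT_EXACT loop above. */
        for (i = NUM_RCU_LVLS - 1; i > 0; i--)
                levelspread[i] = RCU_FANOUT;
        levelspread[0] = RCU_FANOUT_LEAF;

        for (i = 0; i < NUM_RCU_LVLS; i++) {
                printf("levelspread[%d] = %d\n", i, levelspread[i]);
                capacity *= levelspread[i];
        }
        /* One level limited to 16 instead of 64: 16 * 64 * 64 = 65536. */
        printf("CPUs covered by the tree: %ld\n", capacity);
        return 0;
}
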