Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r--  kernel/rcutree.c  160
1 files changed, 77 insertions, 83 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index ccdc04c47981..dd4aea806f8e 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -67,9 +67,6 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
         .gpnum = -300, \
         .completed = -300, \
         .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \
-        .orphan_cbs_list = NULL, \
-        .orphan_cbs_tail = &structname.orphan_cbs_list, \
-        .orphan_qlen = 0, \
         .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \
         .n_force_qs = 0, \
         .n_force_qs_ngp = 0, \
@@ -367,8 +364,8 @@ void rcu_irq_exit(void)
         WARN_ON_ONCE(rdtp->dynticks & 0x1);
 
         /* If the interrupt queued a callback, get out of dyntick mode. */
-        if (__get_cpu_var(rcu_sched_data).nxtlist ||
-            __get_cpu_var(rcu_bh_data).nxtlist)
+        if (__this_cpu_read(rcu_sched_data.nxtlist) ||
+            __this_cpu_read(rcu_bh_data.nxtlist))
                 set_need_resched();
 }
 
@@ -620,9 +617,17 @@ static void __init check_cpu_stall_init(void)
 static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
 {
         if (rdp->gpnum != rnp->gpnum) {
-                rdp->qs_pending = 1;
-                rdp->passed_quiesc = 0;
+                /*
+                 * If the current grace period is waiting for this CPU,
+                 * set up to detect a quiescent state, otherwise don't
+                 * go looking for one.
+                 */
                 rdp->gpnum = rnp->gpnum;
+                if (rnp->qsmask & rdp->grpmask) {
+                        rdp->qs_pending = 1;
+                        rdp->passed_quiesc = 0;
+                } else
+                        rdp->qs_pending = 0;
         }
 }
 
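
The hunk above is the core of the grace-period-begin check: when a CPU notices a new grace period, it now arms quiescent-state detection only if that grace period is actually waiting on it, i.e. only if its bit is set in rnp->qsmask. Below is a minimal userspace sketch of that decision; the structs, field names, and masks are simplified stand-ins for the kernel's rcu_node/rcu_data bookkeeping, not the real definitions.

    #include <stdio.h>

    /* Simplified stand-ins for the relevant rcu_node/rcu_data fields. */
    struct node { unsigned long gpnum; unsigned long qsmask; };
    struct data { unsigned long gpnum; unsigned long grpmask;
                  int qs_pending; int passed_quiesc; };

    /* Note a new grace period; arm QS detection only if this CPU is needed. */
    static void note_new_gpnum(const struct node *rnp, struct data *rdp)
    {
        if (rdp->gpnum != rnp->gpnum) {
            rdp->gpnum = rnp->gpnum;
            if (rnp->qsmask & rdp->grpmask) {
                rdp->qs_pending = 1;    /* the new GP is waiting on us */
                rdp->passed_quiesc = 0;
            } else {
                rdp->qs_pending = 0;    /* nothing to report for this GP */
            }
        }
    }

    int main(void)
    {
        struct node rnp = { .gpnum = 5, .qsmask = 0x2 };  /* waiting on CPU 1 only */
        struct data rdp = { .gpnum = 4, .grpmask = 0x1 }; /* we model CPU 0 */

        note_new_gpnum(&rnp, &rdp);
        printf("gpnum=%lu qs_pending=%d\n", rdp.gpnum, rdp.qs_pending);
        return 0;
    }
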
@@ -681,6 +686,24 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat
 
                 /* Remember that we saw this grace-period completion. */
                 rdp->completed = rnp->completed;
+
+                /*
+                 * If we were in an extended quiescent state, we may have
+                 * missed some grace periods that other CPUs handled on
+                 * our behalf. Catch up with this state to avoid noting
+                 * spurious new grace periods.  If another grace period
+                 * has started, then rnp->gpnum will have advanced, so
+                 * we will detect this later on.
+                 */
+                if (ULONG_CMP_LT(rdp->gpnum, rdp->completed))
+                        rdp->gpnum = rdp->completed;
+
+                /*
+                 * If RCU does not need a quiescent state from this CPU,
+                 * then make sure that this CPU doesn't go looking for one.
+                 */
+                if ((rnp->qsmask & rdp->grpmask) == 0)
+                        rdp->qs_pending = 0;
         }
 }
 
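
The catch-up added here depends on a wraparound-safe ordering test for unsigned grace-period counters. The sketch below models that test with a macro written to match my reading of the kernel's ULONG_CMP_LT() in rcupdate.h; treat the exact definition as an assumption and check the header if it matters.

    #include <stdio.h>
    #include <limits.h>

    /*
     * Assumed to mirror the kernel's ULONG_CMP_LT(): a is circularly
     * "before" b when the unsigned difference (a - b) exceeds ULONG_MAX / 2.
     */
    #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))

    int main(void)
    {
        unsigned long completed = 0UL;           /* counter just wrapped */
        unsigned long gpnum = ULONG_MAX - 2;     /* stale pre-wrap snapshot */

        /* Catch up gpnum so we do not note spurious new grace periods. */
        if (ULONG_CMP_LT(gpnum, completed))
            gpnum = completed;

        printf("gpnum=%lu completed=%lu\n", gpnum, completed);
        return 0;
    }
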
@@ -984,53 +1007,31 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
 #ifdef CONFIG_HOTPLUG_CPU
 
 /*
- * Move a dying CPU's RCU callbacks to the ->orphan_cbs_list for the
- * specified flavor of RCU.  The callbacks will be adopted by the next
- * _rcu_barrier() invocation or by the CPU_DEAD notifier, whichever
- * comes first.  Because this is invoked from the CPU_DYING notifier,
- * irqs are already disabled.
+ * Move a dying CPU's RCU callbacks to online CPU's callback list.
+ * Synchronization is not required because this function executes
+ * in stop_machine() context.
  */
-static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
+static void rcu_send_cbs_to_online(struct rcu_state *rsp)
 {
         int i;
+        /* current DYING CPU is cleared in the cpu_online_mask */
+        int receive_cpu = cpumask_any(cpu_online_mask);
         struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
+        struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu);
 
         if (rdp->nxtlist == NULL)
                 return;  /* irqs disabled, so comparison is stable. */
-        raw_spin_lock(&rsp->onofflock);  /* irqs already disabled. */
-        *rsp->orphan_cbs_tail = rdp->nxtlist;
-        rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL];
+
+        *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
+        receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+        receive_rdp->qlen += rdp->qlen;
+        receive_rdp->n_cbs_adopted += rdp->qlen;
+        rdp->n_cbs_orphaned += rdp->qlen;
+
         rdp->nxtlist = NULL;
         for (i = 0; i < RCU_NEXT_SIZE; i++)
                 rdp->nxttail[i] = &rdp->nxtlist;
-        rsp->orphan_qlen += rdp->qlen;
-        rdp->n_cbs_orphaned += rdp->qlen;
         rdp->qlen = 0;
-        raw_spin_unlock(&rsp->onofflock);  /* irqs remain disabled. */
-}
-
-/*
- * Adopt previously orphaned RCU callbacks.
- */
-static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
-{
-        unsigned long flags;
-        struct rcu_data *rdp;
-
-        raw_spin_lock_irqsave(&rsp->onofflock, flags);
-        rdp = this_cpu_ptr(rsp->rda);
-        if (rsp->orphan_cbs_list == NULL) {
-                raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
-                return;
-        }
-        *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list;
-        rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail;
-        rdp->qlen += rsp->orphan_qlen;
-        rdp->n_cbs_adopted += rsp->orphan_qlen;
-        rsp->orphan_cbs_list = NULL;
-        rsp->orphan_cbs_tail = &rsp->orphan_cbs_list;
-        rsp->orphan_qlen = 0;
-        raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
 }
 
 /*
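
The new rcu_send_cbs_to_online() is a constant-time list splice: the dying CPU's whole callback list is linked in at the receiving CPU's tail pointer, the receiver's tail is advanced to the donor's tail, the queue counts are transferred, and the donor's list is reinitialized. A small userspace sketch of the same tail-pointer splice, using generic names rather than the kernel's rcu_head machinery:

    #include <stdio.h>

    struct cb { struct cb *next; int id; };

    /* A callback queue: a head pointer plus a pointer to the last ->next slot. */
    struct queue { struct cb *head; struct cb **tail; long qlen; };

    static void queue_init(struct queue *q)
    {
        q->head = NULL;
        q->tail = &q->head;
        q->qlen = 0;
    }

    static void queue_add(struct queue *q, struct cb *c)
    {
        c->next = NULL;
        *q->tail = c;           /* link the callback at the end */
        q->tail = &c->next;     /* tail now names the new last slot */
        q->qlen++;
    }

    /* Splice all of 'from' onto 'to' in O(1), then empty 'from'. */
    static void queue_splice(struct queue *to, struct queue *from)
    {
        if (from->head == NULL)
            return;
        *to->tail = from->head;
        to->tail = from->tail;
        to->qlen += from->qlen;
        queue_init(from);
    }

    int main(void)
    {
        struct queue dying, online;
        struct cb a = { .id = 1 }, b = { .id = 2 };

        queue_init(&dying);
        queue_init(&online);
        queue_add(&dying, &a);
        queue_add(&dying, &b);

        queue_splice(&online, &dying);      /* the "send to online CPU" step */
        for (struct cb *c = online.head; c; c = c->next)
            printf("adopted callback %d\n", c->id);
        return 0;
    }
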
@@ -1081,8 +1082,6 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
         raw_spin_unlock_irqrestore(&rnp->lock, flags);
         if (need_report & RCU_OFL_TASKS_EXP_GP)
                 rcu_report_exp_rnp(rsp, rnp);
-
-        rcu_adopt_orphan_cbs(rsp);
 }
 
 /*
@@ -1100,11 +1099,7 @@ static void rcu_offline_cpu(int cpu)
 
 #else /* #ifdef CONFIG_HOTPLUG_CPU */
 
-static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
-{
-}
-
-static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
+static void rcu_send_cbs_to_online(struct rcu_state *rsp)
 {
 }
 
@@ -1440,22 +1435,11 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
          */
         local_irq_save(flags);
         rdp = this_cpu_ptr(rsp->rda);
-        rcu_process_gp_end(rsp, rdp);
-        check_for_new_grace_period(rsp, rdp);
 
         /* Add the callback to our list. */
         *rdp->nxttail[RCU_NEXT_TAIL] = head;
         rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
 
-        /* Start a new grace period if one not already started. */
-        if (!rcu_gp_in_progress(rsp)) {
-                unsigned long nestflag;
-                struct rcu_node *rnp_root = rcu_get_root(rsp);
-
-                raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
-                rcu_start_gp(rsp, nestflag);  /* releases rnp_root->lock. */
-        }
-
         /*
          * Force the grace period if too many callbacks or too long waiting.
          * Enforce hysteresis, and don't invoke force_quiescent_state()
@@ -1464,12 +1448,27 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
          * is the only one waiting for a grace period to complete.
          */
         if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
-                rdp->blimit = LONG_MAX;
-                if (rsp->n_force_qs == rdp->n_force_qs_snap &&
-                    *rdp->nxttail[RCU_DONE_TAIL] != head)
-                        force_quiescent_state(rsp, 0);
-                rdp->n_force_qs_snap = rsp->n_force_qs;
-                rdp->qlen_last_fqs_check = rdp->qlen;
+
+                /* Are we ignoring a completed grace period? */
+                rcu_process_gp_end(rsp, rdp);
+                check_for_new_grace_period(rsp, rdp);
+
+                /* Start a new grace period if one not already started. */
+                if (!rcu_gp_in_progress(rsp)) {
+                        unsigned long nestflag;
+                        struct rcu_node *rnp_root = rcu_get_root(rsp);
+
+                        raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
+                        rcu_start_gp(rsp, nestflag);  /* rlses rnp_root->lock */
+                } else {
+                        /* Give the grace period a kick. */
+                        rdp->blimit = LONG_MAX;
+                        if (rsp->n_force_qs == rdp->n_force_qs_snap &&
+                            *rdp->nxttail[RCU_DONE_TAIL] != head)
+                                force_quiescent_state(rsp, 0);
+                        rdp->n_force_qs_snap = rsp->n_force_qs;
+                        rdp->qlen_last_fqs_check = rdp->qlen;
+                }
         } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
                 force_quiescent_state(rsp, 1);
         local_irq_restore(flags);
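
The effect of the two hunks above is that the common __call_rcu() path now only enqueues the callback; the grace-period bookkeeping that takes the rcu_node root lock runs only when the per-CPU queue has grown by more than qhimark since the last check. A rough sketch of that hysteresis pattern (plain C with an illustrative threshold, not the kernel's tuning):

    #include <stdio.h>

    #define QHIMARK 10000   /* illustrative threshold only */

    struct cpu_q {
        long qlen;                  /* callbacks currently queued */
        long qlen_last_fqs_check;   /* queue length at the last slow-path check */
    };

    /* Fast path: count the enqueue; take the slow path only on big excursions. */
    static void enqueue_cb(struct cpu_q *q)
    {
        if (++q->qlen > q->qlen_last_fqs_check + QHIMARK) {
            /* ...check for ended/new grace periods, start or kick one... */
            printf("slow path at qlen=%ld\n", q->qlen);
            q->qlen_last_fqs_check = q->qlen;
        }
    }

    int main(void)
    {
        struct cpu_q q = { 0, 0 };

        for (int i = 0; i < 25000; i++)
            enqueue_cb(&q);     /* slow path fires twice, not 25000 times */
        return 0;
    }
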
@@ -1699,13 +1698,12 @@ static void _rcu_barrier(struct rcu_state *rsp,
          * decrement rcu_barrier_cpu_count -- otherwise the first CPU
          * might complete its grace period before all of the other CPUs
          * did their increment, causing this function to return too
-         * early.
+         * early.  Note that on_each_cpu() disables irqs, which prevents
+         * any CPUs from coming online or going offline until each online
+         * CPU has queued its RCU-barrier callback.
          */
         atomic_set(&rcu_barrier_cpu_count, 1);
-        preempt_disable(); /* stop CPU_DYING from filling orphan_cbs_list */
-        rcu_adopt_orphan_cbs(rsp);
         on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1);
-        preempt_enable(); /* CPU_DYING can again fill orphan_cbs_list */
         if (atomic_dec_and_test(&rcu_barrier_cpu_count))
                 complete(&rcu_barrier_completion);
         wait_for_completion(&rcu_barrier_completion);
@@ -1831,18 +1829,13 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
         case CPU_DYING:
         case CPU_DYING_FROZEN:
                 /*
-                 * preempt_disable() in _rcu_barrier() prevents stop_machine(),
-                 * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);"
-                 * returns, all online cpus have queued rcu_barrier_func().
-                 * The dying CPU clears its cpu_online_mask bit and
-                 * moves all of its RCU callbacks to ->orphan_cbs_list
-                 * in the context of stop_machine(), so subsequent calls
-                 * to _rcu_barrier() will adopt these callbacks and only
-                 * then queue rcu_barrier_func() on all remaining CPUs.
+                 * The whole machine is "stopped" except this CPU, so we can
+                 * touch any data without introducing corruption.  We send the
+                 * dying CPU's callbacks to an arbitrarily chosen online CPU.
                  */
-                rcu_send_cbs_to_orphanage(&rcu_bh_state);
-                rcu_send_cbs_to_orphanage(&rcu_sched_state);
-                rcu_preempt_send_cbs_to_orphanage();
+                rcu_send_cbs_to_online(&rcu_bh_state);
+                rcu_send_cbs_to_online(&rcu_sched_state);
+                rcu_preempt_send_cbs_to_online();
                 break;
         case CPU_DEAD:
         case CPU_DEAD_FROZEN:
@@ -1880,8 +1873,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
 {
         int i;
 
-        for (i = NUM_RCU_LVLS - 1; i >= 0; i--)
+        for (i = NUM_RCU_LVLS - 1; i > 0; i--)
                 rsp->levelspread[i] = CONFIG_RCU_FANOUT;
+        rsp->levelspread[0] = RCU_FANOUT_LEAF;
 }
 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
 static void __init rcu_init_levelspread(struct rcu_state *rsp)
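
The intent of this last hunk is to give one level of the rcu_node tree its own fanout, RCU_FANOUT_LEAF, while the remaining levels keep CONFIG_RCU_FANOUT; as written, the separate value lands in levelspread[0]. A toy version of that exact-fanout setup, with arbitrary example constants standing in for the kernel's Kconfig values:

    #include <stdio.h>

    #define NUM_LVLS     3
    #define FANOUT       64   /* stand-in for CONFIG_RCU_FANOUT */
    #define FANOUT_LEAF  16   /* stand-in for RCU_FANOUT_LEAF */

    int main(void)
    {
        int levelspread[NUM_LVLS];
        int i;

        /* Most levels use the regular fanout... */
        for (i = NUM_LVLS - 1; i > 0; i--)
            levelspread[i] = FANOUT;
        /* ...while the singled-out level gets the separate leaf fanout. */
        levelspread[0] = FANOUT_LEAF;

        for (i = 0; i < NUM_LVLS; i++)
            printf("levelspread[%d] = %d\n", i, levelspread[i]);
        return 0;
    }
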
