diff options
author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2012-04-23 18:52:53 -0400 |
---|---|---|
committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2012-07-02 15:33:20 -0400 |
commit | f885b7f2b2de70be266d2cecc476f773a1e2ca5d (patch) | |
tree | c4f4d03ca1469f22701b848274034a74c5ae2b04 /kernel/rcutree.c | |
parent | cba6d0d64ee53772b285d0c0c288deefbeaf7775 (diff) |
rcu: Control RCU_FANOUT_LEAF from boot-time parameter
Although making RCU_FANOUT_LEAF a kernel configuration parameter rather
than a fixed constant makes it easier for people to decrease cache-miss
overhead for large systems, it is of little help for people who must
run a single pre-built kernel binary.
This commit therefore allows the value of RCU_FANOUT_LEAF to be
increased (but not decreased!) via a boot-time parameter named
rcutree.rcu_fanout_leaf.
Reported-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r-- | kernel/rcutree.c | 97 |
1 file changed, 83 insertions, 14 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 4b97bba7396..a4c592b66e1 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -60,17 +60,10 @@ | |||
60 | 60 | ||
61 | /* Data structures. */ | 61 | /* Data structures. */ |
62 | 62 | ||
63 | static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; | 63 | static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; |
64 | 64 | ||
65 | #define RCU_STATE_INITIALIZER(structname) { \ | 65 | #define RCU_STATE_INITIALIZER(structname) { \ |
66 | .level = { &structname##_state.node[0] }, \ | 66 | .level = { &structname##_state.node[0] }, \ |
67 | .levelcnt = { \ | ||
68 | NUM_RCU_LVL_0, /* root of hierarchy. */ \ | ||
69 | NUM_RCU_LVL_1, \ | ||
70 | NUM_RCU_LVL_2, \ | ||
71 | NUM_RCU_LVL_3, \ | ||
72 | NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \ | ||
73 | }, \ | ||
74 | .fqs_state = RCU_GP_IDLE, \ | 67 | .fqs_state = RCU_GP_IDLE, \ |
75 | .gpnum = -300, \ | 68 | .gpnum = -300, \ |
76 | .completed = -300, \ | 69 | .completed = -300, \ |
@@ -91,6 +84,19 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); | |||
91 | 84 | ||
92 | static struct rcu_state *rcu_state; | 85 | static struct rcu_state *rcu_state; |
93 | 86 | ||
87 | /* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ | ||
88 | static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF; | ||
89 | module_param(rcu_fanout_leaf, int, 0); | ||
90 | int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; | ||
91 | static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */ | ||
92 | NUM_RCU_LVL_0, | ||
93 | NUM_RCU_LVL_1, | ||
94 | NUM_RCU_LVL_2, | ||
95 | NUM_RCU_LVL_3, | ||
96 | NUM_RCU_LVL_4, | ||
97 | }; | ||
98 | int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ | ||
99 | |||
94 | /* | 100 | /* |
95 | * The rcu_scheduler_active variable transitions from zero to one just | 101 | * The rcu_scheduler_active variable transitions from zero to one just |
96 | * before the first task is spawned. So when this variable is zero, RCU | 102 | * before the first task is spawned. So when this variable is zero, RCU |
@@ -2574,9 +2580,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) | |||
2574 | { | 2580 | { |
2575 | int i; | 2581 | int i; |
2576 | 2582 | ||
2577 | for (i = NUM_RCU_LVLS - 1; i > 0; i--) | 2583 | for (i = rcu_num_lvls - 1; i > 0; i--) |
2578 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; | 2584 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; |
2579 | rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF; | 2585 | rsp->levelspread[0] = rcu_fanout_leaf; |
2580 | } | 2586 | } |
2581 | #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ | 2587 | #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ |
2582 | static void __init rcu_init_levelspread(struct rcu_state *rsp) | 2588 | static void __init rcu_init_levelspread(struct rcu_state *rsp) |
@@ -2586,7 +2592,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) | |||
2586 | int i; | 2592 | int i; |
2587 | 2593 | ||
2588 | cprv = NR_CPUS; | 2594 | cprv = NR_CPUS; |
2589 | for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { | 2595 | for (i = rcu_num_lvls - 1; i >= 0; i--) { |
2590 | ccur = rsp->levelcnt[i]; | 2596 | ccur = rsp->levelcnt[i]; |
2591 | rsp->levelspread[i] = (cprv + ccur - 1) / ccur; | 2597 | rsp->levelspread[i] = (cprv + ccur - 1) / ccur; |
2592 | cprv = ccur; | 2598 | cprv = ccur; |
@@ -2613,13 +2619,15 @@ static void __init rcu_init_one(struct rcu_state *rsp, | |||
2613 | 2619 | ||
2614 | /* Initialize the level-tracking arrays. */ | 2620 | /* Initialize the level-tracking arrays. */ |
2615 | 2621 | ||
2616 | for (i = 1; i < NUM_RCU_LVLS; i++) | 2622 | for (i = 0; i < rcu_num_lvls; i++) |
2623 | rsp->levelcnt[i] = num_rcu_lvl[i]; | ||
2624 | for (i = 1; i < rcu_num_lvls; i++) | ||
2617 | rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; | 2625 | rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; |
2618 | rcu_init_levelspread(rsp); | 2626 | rcu_init_levelspread(rsp); |
2619 | 2627 | ||
2620 | /* Initialize the elements themselves, starting from the leaves. */ | 2628 | /* Initialize the elements themselves, starting from the leaves. */ |
2621 | 2629 | ||
2622 | for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { | 2630 | for (i = rcu_num_lvls - 1; i >= 0; i--) { |
2623 | cpustride *= rsp->levelspread[i]; | 2631 | cpustride *= rsp->levelspread[i]; |
2624 | rnp = rsp->level[i]; | 2632 | rnp = rsp->level[i]; |
2625 | for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { | 2633 | for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { |
@@ -2649,7 +2657,7 @@ static void __init rcu_init_one(struct rcu_state *rsp, | |||
2649 | } | 2657 | } |
2650 | 2658 | ||
2651 | rsp->rda = rda; | 2659 | rsp->rda = rda; |
2652 | rnp = rsp->level[NUM_RCU_LVLS - 1]; | 2660 | rnp = rsp->level[rcu_num_lvls - 1]; |
2653 | for_each_possible_cpu(i) { | 2661 | for_each_possible_cpu(i) { |
2654 | while (i > rnp->grphi) | 2662 | while (i > rnp->grphi) |
2655 | rnp++; | 2663 | rnp++; |
@@ -2658,11 +2666,72 @@ static void __init rcu_init_one(struct rcu_state *rsp, | |||
2658 | } | 2666 | } |
2659 | } | 2667 | } |
2660 | 2668 | ||
2669 | /* | ||
2670 | * Compute the rcu_node tree geometry from kernel parameters. This cannot | ||
2671 | * replace the definitions in rcutree.h because those are needed to size | ||
2672 | * the ->node array in the rcu_state structure. | ||
2673 | */ | ||
2674 | static void __init rcu_init_geometry(void) | ||
2675 | { | ||
2676 | int i; | ||
2677 | int j; | ||
2678 | int n = NR_CPUS; | ||
2679 | int rcu_capacity[MAX_RCU_LVLS + 1]; | ||
2680 | |||
2681 | /* If the compile-time values are accurate, just leave. */ | ||
2682 | if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF) | ||
2683 | return; | ||
2684 | |||
2685 | /* | ||
2686 | * Compute number of nodes that can be handled by an rcu_node tree | ||
2687 | * with the given number of levels. Setting rcu_capacity[0] makes | ||
2688 | * some of the arithmetic easier. | ||
2689 | */ | ||
2690 | rcu_capacity[0] = 1; | ||
2691 | rcu_capacity[1] = rcu_fanout_leaf; | ||
2692 | for (i = 2; i <= MAX_RCU_LVLS; i++) | ||
2693 | rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT; | ||
2694 | |||
2695 | /* | ||
2696 | * The boot-time rcu_fanout_leaf parameter is only permitted | ||
2697 | * to increase the leaf-level fanout, not decrease it. Of course, | ||
2698 | * the leaf-level fanout cannot exceed the number of bits in | ||
2699 | * the rcu_node masks. Finally, the tree must be able to accommodate | ||
2700 | * the configured number of CPUs. Complain and fall back to the | ||
2701 | * compile-time values if these limits are exceeded. | ||
2702 | */ | ||
2703 | if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF || | ||
2704 | rcu_fanout_leaf > sizeof(unsigned long) * 8 || | ||
2705 | n > rcu_capacity[MAX_RCU_LVLS]) { | ||
2706 | WARN_ON(1); | ||
2707 | return; | ||
2708 | } | ||
2709 | |||
2710 | /* Calculate the number of rcu_nodes at each level of the tree. */ | ||
2711 | for (i = 1; i <= MAX_RCU_LVLS; i++) | ||
2712 | if (n <= rcu_capacity[i]) { | ||
2713 | for (j = 0; j <= i; j++) | ||
2714 | num_rcu_lvl[j] = | ||
2715 | DIV_ROUND_UP(n, rcu_capacity[i - j]); | ||
2716 | rcu_num_lvls = i; | ||
2717 | for (j = i + 1; j <= MAX_RCU_LVLS; j++) | ||
2718 | num_rcu_lvl[j] = 0; | ||
2719 | break; | ||
2720 | } | ||
2721 | |||
2722 | /* Calculate the total number of rcu_node structures. */ | ||
2723 | rcu_num_nodes = 0; | ||
2724 | for (i = 0; i <= MAX_RCU_LVLS; i++) | ||
2725 | rcu_num_nodes += num_rcu_lvl[i]; | ||
2726 | rcu_num_nodes -= n; | ||
2727 | } | ||
2728 | |||
2661 | void __init rcu_init(void) | 2729 | void __init rcu_init(void) |
2662 | { | 2730 | { |
2663 | int cpu; | 2731 | int cpu; |
2664 | 2732 | ||
2665 | rcu_bootup_announce(); | 2733 | rcu_bootup_announce(); |
2734 | rcu_init_geometry(); | ||
2666 | rcu_init_one(&rcu_sched_state, &rcu_sched_data); | 2735 | rcu_init_one(&rcu_sched_state, &rcu_sched_data); |
2667 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); | 2736 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); |
2668 | __rcu_init_preempt(); | 2737 | __rcu_init_preempt(); |