diff options
-rw-r--r-- | kernel/locking/qspinlock.c | 34 | ||||
-rw-r--r-- | kernel/locking/qspinlock_paravirt.h | 4 |
2 files changed, 27 insertions, 11 deletions
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index ce6af1ee2cac..8a8c3c208c5e 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c | |||
@@ -74,12 +74,24 @@ | |||
74 | */ | 74 | */ |
75 | 75 | ||
76 | #include "mcs_spinlock.h" | 76 | #include "mcs_spinlock.h" |
77 | #define MAX_NODES 4 | ||
77 | 78 | ||
79 | /* | ||
80 | * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in | ||
81 | * size and four of them will fit nicely in one 64-byte cacheline. For | ||
82 | * pvqspinlock, however, we need more space for extra data. To accommodate | ||
83 | * that, we insert two more long words to pad it up to 32 bytes. In other | |
84 | * words, only two of them can fit in a cacheline in this case. That is OK as it is rare | |
85 | * to have more than 2 levels of slowpath nesting in actual use. We don't | ||
86 | * want to penalize pvqspinlocks to optimize for a rare case in native | ||
87 | * qspinlocks. | ||
88 | */ | ||
89 | struct qnode { | ||
90 | struct mcs_spinlock mcs; | ||
78 | #ifdef CONFIG_PARAVIRT_SPINLOCKS | 91 | #ifdef CONFIG_PARAVIRT_SPINLOCKS |
79 | #define MAX_NODES 8 | 92 | long reserved[2]; |
80 | #else | ||
81 | #define MAX_NODES 4 | ||
82 | #endif | 93 | #endif |
94 | }; | ||
83 | 95 | ||
84 | /* | 96 | /* |
85 | * The pending bit spinning loop count. | 97 | * The pending bit spinning loop count. |
@@ -101,7 +113,7 @@ | |||
101 | * | 113 | * |
102 | * PV doubles the storage and uses the second cacheline for PV state. | 114 | * PV doubles the storage and uses the second cacheline for PV state. |
103 | */ | 115 | */ |
104 | static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]); | 116 | static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[MAX_NODES]); |
105 | 117 | ||
106 | /* | 118 | /* |
107 | * We must be able to distinguish between no-tail and the tail at 0:0, | 119 | * We must be able to distinguish between no-tail and the tail at 0:0, |
@@ -126,7 +138,13 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail) | |||
126 | int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1; | 138 | int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1; |
127 | int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET; | 139 | int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET; |
128 | 140 | ||
129 | return per_cpu_ptr(&mcs_nodes[idx], cpu); | 141 | return per_cpu_ptr(&qnodes[idx].mcs, cpu); |
142 | } | ||
143 | |||
144 | static inline __pure | ||
145 | struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx) | ||
146 | { | ||
147 | return &((struct qnode *)base + idx)->mcs; | ||
130 | } | 148 | } |
131 | 149 | ||
132 | #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) | 150 | #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) |
@@ -390,11 +408,11 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) | |||
390 | queue: | 408 | queue: |
391 | qstat_inc(qstat_lock_slowpath, true); | 409 | qstat_inc(qstat_lock_slowpath, true); |
392 | pv_queue: | 410 | pv_queue: |
393 | node = this_cpu_ptr(&mcs_nodes[0]); | 411 | node = this_cpu_ptr(&qnodes[0].mcs); |
394 | idx = node->count++; | 412 | idx = node->count++; |
395 | tail = encode_tail(smp_processor_id(), idx); | 413 | tail = encode_tail(smp_processor_id(), idx); |
396 | 414 | ||
397 | node += idx; | 415 | node = grab_mcs_node(node, idx); |
398 | 416 | ||
399 | /* | 417 | /* |
400 | * Keep counts of non-zero index values: | 418 | * Keep counts of non-zero index values: |
@@ -534,7 +552,7 @@ release: | |||
534 | /* | 552 | /* |
535 | * release the node | 553 | * release the node |
536 | */ | 554 | */ |
537 | __this_cpu_dec(mcs_nodes[0].count); | 555 | __this_cpu_dec(qnodes[0].mcs.count); |
538 | } | 556 | } |
539 | EXPORT_SYMBOL(queued_spin_lock_slowpath); | 557 | EXPORT_SYMBOL(queued_spin_lock_slowpath); |
540 | 558 | ||
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 5a0cf5f9008c..0130e488ebfe 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h | |||
@@ -49,8 +49,6 @@ enum vcpu_state { | |||
49 | 49 | ||
50 | struct pv_node { | 50 | struct pv_node { |
51 | struct mcs_spinlock mcs; | 51 | struct mcs_spinlock mcs; |
52 | struct mcs_spinlock __res[3]; | ||
53 | |||
54 | int cpu; | 52 | int cpu; |
55 | u8 state; | 53 | u8 state; |
56 | }; | 54 | }; |
@@ -281,7 +279,7 @@ static void pv_init_node(struct mcs_spinlock *node) | |||
281 | { | 279 | { |
282 | struct pv_node *pn = (struct pv_node *)node; | 280 | struct pv_node *pn = (struct pv_node *)node; |
283 | 281 | ||
284 | BUILD_BUG_ON(sizeof(struct pv_node) > 5*sizeof(struct mcs_spinlock)); | 282 | BUILD_BUG_ON(sizeof(struct pv_node) > sizeof(struct qnode)); |
285 | 283 | ||
286 | pn->cpu = smp_processor_id(); | 284 | pn->cpu = smp_processor_id(); |
287 | pn->state = vcpu_running; | 285 | pn->state = vcpu_running; |