Diffstat (limited to 'kernel/srcu.c')
-rw-r--r--   kernel/srcu.c | 548 +++++++++++++++++++++++++++++++++++-----------
1 file changed, 435 insertions(+), 113 deletions(-)
diff --git a/kernel/srcu.c b/kernel/srcu.c
index ba35f3a4a1f4..2095be3318d5 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
| @@ -34,10 +34,77 @@ | |||
| 34 | #include <linux/delay.h> | 34 | #include <linux/delay.h> |
| 35 | #include <linux/srcu.h> | 35 | #include <linux/srcu.h> |
| 36 | 36 | ||
| 37 | /* | ||
| 38 | * Initialize an rcu_batch structure to empty. | ||
| 39 | */ | ||
| 40 | static inline void rcu_batch_init(struct rcu_batch *b) | ||
| 41 | { | ||
| 42 | b->head = NULL; | ||
| 43 | b->tail = &b->head; | ||
| 44 | } | ||
| 45 | |||
| 46 | /* | ||
| 47 | * Enqueue a callback onto the tail of the specified rcu_batch structure. | ||
| 48 | */ | ||
| 49 | static inline void rcu_batch_queue(struct rcu_batch *b, struct rcu_head *head) | ||
| 50 | { | ||
| 51 | *b->tail = head; | ||
| 52 | b->tail = &head->next; | ||
| 53 | } | ||
| 54 | |||
| 55 | /* | ||
| 56 | * Is the specified rcu_batch structure empty? | ||
| 57 | */ | ||
| 58 | static inline bool rcu_batch_empty(struct rcu_batch *b) | ||
| 59 | { | ||
| 60 | return b->tail == &b->head; | ||
| 61 | } | ||
| 62 | |||
| 63 | /* | ||
| 64 | * Remove the callback at the head of the specified rcu_batch structure | ||
| 65 | * and return a pointer to it, or return NULL if the structure is empty. | ||
| 66 | */ | ||
| 67 | static inline struct rcu_head *rcu_batch_dequeue(struct rcu_batch *b) | ||
| 68 | { | ||
| 69 | struct rcu_head *head; | ||
| 70 | |||
| 71 | if (rcu_batch_empty(b)) | ||
| 72 | return NULL; | ||
| 73 | |||
| 74 | head = b->head; | ||
| 75 | b->head = head->next; | ||
| 76 | if (b->tail == &head->next) | ||
| 77 | rcu_batch_init(b); | ||
| 78 | |||
| 79 | return head; | ||
| 80 | } | ||
| 81 | |||
| 82 | /* | ||
| 83 | * Move all callbacks from the rcu_batch structure specified by "from" to | ||
| 84 | * the structure specified by "to". | ||
| 85 | */ | ||
| 86 | static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from) | ||
| 87 | { | ||
| 88 | if (!rcu_batch_empty(from)) { | ||
| 89 | *to->tail = from->head; | ||
| 90 | to->tail = from->tail; | ||
| 91 | rcu_batch_init(from); | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
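These tail-pointer batches are the workhorse of the new implementation, so a stand-alone userspace model may help. The sketch below uses invented names (struct cb, batch_*) and plain C; it only mirrors the pointer manipulation above, not the kernel's locking:

    #include <stdio.h>

    /* Userspace model of the rcu_batch helpers above; names are invented. */
    struct cb {
            struct cb *next;
            const char *name;
    };

    struct batch {
            struct cb *head;
            struct cb **tail;       /* &head when empty, else &last->next */
    };

    static void batch_init(struct batch *b)
    {
            b->head = NULL;
            b->tail = &b->head;
    }

    static int batch_empty(struct batch *b)
    {
            return b->tail == &b->head;
    }

    static void batch_queue(struct batch *b, struct cb *e)
    {
            e->next = NULL;
            *b->tail = e;           /* link after the current last element */
            b->tail = &e->next;     /* remember the new last ->next field */
    }

    static struct cb *batch_dequeue(struct batch *b)
    {
            struct cb *e;

            if (batch_empty(b))
                    return NULL;
            e = b->head;
            b->head = e->next;
            if (b->tail == &e->next)        /* removed the final element */
                    batch_init(b);
            return e;
    }

    static void batch_move(struct batch *to, struct batch *from)
    {
            if (!batch_empty(from)) {       /* O(1) splice of "from" onto "to" */
                    *to->tail = from->head;
                    to->tail = from->tail;
                    batch_init(from);
            }
    }

    int main(void)
    {
            struct batch q, done;
            struct cb a = { .name = "a" }, b = { .name = "b" }, c = { .name = "c" };
            struct cb *e;

            batch_init(&q);
            batch_init(&done);
            batch_queue(&q, &a);
            batch_queue(&q, &b);
            batch_queue(&done, &c);
            batch_move(&done, &q);          /* done now holds c, a, b */
            while ((e = batch_dequeue(&done)) != NULL)
                    printf("%s\n", e->name);
            return 0;
    }

The tail pointer is what lets call_srcu() enqueue and srcu_advance_batches() splice whole batches without ever walking the list.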
| 95 | /* Single-threaded state machine that drives SRCU grace-period processing. */ | ||
| 96 | static void process_srcu(struct work_struct *work); | ||
| 97 | |||
| 37 | static int init_srcu_struct_fields(struct srcu_struct *sp) | 98 | static int init_srcu_struct_fields(struct srcu_struct *sp) |
| 38 | { | 99 | { |
| 39 | sp->completed = 0; | 100 | sp->completed = 0; |
| 40 | mutex_init(&sp->mutex); | 101 | spin_lock_init(&sp->queue_lock); |
| 102 | sp->running = false; | ||
| 103 | rcu_batch_init(&sp->batch_queue); | ||
| 104 | rcu_batch_init(&sp->batch_check0); | ||
| 105 | rcu_batch_init(&sp->batch_check1); | ||
| 106 | rcu_batch_init(&sp->batch_done); | ||
| 107 | INIT_DELAYED_WORK(&sp->work, process_srcu); | ||
| 41 | sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array); | 108 | sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array); |
| 42 | return sp->per_cpu_ref ? 0 : -ENOMEM; | 109 | return sp->per_cpu_ref ? 0 : -ENOMEM; |
| 43 | } | 110 | } |
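The new init_srcu_struct_fields() only makes sense against the matching srcu_struct layout, which lives in include/linux/srcu.h and is not part of this hunk. As a rough, non-authoritative sketch, the fields it touches look something like this:

    /* Sketch only; the real definition is in include/linux/srcu.h. */
    struct srcu_struct {
            unsigned completed;                     /* grace-period count; low bit picks the index */
            struct srcu_struct_array __percpu *per_cpu_ref; /* per-CPU ->c[2] and ->seq[2] */
            spinlock_t queue_lock;                  /* protects ->running and ->batch_queue */
            bool running;                           /* is grace-period processing scheduled? */
            struct rcu_batch batch_queue;           /* callbacks just handed to call_srcu() */
            struct rcu_batch batch_check0;          /* waiting for their first check-zero */
            struct rcu_batch batch_check1;          /* waiting for their second check-zero */
            struct rcu_batch batch_done;            /* grace period over; ready to invoke */
            struct delayed_work work;               /* runs process_srcu() */
    };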
| @@ -73,21 +140,116 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); | |||
| 73 | #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 140 | #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
| 74 | 141 | ||
| 75 | /* | 142 | /* |
| 76 | * srcu_readers_active_idx -- returns approximate number of readers | 143 | * Returns approximate total of the readers' ->seq[] values for the |
| 77 | * active on the specified rank of per-CPU counters. | 144 | * rank of per-CPU counters specified by idx. |
| 78 | */ | 145 | */ |
| 146 | static unsigned long srcu_readers_seq_idx(struct srcu_struct *sp, int idx) | ||
| 147 | { | ||
| 148 | int cpu; | ||
| 149 | unsigned long sum = 0; | ||
| 150 | unsigned long t; | ||
| 79 | 151 | ||
| 80 | static int srcu_readers_active_idx(struct srcu_struct *sp, int idx) | 152 | for_each_possible_cpu(cpu) { |
| 153 | t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->seq[idx]); | ||
| 154 | sum += t; | ||
| 155 | } | ||
| 156 | return sum; | ||
| 157 | } | ||
| 158 | |||
| 159 | /* | ||
| 160 | * Returns approximate number of readers active on the specified rank | ||
| 161 | * of the per-CPU ->c[] counters. | ||
| 162 | */ | ||
| 163 | static unsigned long srcu_readers_active_idx(struct srcu_struct *sp, int idx) | ||
| 81 | { | 164 | { |
| 82 | int cpu; | 165 | int cpu; |
| 83 | int sum; | 166 | unsigned long sum = 0; |
| 167 | unsigned long t; | ||
| 84 | 168 | ||
| 85 | sum = 0; | 169 | for_each_possible_cpu(cpu) { |
| 86 | for_each_possible_cpu(cpu) | 170 | t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]); |
| 87 | sum += per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]; | 171 | sum += t; |
| 172 | } | ||
| 88 | return sum; | 173 | return sum; |
| 89 | } | 174 | } |
| 90 | 175 | ||
| 176 | /* | ||
| 177 | * Return true if the number of pre-existing readers is determined to | ||
| 178 | * be stably zero. An example unstable zero can occur if the call | ||
| 179 | * to srcu_readers_active_idx() misses an __srcu_read_lock() increment, | ||
| 180 | * but due to task migration, sees the corresponding __srcu_read_unlock() | ||
| 181 | * decrement. This can happen because srcu_readers_active_idx() takes | ||
| 182 | * time to sum the array, and might in fact be interrupted or preempted | ||
| 183 | * partway through the summation. | ||
| 184 | */ | ||
| 185 | static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx) | ||
| 186 | { | ||
| 187 | unsigned long seq; | ||
| 188 | |||
| 189 | seq = srcu_readers_seq_idx(sp, idx); | ||
| 190 | |||
| 191 | /* | ||
| 192 | * The following smp_mb() A pairs with the smp_mb() B located in | ||
| 193 | * __srcu_read_lock(). This pairing ensures that if an | ||
| 194 | * __srcu_read_lock() increments its counter after the summation | ||
| 195 | * in srcu_readers_active_idx(), then the corresponding SRCU read-side | ||
| 196 | * critical section will see any changes made prior to the start | ||
| 197 | * of the current SRCU grace period. | ||
| 198 | * | ||
| 199 | * Also, if the above call to srcu_readers_seq_idx() saw the | ||
| 200 | * increment of ->seq[], then the call to srcu_readers_active_idx() | ||
| 201 | * must see the increment of ->c[]. | ||
| 202 | */ | ||
| 203 | smp_mb(); /* A */ | ||
| 204 | |||
| 205 | /* | ||
| 206 | * Note that srcu_readers_active_idx() can incorrectly return | ||
| 207 | * zero even though there is a pre-existing reader throughout. | ||
| 208 | * To see this, suppose that task A is in a very long SRCU | ||
| 209 | * read-side critical section that started on CPU 0, and that | ||
| 210 | * no other reader exists, so that the sum of the counters | ||
| 211 | * is equal to one. Then suppose that task B starts executing | ||
| 212 | * srcu_readers_active_idx(), summing up to CPU 1, and then that | ||
| 213 | * task C starts reading on CPU 0, so that its increment is not | ||
| 214 | * summed, but finishes reading on CPU 2, so that its decrement | ||
| 215 | * -is- summed. Then when task B completes its sum, it will | ||
| 216 | * incorrectly get zero, despite the fact that task A has been | ||
| 217 | * in its SRCU read-side critical section the whole time. | ||
| 218 | * | ||
| 219 | * We therefore do a validation step should srcu_readers_active_idx() | ||
| 220 | * return zero. | ||
| 221 | */ | ||
| 222 | if (srcu_readers_active_idx(sp, idx) != 0) | ||
| 223 | return false; | ||
| 224 | |||
| 225 | /* | ||
| 226 | * The remainder of this function is the validation step. | ||
| 227 | * The following smp_mb() D pairs with the smp_mb() C in | ||
| 228 | * __srcu_read_unlock(). If the __srcu_read_unlock() was seen | ||
| 229 | * by srcu_readers_active_idx() above, then any destructive | ||
| 230 | * operation performed after the grace period will happen after | ||
| 231 | * the corresponding SRCU read-side critical section. | ||
| 232 | * | ||
| 233 | * Note that there can be at most NR_CPUS worth of readers using | ||
| 234 | * the old index, which is not enough to overflow even a 32-bit | ||
| 235 | * integer. (Yes, this does mean that systems having more than | ||
| 236 | * a billion or so CPUs need to be 64-bit systems.) Therefore, | ||
| 237 | * the sum of the ->seq[] counters cannot possibly overflow. | ||
| 238 | * Therefore, the only way that the return values of the two | ||
| 239 | * calls to srcu_readers_seq_idx() can be equal is if there were | ||
| 240 | * no increments of the corresponding rank of ->seq[] counts | ||
| 241 | * in the interim. But the missed-increment scenario laid out | ||
| 242 | * above includes an increment of the ->seq[] counter by | ||
| 243 | * the corresponding __srcu_read_lock(). Therefore, if this | ||
| 244 | * scenario occurs, the return values from the two calls to | ||
| 245 | * srcu_readers_seq_idx() will differ, and thus the validation | ||
| 246 | * step below suffices. | ||
| 247 | */ | ||
| 248 | smp_mb(); /* D */ | ||
| 249 | |||
| 250 | return srcu_readers_seq_idx(sp, idx) == seq; | ||
| 251 | } | ||
| 252 | |||
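The race sketched in the comment above is easier to believe with numbers. The toy program below (ordinary userspace C, single-threaded, with the interleaving written out by hand and all memory-ordering subtleties ignored) re-enacts it: the ->c[] sum comes out to a bogus zero while task A is still reading, but the two ->seq[] samples differ, so srcu_readers_active_idx_check() would correctly return false:

    #include <stdio.h>

    /* Toy re-enactment of the missed-increment scenario; not kernel code. */
    #define NCPU 3

    static long c[NCPU];    /* models one rank of ->c[] */
    static long seq[NCPU];  /* models the same rank of ->seq[] */

    static void read_lock(int cpu)   { c[cpu]++; seq[cpu]++; }
    static void read_unlock(int cpu) { c[cpu]--; }

    static long sum(const long *a)
    {
            long s = 0;
            int i;

            for (i = 0; i < NCPU; i++)
                    s += a[i];
            return s;
    }

    int main(void)
    {
            long seq_before, c_sum = 0;

            read_lock(0);                   /* task A: long-running reader on CPU 0 */

            seq_before = sum(seq);          /* updater samples ->seq[] first */

            c_sum += c[0] + c[1];           /* updater sums CPUs 0 and 1, then is delayed */

            read_lock(0);                   /* task C starts reading on CPU 0: missed */
            read_unlock(2);                 /* task C migrates, finishes on CPU 2: seen */

            c_sum += c[2];                  /* updater finishes the ->c[] sum */

            printf("c sum = %ld (bogus zero; task A is still reading)\n", c_sum);
            printf("seq before = %ld, after = %ld: validation %s\n",
                   seq_before, sum(seq),
                   sum(seq) == seq_before ? "would wrongly pass" : "correctly fails");
            return 0;
    }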
| 91 | /** | 253 | /** |
| 92 | * srcu_readers_active - returns approximate number of readers. | 254 | * srcu_readers_active - returns approximate number of readers. |
| 93 | * @sp: which srcu_struct to count active readers (holding srcu_read_lock). | 255 | * @sp: which srcu_struct to count active readers (holding srcu_read_lock). |
| @@ -98,7 +260,14 @@ static int srcu_readers_active_idx(struct srcu_struct *sp, int idx) | |||
| 98 | */ | 260 | */ |
| 99 | static int srcu_readers_active(struct srcu_struct *sp) | 261 | static int srcu_readers_active(struct srcu_struct *sp) |
| 100 | { | 262 | { |
| 101 | return srcu_readers_active_idx(sp, 0) + srcu_readers_active_idx(sp, 1); | 263 | int cpu; |
| 264 | unsigned long sum = 0; | ||
| 265 | |||
| 266 | for_each_possible_cpu(cpu) { | ||
| 267 | sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[0]); | ||
| 268 | sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[1]); | ||
| 269 | } | ||
| 270 | return sum; | ||
| 102 | } | 271 | } |
| 103 | 272 | ||
| 104 | /** | 273 | /** |
| @@ -131,10 +300,11 @@ int __srcu_read_lock(struct srcu_struct *sp) | |||
| 131 | int idx; | 300 | int idx; |
| 132 | 301 | ||
| 133 | preempt_disable(); | 302 | preempt_disable(); |
| 134 | idx = sp->completed & 0x1; | 303 | idx = rcu_dereference_index_check(sp->completed, |
| 135 | barrier(); /* ensure compiler looks -once- at sp->completed. */ | 304 | rcu_read_lock_sched_held()) & 0x1; |
| 136 | per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]++; | 305 | ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) += 1; |
| 137 | srcu_barrier(); /* ensure compiler won't misorder critical section. */ | 306 | smp_mb(); /* B */ /* Avoid leaking the critical section. */ |
| 307 | ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->seq[idx]) += 1; | ||
| 138 | preempt_enable(); | 308 | preempt_enable(); |
| 139 | return idx; | 309 | return idx; |
| 140 | } | 310 | } |
| @@ -149,8 +319,8 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock); | |||
| 149 | void __srcu_read_unlock(struct srcu_struct *sp, int idx) | 319 | void __srcu_read_unlock(struct srcu_struct *sp, int idx) |
| 150 | { | 320 | { |
| 151 | preempt_disable(); | 321 | preempt_disable(); |
| 152 | srcu_barrier(); /* ensure compiler won't misorder critical section. */ | 322 | smp_mb(); /* C */ /* Avoid leaking the critical section. */ |
| 153 | per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--; | 323 | ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) -= 1; |
| 154 | preempt_enable(); | 324 | preempt_enable(); |
| 155 | } | 325 | } |
| 156 | EXPORT_SYMBOL_GPL(__srcu_read_unlock); | 326 | EXPORT_SYMBOL_GPL(__srcu_read_unlock); |
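For context, these primitives are normally reached through the srcu_read_lock()/srcu_read_unlock() wrappers. A typical reader, with my_srcu, my_ptr and struct foo invented purely for illustration, looks roughly like this:

    /* Hypothetical reader-side usage; my_srcu, my_ptr and struct foo are invented. */
    struct foo {
            int value;
    };

    static struct srcu_struct my_srcu;      /* init_srcu_struct(&my_srcu) at setup time */
    static struct foo __rcu *my_ptr;

    static int read_value(void)
    {
            struct foo *p;
            int idx, val = -1;

            idx = srcu_read_lock(&my_srcu);         /* notes which counter rank we used */
            p = srcu_dereference(my_ptr, &my_srcu); /* fetch the protected pointer */
            if (p)
                    val = p->value;
            srcu_read_unlock(&my_srcu, idx);        /* must pass back the same idx */
            return val;
    }

The returned idx identifies the rank whose ->c[] counter was incremented; handing it back to srcu_read_unlock() is what lets a reader safely span a flip of ->completed.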
| @@ -163,106 +333,119 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock); | |||
| 163 | * we repeatedly block for 1-millisecond time periods. This approach | 333 | * we repeatedly block for 1-millisecond time periods. This approach |
| 164 | * has done well in testing, so there is no need for a config parameter. | 334 | * has done well in testing, so there is no need for a config parameter. |
| 165 | */ | 335 | */ |
| 166 | #define SYNCHRONIZE_SRCU_READER_DELAY 10 | 336 | #define SRCU_RETRY_CHECK_DELAY 5 |
| 337 | #define SYNCHRONIZE_SRCU_TRYCOUNT 2 | ||
| 338 | #define SYNCHRONIZE_SRCU_EXP_TRYCOUNT 12 | ||
| 167 | 339 | ||
| 168 | /* | 340 | /* |
| 169 | * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). | 341 | * Wait until all pre-existing readers complete. Such readers will |
| 342 | * have used the index specified by "idx". The caller must ensure | ||
| 343 | * that ->completed does not change while checking, and that | ||
| 344 | * idx == (->completed & 1) ^ 1. | ||
| 170 | */ | 345 | */ |
| 171 | static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) | 346 | static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount) |
| 172 | { | 347 | { |
| 173 | int idx; | 348 | for (;;) { |
| 174 | 349 | if (srcu_readers_active_idx_check(sp, idx)) | |
| 175 | rcu_lockdep_assert(!lock_is_held(&sp->dep_map) && | 350 | return true; |
| 176 | !lock_is_held(&rcu_bh_lock_map) && | 351 | if (--trycount <= 0) |
| 177 | !lock_is_held(&rcu_lock_map) && | 352 | return false; |
| 178 | !lock_is_held(&rcu_sched_lock_map), | 353 | udelay(SRCU_RETRY_CHECK_DELAY); |
| 179 | "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section"); | 354 | } |
| 180 | 355 | } | |
| 181 | idx = sp->completed; | ||
| 182 | mutex_lock(&sp->mutex); | ||
| 183 | 356 | ||
| 184 | /* | 357 | /* |
| 185 | * Check to see if someone else did the work for us while we were | 358 | * Increment the ->completed counter so that future SRCU readers will |
| 186 | * waiting to acquire the lock. We need -two- advances of | 359 | * use the other rank of the ->c[] and ->seq[] arrays. This allows |
| 187 | * the counter, not just one. If there was but one, we might have | 360 | * us to wait for pre-existing readers in a starvation-free manner. |
| 188 | * shown up -after- our helper's first synchronize_sched(), thus | 361 | */ |
| 189 | * having failed to prevent CPU-reordering races with concurrent | 362 | static void srcu_flip(struct srcu_struct *sp) |
| 190 | * srcu_read_unlock()s on other CPUs (see comment below). So we | 363 | { |
| 191 | * either (1) wait for two or (2) supply the second ourselves. | 364 | sp->completed++; |
| 192 | */ | 365 | } |
| 193 | 366 | ||
| 194 | if ((sp->completed - idx) >= 2) { | 367 | /* |
| 195 | mutex_unlock(&sp->mutex); | 368 | * Enqueue an SRCU callback on the specified srcu_struct structure, |
| 196 | return; | 369 | * initiating grace-period processing if it is not already running. |
| 370 | */ | ||
| 371 | void call_srcu(struct srcu_struct *sp, struct rcu_head *head, | ||
| 372 | void (*func)(struct rcu_head *head)) | ||
| 373 | { | ||
| 374 | unsigned long flags; | ||
| 375 | |||
| 376 | head->next = NULL; | ||
| 377 | head->func = func; | ||
| 378 | spin_lock_irqsave(&sp->queue_lock, flags); | ||
| 379 | rcu_batch_queue(&sp->batch_queue, head); | ||
| 380 | if (!sp->running) { | ||
| 381 | sp->running = true; | ||
| 382 | queue_delayed_work(system_nrt_wq, &sp->work, 0); | ||
| 197 | } | 383 | } |
| 384 | spin_unlock_irqrestore(&sp->queue_lock, flags); | ||
| 385 | } | ||
| 386 | EXPORT_SYMBOL_GPL(call_srcu); | ||
| 198 | 387 | ||
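As a usage sketch (object and function names invented), an updater that must not block can unlink an object and let call_srcu() defer its reclamation until all readers that might still see it have finished:

    /* Hypothetical deferred-free pattern built on call_srcu(); names are invented. */
    struct foo {
            int value;
            struct rcu_head rh;             /* storage for the SRCU callback */
    };

    static struct srcu_struct my_srcu;
    static struct foo __rcu *my_ptr;

    static void free_foo_cb(struct rcu_head *rh)
    {
            kfree(container_of(rh, struct foo, rh));
    }

    static void replace_foo(struct foo *newp)       /* caller serializes updates */
    {
            struct foo *oldp;

            oldp = rcu_dereference_protected(my_ptr, 1);
            rcu_assign_pointer(my_ptr, newp);
            if (oldp)
                    call_srcu(&my_srcu, &oldp->rh, free_foo_cb);
    }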
| 199 | sync_func(); /* Force memory barrier on all CPUs. */ | 388 | struct rcu_synchronize { |
| 389 | struct rcu_head head; | ||
| 390 | struct completion completion; | ||
| 391 | }; | ||
| 200 | 392 | ||
| 201 | /* | 393 | /* |
| 202 | * The preceding synchronize_sched() ensures that any CPU that | 394 | * Awaken the corresponding synchronize_srcu() instance now that a |
| 203 | * sees the new value of sp->completed will also see any preceding | 395 | * grace period has elapsed. |
| 204 | * changes to data structures made by this CPU. This prevents | 396 | */ |
| 205 | * some other CPU from reordering the accesses in its SRCU | 397 | static void wakeme_after_rcu(struct rcu_head *head) |
| 206 | * read-side critical section to precede the corresponding | 398 | { |
| 207 | * srcu_read_lock() -- ensuring that such references will in | 399 | struct rcu_synchronize *rcu; |
| 208 | * fact be protected. | ||
| 209 | * | ||
| 210 | * So it is now safe to do the flip. | ||
| 211 | */ | ||
| 212 | 400 | ||
| 213 | idx = sp->completed & 0x1; | 401 | rcu = container_of(head, struct rcu_synchronize, head); |
| 214 | sp->completed++; | 402 | complete(&rcu->completion); |
| 403 | } | ||
| 215 | 404 | ||
| 216 | sync_func(); /* Force memory barrier on all CPUs. */ | 405 | static void srcu_advance_batches(struct srcu_struct *sp, int trycount); |
| 406 | static void srcu_reschedule(struct srcu_struct *sp); | ||
| 217 | 407 | ||
| 218 | /* | 408 | /* |
| 219 | * At this point, because of the preceding synchronize_sched(), | 409 | * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). |
| 220 | * all srcu_read_lock() calls using the old counters have completed. | 410 | */ |
| 221 | * Their corresponding critical sections might well be still | 411 | static void __synchronize_srcu(struct srcu_struct *sp, int trycount) |
| 222 | * executing, but the srcu_read_lock() primitives themselves | 412 | { |
| 223 | * will have finished executing. We initially give readers | 413 | struct rcu_synchronize rcu; |
| 224 | * an arbitrarily chosen 10 microseconds to get out of their | 414 | struct rcu_head *head = &rcu.head; |
| 225 | * SRCU read-side critical sections, then loop waiting 1/HZ | 415 | bool done = false; |
| 226 | * seconds per iteration. The 10-microsecond value has done | ||
| 227 | * very well in testing. | ||
| 228 | */ | ||
| 229 | |||
| 230 | if (srcu_readers_active_idx(sp, idx)) | ||
| 231 | udelay(SYNCHRONIZE_SRCU_READER_DELAY); | ||
| 232 | while (srcu_readers_active_idx(sp, idx)) | ||
| 233 | schedule_timeout_interruptible(1); | ||
| 234 | 416 | ||
| 235 | sync_func(); /* Force memory barrier on all CPUs. */ | 417 | rcu_lockdep_assert(!lock_is_held(&sp->dep_map) && |
| 418 | !lock_is_held(&rcu_bh_lock_map) && | ||
| 419 | !lock_is_held(&rcu_lock_map) && | ||
| 420 | !lock_is_held(&rcu_sched_lock_map), | ||
| 421 | "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section"); | ||
| 236 | 422 | ||
| 237 | /* | 423 | init_completion(&rcu.completion); |
| 238 | * The preceding synchronize_sched() forces all srcu_read_unlock() | 424 | |
| 239 | * primitives that were executing concurrently with the preceding | 425 | head->next = NULL; |
| 240 | * for_each_possible_cpu() loop to have completed by this point. | 426 | head->func = wakeme_after_rcu; |
| 241 | * More importantly, it also forces the corresponding SRCU read-side | 427 | spin_lock_irq(&sp->queue_lock); |
| 242 | * critical sections to have also completed, and the corresponding | 428 | if (!sp->running) { |
| 243 | * references to SRCU-protected data items to be dropped. | 429 | /* steal the processing owner */ |
| 244 | * | 430 | sp->running = true; |
| 245 | * Note: | 431 | rcu_batch_queue(&sp->batch_check0, head); |
| 246 | * | 432 | spin_unlock_irq(&sp->queue_lock); |
| 247 | * Despite what you might think at first glance, the | 433 | |
| 248 | * preceding synchronize_sched() -must- be within the | 434 | srcu_advance_batches(sp, trycount); |
| 249 | * critical section ended by the following mutex_unlock(). | 435 | if (!rcu_batch_empty(&sp->batch_done)) { |
| 250 | * Otherwise, a task taking the early exit can race | 436 | BUG_ON(sp->batch_done.head != head); |
| 251 | * with a srcu_read_unlock(), which might have executed | 437 | rcu_batch_dequeue(&sp->batch_done); |
| 252 | * just before the preceding srcu_readers_active() check, | 438 | done = true; |
| 253 | * and whose CPU might have reordered the srcu_read_unlock() | 439 | } |
| 254 | * with the preceding critical section. In this case, there | 440 | /* give the processing owner to work_struct */ |
| 255 | * is nothing preventing the synchronize_sched() task that is | 441 | srcu_reschedule(sp); |
| 256 | * taking the early exit from freeing a data structure that | 442 | } else { |
| 257 | * is still being referenced (out of order) by the task | 443 | rcu_batch_queue(&sp->batch_queue, head); |
| 258 | * doing the srcu_read_unlock(). | 444 | spin_unlock_irq(&sp->queue_lock); |
| 259 | * | 445 | } |
| 260 | * Alternatively, the comparison with "2" on the early exit | ||
| 261 | * could be changed to "3", but this increases synchronize_srcu() | ||
| 262 | * latency for bulk loads. So the current code is preferred. | ||
| 263 | */ | ||
| 264 | 446 | ||
| 265 | mutex_unlock(&sp->mutex); | 447 | if (!done) |
| 448 | wait_for_completion(&rcu.completion); | ||
| 266 | } | 449 | } |
| 267 | 450 | ||
| 268 | /** | 451 | /** |
| @@ -281,7 +464,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) | |||
| 281 | */ | 464 | */ |
| 282 | void synchronize_srcu(struct srcu_struct *sp) | 465 | void synchronize_srcu(struct srcu_struct *sp) |
| 283 | { | 466 | { |
| 284 | __synchronize_srcu(sp, synchronize_sched); | 467 | __synchronize_srcu(sp, SYNCHRONIZE_SRCU_TRYCOUNT); |
| 285 | } | 468 | } |
| 286 | EXPORT_SYMBOL_GPL(synchronize_srcu); | 469 | EXPORT_SYMBOL_GPL(synchronize_srcu); |
| 287 | 470 | ||
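The synchronous counterpart of the earlier call_srcu() sketch simply blocks in synchronize_srcu() and then frees directly (again assuming the invented my_srcu/my_ptr declarations from that sketch):

    /* Hypothetical synchronous update; reuses the invented my_srcu/my_ptr above. */
    static void replace_foo_sync(struct foo *newp)  /* caller serializes updates */
    {
            struct foo *oldp;

            oldp = rcu_dereference_protected(my_ptr, 1);
            rcu_assign_pointer(my_ptr, newp);
            synchronize_srcu(&my_srcu);     /* wait for pre-existing readers to finish */
            kfree(oldp);                    /* no reader can still hold a reference */
    }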
| @@ -289,18 +472,11 @@ EXPORT_SYMBOL_GPL(synchronize_srcu); | |||
| 289 | * synchronize_srcu_expedited - Brute-force SRCU grace period | 472 | * synchronize_srcu_expedited - Brute-force SRCU grace period |
| 290 | * @sp: srcu_struct with which to synchronize. | 473 | * @sp: srcu_struct with which to synchronize. |
| 291 | * | 474 | * |
| 292 | * Wait for an SRCU grace period to elapse, but use a "big hammer" | 475 | * Wait for an SRCU grace period to elapse, but be more aggressive about |
| 293 | * approach to force the grace period to end quickly. This consumes | 476 | * spinning rather than blocking when waiting. |
| 294 | * significant time on all CPUs and is unfriendly to real-time workloads, | ||
| 295 | * so is thus not recommended for any sort of common-case code. In fact, | ||
| 296 | * if you are using synchronize_srcu_expedited() in a loop, please | ||
| 297 | * restructure your code to batch your updates, and then use a single | ||
| 298 | * synchronize_srcu() instead. | ||
| 299 | * | 477 | * |
| 300 | * Note that it is illegal to call this function while holding any lock | 478 | * Note that it is illegal to call this function while holding any lock |
| 301 | * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal | 479 | * that is acquired by a CPU-hotplug notifier. It is also illegal to call |
| 302 | * to call this function from a CPU-hotplug notifier. Failing to observe | ||
| 303 | * these restriction will result in deadlock. It is also illegal to call | ||
| 304 | * synchronize_srcu_expedited() from the corresponding SRCU read-side | 480 | * synchronize_srcu_expedited() from the corresponding SRCU read-side |
| 305 | * critical section; doing so will result in deadlock. However, it is | 481 | * critical section; doing so will result in deadlock. However, it is |
| 306 | * perfectly legal to call synchronize_srcu_expedited() on one srcu_struct | 482 | * perfectly legal to call synchronize_srcu_expedited() on one srcu_struct |
| @@ -309,20 +485,166 @@ EXPORT_SYMBOL_GPL(synchronize_srcu); | |||
| 309 | */ | 485 | */ |
| 310 | void synchronize_srcu_expedited(struct srcu_struct *sp) | 486 | void synchronize_srcu_expedited(struct srcu_struct *sp) |
| 311 | { | 487 | { |
| 312 | __synchronize_srcu(sp, synchronize_sched_expedited); | 488 | __synchronize_srcu(sp, SYNCHRONIZE_SRCU_EXP_TRYCOUNT); |
| 313 | } | 489 | } |
| 314 | EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); | 490 | EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); |
| 315 | 491 | ||
| 316 | /** | 492 | /** |
| 493 | * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete. | ||
| 494 | */ | ||
| 495 | void srcu_barrier(struct srcu_struct *sp) | ||
| 496 | { | ||
| 497 | synchronize_srcu(sp); | ||
| 498 | } | ||
| 499 | EXPORT_SYMBOL_GPL(srcu_barrier); | ||
| 500 | |||
| 501 | /** | ||
| 317 | * srcu_batches_completed - return batches completed. | 502 | * srcu_batches_completed - return batches completed. |
| 318 | * @sp: srcu_struct on which to report batch completion. | 503 | * @sp: srcu_struct on which to report batch completion. |
| 319 | * | 504 | * |
| 320 | * Report the number of batches, correlated with, but not necessarily | 505 | * Report the number of batches, correlated with, but not necessarily |
| 321 | * precisely the same as, the number of grace periods that have elapsed. | 506 | * precisely the same as, the number of grace periods that have elapsed. |
| 322 | */ | 507 | */ |
| 323 | |||
| 324 | long srcu_batches_completed(struct srcu_struct *sp) | 508 | long srcu_batches_completed(struct srcu_struct *sp) |
| 325 | { | 509 | { |
| 326 | return sp->completed; | 510 | return sp->completed; |
| 327 | } | 511 | } |
| 328 | EXPORT_SYMBOL_GPL(srcu_batches_completed); | 512 | EXPORT_SYMBOL_GPL(srcu_batches_completed); |
| 513 | |||
| 514 | #define SRCU_CALLBACK_BATCH 10 | ||
| 515 | #define SRCU_INTERVAL 1 | ||
| 516 | |||
| 517 | /* | ||
| 518 | * Move any new SRCU callbacks to the first stage of the SRCU grace | ||
| 519 | * period pipeline. | ||
| 520 | */ | ||
| 521 | static void srcu_collect_new(struct srcu_struct *sp) | ||
| 522 | { | ||
| 523 | if (!rcu_batch_empty(&sp->batch_queue)) { | ||
| 524 | spin_lock_irq(&sp->queue_lock); | ||
| 525 | rcu_batch_move(&sp->batch_check0, &sp->batch_queue); | ||
| 526 | spin_unlock_irq(&sp->queue_lock); | ||
| 527 | } | ||
| 528 | } | ||
| 529 | |||
| 530 | /* | ||
| 531 | * Core SRCU state machine. Advance callbacks from ->batch_check0 to | ||
| 532 | * ->batch_check1 and then to ->batch_done as readers drain. | ||
| 533 | */ | ||
| 534 | static void srcu_advance_batches(struct srcu_struct *sp, int trycount) | ||
| 535 | { | ||
| 536 | int idx = 1 ^ (sp->completed & 1); | ||
| 537 | |||
| 538 | /* | ||
| 539 | * Because readers might be delayed for an extended period after | ||
| 540 | * fetching ->completed for their index, at any point in time there | ||
| 541 | * might well be readers using both idx=0 and idx=1. We therefore | ||
| 542 | * need to wait for readers to clear from both index values before | ||
| 543 | * invoking a callback. | ||
| 544 | */ | ||
| 545 | |||
| 546 | if (rcu_batch_empty(&sp->batch_check0) && | ||
| 547 | rcu_batch_empty(&sp->batch_check1)) | ||
| 548 | return; /* no callbacks need to be advanced */ | ||
| 549 | |||
| 550 | if (!try_check_zero(sp, idx, trycount)) | ||
| 551 | return; /* failed to advance, will try after SRCU_INTERVAL */ | ||
| 552 | |||
| 553 | /* | ||
| 554 | * The callbacks in ->batch_check1 have already been through their | ||
| 555 | * first check-zero and flip, back when they were enqueued on | ||
| 556 | * ->batch_check0 in a previous invocation of srcu_advance_batches(). | ||
| 557 | * (Presumably try_check_zero() returned false during that | ||
| 558 | * invocation, leaving the callbacks stranded on ->batch_check1.) | ||
| 559 | * They are therefore ready to invoke, so move them to ->batch_done. | ||
| 560 | */ | ||
| 561 | rcu_batch_move(&sp->batch_done, &sp->batch_check1); | ||
| 562 | |||
| 563 | if (rcu_batch_empty(&sp->batch_check0)) | ||
| 564 | return; /* no callbacks need to be advanced */ | ||
| 565 | srcu_flip(sp); | ||
| 566 | |||
| 567 | /* | ||
| 568 | * The callbacks in ->batch_check0 have just finished their | ||
| 569 | * first check-zero and flip, so move them to ->batch_check1 | ||
| 570 | * for future checking on the other idx. | ||
| 571 | */ | ||
| 572 | rcu_batch_move(&sp->batch_check1, &sp->batch_check0); | ||
| 573 | |||
| 574 | /* | ||
| 575 | * SRCU read-side critical sections are normally short, so check | ||
| 576 | * at least twice in quick succession after a flip. | ||
| 577 | */ | ||
| 578 | trycount = trycount < 2 ? 2 : trycount; | ||
| 579 | if (!try_check_zero(sp, idx^1, trycount)) | ||
| 580 | return; /* failed to advance, will try after SRCU_INTERVAL */ | ||
| 581 | |||
| 582 | /* | ||
| 583 | * The callbacks in ->batch_check1 have now waited for all | ||
| 584 | * pre-existing readers using both idx values. They are therefore | ||
| 585 | * ready to invoke, so move them to ->batch_done. | ||
| 586 | */ | ||
| 587 | rcu_batch_move(&sp->batch_done, &sp->batch_check1); | ||
| 588 | } | ||
| 589 | |||
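To make the pipeline concrete, the userspace sketch below models each batch as a plain count of callbacks and transcribes the control flow of srcu_collect_new() and srcu_advance_batches(). The fake try_check_zero() pretends readers are still present on its second call, so the batch is stranded on batch_check1 for one pass, exactly the situation the comments above describe (all names and the driver loop are invented):

    #include <stdio.h>
    #include <stdbool.h>

    /* Toy model of the SRCU callback pipeline; each batch is just a count. */
    static int batch_queue, batch_check0, batch_check1, batch_done;
    static unsigned completed;              /* low bit selects the active index */

    static bool try_check_zero(int idx)
    {
            static int calls;

            (void)idx;
            return ++calls != 2;            /* pretend readers linger on the 2nd check */
    }

    static void collect_new(void)
    {
            batch_check0 += batch_queue;    /* new arrivals enter the pipeline */
            batch_queue = 0;
    }

    static void advance_batches(void)
    {
            int idx = 1 ^ (completed & 1);  /* index pre-existing readers are using */

            if (!batch_check0 && !batch_check1)
                    return;
            if (!try_check_zero(idx))
                    return;                 /* retry on a later pass */

            batch_done += batch_check1;     /* waited on both indexes: ready to invoke */
            batch_check1 = 0;

            if (!batch_check0)
                    return;
            completed++;                    /* srcu_flip(): new readers use the other index */
            batch_check1 += batch_check0;   /* still need to drain the old index */
            batch_check0 = 0;

            if (!try_check_zero(idx ^ 1))
                    return;
            batch_done += batch_check1;
            batch_check1 = 0;
    }

    int main(void)
    {
            int pass;

            batch_queue = 3;                /* three call_srcu() invocations arrive */
            for (pass = 1; pass <= 2; pass++) {
                    collect_new();
                    advance_batches();
                    printf("pass %d: queue=%d check0=%d check1=%d done=%d completed=%u\n",
                           pass, batch_queue, batch_check0, batch_check1,
                           batch_done, completed);
            }
            return 0;
    }

Running it shows the batch parked on check1 with done=0 after the first pass, then reaching done=3 on the second, mirroring how real callbacks must drain readers on both index values.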
| 590 | /* | ||
| 591 | * Invoke a limited number of SRCU callbacks that have passed through | ||
| 592 | * their grace period. If there are more to do, SRCU will reschedule | ||
| 593 | * the workqueue. | ||
| 594 | */ | ||
| 595 | static void srcu_invoke_callbacks(struct srcu_struct *sp) | ||
| 596 | { | ||
| 597 | int i; | ||
| 598 | struct rcu_head *head; | ||
| 599 | |||
| 600 | for (i = 0; i < SRCU_CALLBACK_BATCH; i++) { | ||
| 601 | head = rcu_batch_dequeue(&sp->batch_done); | ||
| 602 | if (!head) | ||
| 603 | break; | ||
| 604 | local_bh_disable(); | ||
| 605 | head->func(head); | ||
| 606 | local_bh_enable(); | ||
| 607 | } | ||
| 608 | } | ||
| 609 | |||
| 610 | /* | ||
| 611 | * Finished one round of SRCU grace-period processing. Start another if | ||
| 612 | * more SRCU callbacks are queued; otherwise put SRCU into the not-running state. | ||
| 613 | */ | ||
| 614 | static void srcu_reschedule(struct srcu_struct *sp) | ||
| 615 | { | ||
| 616 | bool pending = true; | ||
| 617 | |||
| 618 | if (rcu_batch_empty(&sp->batch_done) && | ||
| 619 | rcu_batch_empty(&sp->batch_check1) && | ||
| 620 | rcu_batch_empty(&sp->batch_check0) && | ||
| 621 | rcu_batch_empty(&sp->batch_queue)) { | ||
| 622 | spin_lock_irq(&sp->queue_lock); | ||
| 623 | if (rcu_batch_empty(&sp->batch_done) && | ||
| 624 | rcu_batch_empty(&sp->batch_check1) && | ||
| 625 | rcu_batch_empty(&sp->batch_check0) && | ||
| 626 | rcu_batch_empty(&sp->batch_queue)) { | ||
| 627 | sp->running = false; | ||
| 628 | pending = false; | ||
| 629 | } | ||
| 630 | spin_unlock_irq(&sp->queue_lock); | ||
| 631 | } | ||
| 632 | |||
| 633 | if (pending) | ||
| 634 | queue_delayed_work(system_nrt_wq, &sp->work, SRCU_INTERVAL); | ||
| 635 | } | ||
| 636 | |||
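srcu_reschedule() relies on a check-then-lock-then-recheck pattern: the cheap unlocked scan may race with call_srcu(), so ->running is cleared only if the batches are still empty once ->queue_lock is held. In isolation, with generic invented names, the idiom looks like this:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Generic check/lock/re-check sketch mirroring srcu_reschedule(); names invented. */
    static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
    static int queued;              /* producers modify this only under queue_lock */
    static bool running = true;     /* is the worker still scheduled? */

    static bool worker_may_stop(void)
    {
            bool stop = false;

            if (queued == 0) {                      /* unlocked peek: may be stale */
                    pthread_mutex_lock(&queue_lock);
                    if (queued == 0) {              /* authoritative re-check under the lock */
                            running = false;        /* producers must now restart the worker */
                            stop = true;
                    }
                    pthread_mutex_unlock(&queue_lock);
            }
            return stop;
    }

    int main(void)
    {
            printf("stop=%d running=%d\n", worker_may_stop(), running);
            return 0;
    }

A producer that enqueues under the lock and finds running == false knows it must restart the worker itself, which is exactly what call_srcu() does with queue_delayed_work().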
| 637 | /* | ||
| 638 | * This is the work-queue function that handles SRCU grace periods. | ||
| 639 | */ | ||
| 640 | static void process_srcu(struct work_struct *work) | ||
| 641 | { | ||
| 642 | struct srcu_struct *sp; | ||
| 643 | |||
| 644 | sp = container_of(work, struct srcu_struct, work.work); | ||
| 645 | |||
| 646 | srcu_collect_new(sp); | ||
| 647 | srcu_advance_batches(sp, 1); | ||
| 648 | srcu_invoke_callbacks(sp); | ||
| 649 | srcu_reschedule(sp); | ||
| 650 | } | ||
