diff options
author | David Howells <dhowells@redhat.com> | 2009-04-28 10:01:38 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-04-29 08:15:55 -0400 |
commit | 50fa610a3b6ba7cf91d7a92229177dfaff2b81a1 (patch) | |
tree | 0369fc3269a18f8d2ffcedaebe4ac875bbe9870c | |
parent | 56a50adda49b2020156616c4eb15353e0f9ad7de (diff) |
sched: Document memory barriers implied by sleep/wake-up primitives
Add a section to the memory barriers document to note the implied
memory barriers of sleep primitives (set_current_state() and wrappers)
and wake-up primitives (wake_up() and co.).
Also extend the in-code comments on the wake_up() functions to note
these implied barriers.
[ Impact: add documentation ]
Signed-off-by: David Howells <dhowells@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <20090428140138.1192.94723.stgit@warthog.procyon.org.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | Documentation/memory-barriers.txt | 129 | ||||
-rw-r--r-- | kernel/sched.c | 23 |
2 files changed, 151 insertions, 1 deletions
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index f5b7127f54ac..7f5809eddee6 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt | |||
@@ -31,6 +31,7 @@ Contents: | |||
31 | 31 | ||
32 | - Locking functions. | 32 | - Locking functions. |
33 | - Interrupt disabling functions. | 33 | - Interrupt disabling functions. |
34 | - Sleep and wake-up functions. | ||
34 | - Miscellaneous functions. | 35 | - Miscellaneous functions. |
35 | 36 | ||
36 | (*) Inter-CPU locking barrier effects. | 37 | (*) Inter-CPU locking barrier effects. |
@@ -1217,6 +1218,132 @@ barriers are required in such a situation, they must be provided from some | |||
1217 | other means. | 1218 | other means. |
1218 | 1219 | ||
1219 | 1220 | ||
1221 | SLEEP AND WAKE-UP FUNCTIONS | ||
1222 | --------------------------- | ||
1223 | |||
1224 | Sleeping and waking on an event flagged in global data can be viewed as an | ||
1225 | interaction between two pieces of data: the task state of the task waiting for | ||
1226 | the event and the global data used to indicate the event. To make sure that | ||
1227 | these appear to happen in the right order, the primitives to begin the process | ||
1228 | of going to sleep, and the primitives to initiate a wake up, imply certain | ||
1229 | barriers. | ||
1230 | |||
1231 | Firstly, the sleeper normally follows something like this sequence of events: | ||
1232 | |||
1233 | for (;;) { | ||
1234 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
1235 | if (event_indicated) | ||
1236 | break; | ||
1237 | schedule(); | ||
1238 | } | ||
1239 | |||
1240 | A general memory barrier is interpolated automatically by set_current_state() | ||
1241 | after it has altered the task state: | ||
1242 | |||
1243 | CPU 1 | ||
1244 | =============================== | ||
1245 | set_current_state(); | ||
1246 | set_mb(); | ||
1247 | STORE current->state | ||
1248 | <general barrier> | ||
1249 | LOAD event_indicated | ||
1250 | |||
1251 | set_current_state() may be wrapped by: | ||
1252 | |||
1253 | prepare_to_wait(); | ||
1254 | prepare_to_wait_exclusive(); | ||
1255 | |||
1256 | which therefore also imply a general memory barrier after setting the state. | ||
1257 | The whole sequence above is available in various canned forms, all of which | ||
1258 | interpolate the memory barrier in the right place: | ||
1259 | |||
1260 | wait_event(); | ||
1261 | wait_event_interruptible(); | ||
1262 | wait_event_interruptible_exclusive(); | ||
1263 | wait_event_interruptible_timeout(); | ||
1264 | wait_event_killable(); | ||
1265 | wait_event_timeout(); | ||
1266 | wait_on_bit(); | ||
1267 | wait_on_bit_lock(); | ||
1268 | |||
1269 | |||
1270 | Secondly, code that performs a wake up normally follows something like this: | ||
1271 | |||
1272 | event_indicated = 1; | ||
1273 | wake_up(&event_wait_queue); | ||
1274 | |||
1275 | or: | ||
1276 | |||
1277 | event_indicated = 1; | ||
1278 | wake_up_process(event_daemon); | ||
1279 | |||
1280 | A write memory barrier is implied by wake_up() and co. if and only if they wake | ||
1281 | something up. The barrier occurs before the task state is cleared, and so sits | ||
1282 | between the STORE to indicate the event and the STORE to set TASK_RUNNING: | ||
1283 | |||
1284 | CPU 1 CPU 2 | ||
1285 | =============================== =============================== | ||
1286 | set_current_state(); STORE event_indicated | ||
1287 | set_mb(); wake_up(); | ||
1288 | STORE current->state <write barrier> | ||
1289 | <general barrier> STORE current->state | ||
1290 | LOAD event_indicated | ||
1291 | |||
1292 | The available waker functions include: | ||
1293 | |||
1294 | complete(); | ||
1295 | wake_up(); | ||
1296 | wake_up_all(); | ||
1297 | wake_up_bit(); | ||
1298 | wake_up_interruptible(); | ||
1299 | wake_up_interruptible_all(); | ||
1300 | wake_up_interruptible_nr(); | ||
1301 | wake_up_interruptible_poll(); | ||
1302 | wake_up_interruptible_sync(); | ||
1303 | wake_up_interruptible_sync_poll(); | ||
1304 | wake_up_locked(); | ||
1305 | wake_up_locked_poll(); | ||
1306 | wake_up_nr(); | ||
1307 | wake_up_poll(); | ||
1308 | wake_up_process(); | ||
1309 | |||
1310 | |||
1311 | [!] Note that the memory barriers implied by the sleeper and the waker do _not_ | ||
1312 | order multiple stores before the wake-up with respect to loads of those stored | ||
1313 | values after the sleeper has called set_current_state(). For instance, if the | ||
1314 | sleeper does: | ||
1315 | |||
1316 | set_current_state(TASK_INTERRUPTIBLE); | ||
1317 | if (event_indicated) | ||
1318 | break; | ||
1319 | __set_current_state(TASK_RUNNING); | ||
1320 | do_something(my_data); | ||
1321 | |||
1322 | and the waker does: | ||
1323 | |||
1324 | my_data = value; | ||
1325 | event_indicated = 1; | ||
1326 | wake_up(&event_wait_queue); | ||
1327 | |||
1328 | there's no guarantee that the change to event_indicated will be perceived by | ||
1329 | the sleeper as coming after the change to my_data. In such a circumstance, the | ||
1330 | code on both sides must interpolate its own memory barriers between the | ||
1331 | separate data accesses. Thus the above sleeper ought to do: | ||
1332 | |||
1333 | set_current_state(TASK_INTERRUPTIBLE); | ||
1334 | if (event_indicated) { | ||
1335 | smp_rmb(); | ||
1336 | do_something(my_data); | ||
1337 | } | ||
1338 | |||
1339 | and the waker should do: | ||
1340 | |||
1341 | my_data = value; | ||
1342 | smp_wmb(); | ||
1343 | event_indicated = 1; | ||
1344 | wake_up(&event_wait_queue); | ||
1345 | |||
1346 | |||
1220 | MISCELLANEOUS FUNCTIONS | 1347 | MISCELLANEOUS FUNCTIONS |
1221 | ----------------------- | 1348 | ----------------------- |
1222 | 1349 | ||
@@ -1366,7 +1493,7 @@ WHERE ARE MEMORY BARRIERS NEEDED? | |||
1366 | 1493 | ||
1367 | Under normal operation, memory operation reordering is generally not going to | 1494 | Under normal operation, memory operation reordering is generally not going to |
1368 | be a problem as a single-threaded linear piece of code will still appear to | 1495 | be a problem as a single-threaded linear piece of code will still appear to |
1369 | work correctly, even if it's in an SMP kernel. There are, however, three | 1496 | work correctly, even if it's in an SMP kernel. There are, however, four |
1370 | circumstances in which reordering definitely _could_ be a problem: | 1497 | circumstances in which reordering definitely _could_ be a problem: |
1371 | 1498 | ||
1372 | (*) Interprocessor interaction. | 1499 | (*) Interprocessor interaction. |
diff --git a/kernel/sched.c b/kernel/sched.c index b902e587a3a0..fd0c2cee3f35 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -2458,6 +2458,17 @@ out: | |||
2458 | return success; | 2458 | return success; |
2459 | } | 2459 | } |
2460 | 2460 | ||
2461 | /** | ||
2462 | * wake_up_process - Wake up a specific process | ||
2463 | * @p: The process to be woken up. | ||
2464 | * | ||
2465 | * Attempt to wake up the nominated process and move it to the set of runnable | ||
2466 | * processes. Returns 1 if the process was woken up, 0 if it was already | ||
2467 | * running. | ||
2468 | * | ||
2469 | * It may be assumed that this function implies a write memory barrier before | ||
2470 | * changing the task state if and only if any tasks are woken up. | ||
2471 | */ | ||
2461 | int wake_up_process(struct task_struct *p) | 2472 | int wake_up_process(struct task_struct *p) |
2462 | { | 2473 | { |
2463 | return try_to_wake_up(p, TASK_ALL, 0); | 2474 | return try_to_wake_up(p, TASK_ALL, 0); |
@@ -5241,6 +5252,9 @@ void __wake_up_common(wait_queue_head_t *q, unsigned int mode, | |||
5241 | * @mode: which threads | 5252 | * @mode: which threads |
5242 | * @nr_exclusive: how many wake-one or wake-many threads to wake up | 5253 | * @nr_exclusive: how many wake-one or wake-many threads to wake up |
5243 | * @key: is directly passed to the wakeup function | 5254 | * @key: is directly passed to the wakeup function |
5255 | * | ||
5256 | * It may be assumed that this function implies a write memory barrier before | ||
5257 | * changing the task state if and only if any tasks are woken up. | ||
5244 | */ | 5258 | */ |
5245 | void __wake_up(wait_queue_head_t *q, unsigned int mode, | 5259 | void __wake_up(wait_queue_head_t *q, unsigned int mode, |
5246 | int nr_exclusive, void *key) | 5260 | int nr_exclusive, void *key) |
@@ -5279,6 +5293,9 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) | |||
5279 | * with each other. This can prevent needless bouncing between CPUs. | 5293 | * with each other. This can prevent needless bouncing between CPUs. |
5280 | * | 5294 | * |
5281 | * On UP it can prevent extra preemption. | 5295 | * On UP it can prevent extra preemption. |
5296 | * | ||
5297 | * It may be assumed that this function implies a write memory barrier before | ||
5298 | * changing the task state if and only if any tasks are woken up. | ||
5282 | */ | 5299 | */ |
5283 | void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, | 5300 | void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, |
5284 | int nr_exclusive, void *key) | 5301 | int nr_exclusive, void *key) |
@@ -5315,6 +5332,9 @@ EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ | |||
5315 | * awakened in the same order in which they were queued. | 5332 | * awakened in the same order in which they were queued. |
5316 | * | 5333 | * |
5317 | * See also complete_all(), wait_for_completion() and related routines. | 5334 | * See also complete_all(), wait_for_completion() and related routines. |
5335 | * | ||
5336 | * It may be assumed that this function implies a write memory barrier before | ||
5337 | * changing the task state if and only if any tasks are woken up. | ||
5318 | */ | 5338 | */ |
5319 | void complete(struct completion *x) | 5339 | void complete(struct completion *x) |
5320 | { | 5340 | { |
@@ -5332,6 +5352,9 @@ EXPORT_SYMBOL(complete); | |||
5332 | * @x: holds the state of this particular completion | 5352 | * @x: holds the state of this particular completion |
5333 | * | 5353 | * |
5334 | * This will wake up all threads waiting on this particular completion event. | 5354 | * This will wake up all threads waiting on this particular completion event. |
5355 | * | ||
5356 | * It may be assumed that this function implies a write memory barrier before | ||
5357 | * changing the task state if and only if any tasks are woken up. | ||
5335 | */ | 5358 | */ |
5336 | void complete_all(struct completion *x) | 5359 | void complete_all(struct completion *x) |
5337 | { | 5360 | { |