diff options
| -rw-r--r-- | Documentation/memory-barriers.txt | 129 | ||||
| -rw-r--r-- | kernel/sched.c | 23 |
2 files changed, 151 insertions, 1 deletions
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index f5b7127f54ac..7f5809eddee6 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt | |||
| @@ -31,6 +31,7 @@ Contents: | |||
| 31 | 31 | ||
| 32 | - Locking functions. | 32 | - Locking functions. |
| 33 | - Interrupt disabling functions. | 33 | - Interrupt disabling functions. |
| 34 | - Sleep and wake-up functions. | ||
| 34 | - Miscellaneous functions. | 35 | - Miscellaneous functions. |
| 35 | 36 | ||
| 36 | (*) Inter-CPU locking barrier effects. | 37 | (*) Inter-CPU locking barrier effects. |
| @@ -1217,6 +1218,132 @@ barriers are required in such a situation, they must be provided from some | |||
| 1217 | other means. | 1218 | other means. |
| 1218 | 1219 | ||
| 1219 | 1220 | ||
| 1221 | SLEEP AND WAKE-UP FUNCTIONS | ||
| 1222 | --------------------------- | ||
| 1223 | |||
| 1224 | Sleeping and waking on an event flagged in global data can be viewed as an | ||
| 1225 | interaction between two pieces of data: the task state of the task waiting for | ||
| 1226 | the event and the global data used to indicate the event. To make sure that | ||
| 1227 | accesses to these appear to happen in the right order, the primitives that | ||
| 1228 | begin the process of going to sleep and the primitives that initiate a wake-up | ||
| 1229 | imply certain barriers. | ||
| 1230 | |||
| 1231 | Firstly, the sleeper normally follows something like this sequence of events: | ||
| 1232 | |||
| 1233 | for (;;) { | ||
| 1234 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
| 1235 | if (event_indicated) | ||
| 1236 | break; | ||
| 1237 | schedule(); | ||
| 1238 | } | ||
| 1239 | |||
| 1240 | A general memory barrier is interpolated automatically by set_current_state() | ||
| 1241 | after it has altered the task state: | ||
| 1242 | |||
| 1243 | CPU 1 | ||
| 1244 | =============================== | ||
| 1245 | set_current_state(); | ||
| 1246 | set_mb(); | ||
| 1247 | STORE current->state | ||
| 1248 | <general barrier> | ||
| 1249 | LOAD event_indicated | ||
| 1250 | |||
| 1251 | set_current_state() may be wrapped by: | ||
| 1252 | |||
| 1253 | prepare_to_wait(); | ||
| 1254 | prepare_to_wait_exclusive(); | ||
| 1255 | |||
| 1256 | which therefore also imply a general memory barrier after setting the state. | ||
| 1257 | The whole sequence above is available in various canned forms, all of which | ||
| 1258 | interpolate the memory barrier in the right place: | ||
| 1259 | |||
| 1260 | wait_event(); | ||
| 1261 | wait_event_interruptible(); | ||
| 1262 | wait_event_interruptible_exclusive(); | ||
| 1263 | wait_event_interruptible_timeout(); | ||
| 1264 | wait_event_killable(); | ||
| 1265 | wait_event_timeout(); | ||
| 1266 | wait_on_bit(); | ||
| 1267 | wait_on_bit_lock(); | ||
| 1268 | |||
| 1269 | |||
| 1270 | Secondly, code that performs a wake up normally follows something like this: | ||
| 1271 | |||
| 1272 | event_indicated = 1; | ||
| 1273 | wake_up(&event_wait_queue); | ||
| 1274 | |||
| 1275 | or: | ||
| 1276 | |||
| 1277 | event_indicated = 1; | ||
| 1278 | wake_up_process(event_daemon); | ||
| 1279 | |||
| 1280 | A write memory barrier is implied by wake_up() and co. if and only if they wake | ||
| 1281 | something up. The barrier occurs before the task state is cleared, and so sits | ||
| 1282 | between the STORE to indicate the event and the STORE to set TASK_RUNNING: | ||
| 1283 | |||
| 1284 | CPU 1 CPU 2 | ||
| 1285 | =============================== =============================== | ||
| 1286 | set_current_state(); STORE event_indicated | ||
| 1287 | set_mb(); wake_up(); | ||
| 1288 | STORE current->state <write barrier> | ||
| 1289 | <general barrier> STORE current->state | ||
| 1290 | LOAD event_indicated | ||
| 1291 | |||
| 1292 | The available waker functions include: | ||
| 1293 | |||
| 1294 | complete(); | ||
| 1295 | wake_up(); | ||
| 1296 | wake_up_all(); | ||
| 1297 | wake_up_bit(); | ||
| 1298 | wake_up_interruptible(); | ||
| 1299 | wake_up_interruptible_all(); | ||
| 1300 | wake_up_interruptible_nr(); | ||
| 1301 | wake_up_interruptible_poll(); | ||
| 1302 | wake_up_interruptible_sync(); | ||
| 1303 | wake_up_interruptible_sync_poll(); | ||
| 1304 | wake_up_locked(); | ||
| 1305 | wake_up_locked_poll(); | ||
| 1306 | wake_up_nr(); | ||
| 1307 | wake_up_poll(); | ||
| 1308 | wake_up_process(); | ||
| 1309 | |||
| 1310 | |||
| 1311 | [!] Note that the memory barriers implied by the sleeper and the waker do _not_ | ||
| 1312 | order multiple stores before the wake-up with respect to loads of those stored | ||
| 1313 | values after the sleeper has called set_current_state(). For instance, if the | ||
| 1314 | sleeper does: | ||
| 1315 | |||
| 1316 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 1317 | if (event_indicated) | ||
| 1318 | break; | ||
| 1319 | __set_current_state(TASK_RUNNING); | ||
| 1320 | do_something(my_data); | ||
| 1321 | |||
| 1322 | and the waker does: | ||
| 1323 | |||
| 1324 | my_data = value; | ||
| 1325 | event_indicated = 1; | ||
| 1326 | wake_up(&event_wait_queue); | ||
| 1327 | |||
| 1328 | there's no guarantee that the change to event_indicated will be perceived by | ||
| 1329 | the sleeper as coming after the change to my_data. In such a circumstance, the | ||
| 1330 | code on both sides must interpolate its own memory barriers between the | ||
| 1331 | separate data accesses. Thus the above sleeper ought to do: | ||
| 1332 | |||
| 1333 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 1334 | if (event_indicated) { | ||
| 1335 | smp_rmb(); | ||
| 1336 | do_something(my_data); | ||
| 1337 | } | ||
| 1338 | |||
| 1339 | and the waker should do: | ||
| 1340 | |||
| 1341 | my_data = value; | ||
| 1342 | smp_wmb(); | ||
| 1343 | event_indicated = 1; | ||
| 1344 | wake_up(&event_wait_queue); | ||
| 1345 | |||
| 1346 | |||
| 1220 | MISCELLANEOUS FUNCTIONS | 1347 | MISCELLANEOUS FUNCTIONS |
| 1221 | ----------------------- | 1348 | ----------------------- |
| 1222 | 1349 | ||
| @@ -1366,7 +1493,7 @@ WHERE ARE MEMORY BARRIERS NEEDED? | |||
| 1366 | 1493 | ||
| 1367 | Under normal operation, memory operation reordering is generally not going to | 1494 | Under normal operation, memory operation reordering is generally not going to |
| 1368 | be a problem as a single-threaded linear piece of code will still appear to | 1495 | be a problem as a single-threaded linear piece of code will still appear to |
| 1369 | work correctly, even if it's in an SMP kernel. There are, however, three | 1496 | work correctly, even if it's in an SMP kernel. There are, however, four |
| 1370 | circumstances in which reordering definitely _could_ be a problem: | 1497 | circumstances in which reordering definitely _could_ be a problem: |
| 1371 | 1498 | ||
| 1372 | (*) Interprocessor interaction. | 1499 | (*) Interprocessor interaction. |
diff --git a/kernel/sched.c b/kernel/sched.c index 228acae8821f..c3c04e256560 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -2464,6 +2464,17 @@ out: | |||
| 2464 | return success; | 2464 | return success; |
| 2465 | } | 2465 | } |
| 2466 | 2466 | ||
| 2467 | /** | ||
| 2468 | * wake_up_process - Wake up a specific process | ||
| 2469 | * @p: The process to be woken up. | ||
| 2470 | * | ||
| 2471 | * Attempt to wake up the nominated process and move it to the set of runnable | ||
| 2472 | * processes. Returns 1 if the process was woken up, 0 if it was already | ||
| 2473 | * running. | ||
| 2474 | * | ||
| 2475 | * It may be assumed that this function implies a write memory barrier before | ||
| 2476 | * changing the task state if and only if the process is actually woken up. | ||
| 2477 | */ | ||
| 2467 | int wake_up_process(struct task_struct *p) | 2478 | int wake_up_process(struct task_struct *p) |
| 2468 | { | 2479 | { |
| 2469 | return try_to_wake_up(p, TASK_ALL, 0); | 2480 | return try_to_wake_up(p, TASK_ALL, 0); |
| @@ -5425,6 +5436,9 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, | |||
| 5425 | * @mode: which threads | 5436 | * @mode: which threads |
| 5426 | * @nr_exclusive: how many wake-one or wake-many threads to wake up | 5437 | * @nr_exclusive: how many wake-one or wake-many threads to wake up |
| 5427 | * @key: is directly passed to the wakeup function | 5438 | * @key: is directly passed to the wakeup function |
| 5439 | * | ||
| 5440 | * It may be assumed that this function implies a write memory barrier before | ||
| 5441 | * changing the task state if and only if any tasks are woken up. | ||
| 5428 | */ | 5442 | */ |
| 5429 | void __wake_up(wait_queue_head_t *q, unsigned int mode, | 5443 | void __wake_up(wait_queue_head_t *q, unsigned int mode, |
| 5430 | int nr_exclusive, void *key) | 5444 | int nr_exclusive, void *key) |
| @@ -5463,6 +5477,9 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) | |||
| 5463 | * with each other. This can prevent needless bouncing between CPUs. | 5477 | * with each other. This can prevent needless bouncing between CPUs. |
| 5464 | * | 5478 | * |
| 5465 | * On UP it can prevent extra preemption. | 5479 | * On UP it can prevent extra preemption. |
| 5480 | * | ||
| 5481 | * It may be assumed that this function implies a write memory barrier before | ||
| 5482 | * changing the task state if and only if any tasks are woken up. | ||
| 5466 | */ | 5483 | */ |
| 5467 | void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, | 5484 | void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, |
| 5468 | int nr_exclusive, void *key) | 5485 | int nr_exclusive, void *key) |
| @@ -5499,6 +5516,9 @@ EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ | |||
| 5499 | * awakened in the same order in which they were queued. | 5516 | * awakened in the same order in which they were queued. |
| 5500 | * | 5517 | * |
| 5501 | * See also complete_all(), wait_for_completion() and related routines. | 5518 | * See also complete_all(), wait_for_completion() and related routines. |
| 5519 | * | ||
| 5520 | * It may be assumed that this function implies a write memory barrier before | ||
| 5521 | * changing the task state if and only if any tasks are woken up. | ||
| 5502 | */ | 5522 | */ |
| 5503 | void complete(struct completion *x) | 5523 | void complete(struct completion *x) |
| 5504 | { | 5524 | { |
| @@ -5516,6 +5536,9 @@ EXPORT_SYMBOL(complete); | |||
| 5516 | * @x: holds the state of this particular completion | 5536 | * @x: holds the state of this particular completion |
| 5517 | * | 5537 | * |
| 5518 | * This will wake up all threads waiting on this particular completion event. | 5538 | * This will wake up all threads waiting on this particular completion event. |
| 5539 | * | ||
| 5540 | * It may be assumed that this function implies a write memory barrier before | ||
| 5541 | * changing the task state if and only if any tasks are woken up. | ||
| 5519 | */ | 5542 | */ |
| 5520 | void complete_all(struct completion *x) | 5543 | void complete_all(struct completion *x) |
| 5521 | { | 5544 | { |
