 Documentation/memory-barriers.txt | 129 ++++++++++++++++++++++++++++++++++++-
 kernel/sched.c                    |  23 +++++++
 2 files changed, 151 insertions(+), 1 deletion(-)
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index f5b7127f54ac..7f5809eddee6 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -31,6 +31,7 @@ Contents:
 
      - Locking functions.
     - Interrupt disabling functions.
+     - Sleep and wake-up functions.
      - Miscellaneous functions.
 
  (*) Inter-CPU locking barrier effects.
@@ -1217,6 +1218,132 @@ barriers are required in such a situation, they must be provided from some
 other means.
 
 
+SLEEP AND WAKE-UP FUNCTIONS
+---------------------------
+
+Sleeping and waking on an event flagged in global data can be viewed as an
+interaction between two pieces of data: the task state of the task waiting for
+the event and the global data used to indicate the event.  To make sure that
+these appear to happen in the right order, the primitives to begin the process
+of going to sleep, and the primitives to initiate a wake up imply certain
+barriers.
+
+Firstly, the sleeper normally follows something like this sequence of events:
+
+        for (;;) {
+                set_current_state(TASK_UNINTERRUPTIBLE);
+                if (event_indicated)
+                        break;
+                schedule();
+        }
+
+A general memory barrier is interpolated automatically by set_current_state()
+after it has altered the task state:
+
+        CPU 1
+        ===============================
+        set_current_state();
+          set_mb();
+            STORE current->state
+            <general barrier>
+        LOAD event_indicated
+
+set_current_state() may be wrapped by:
+
+        prepare_to_wait();
+        prepare_to_wait_exclusive();
+
+which therefore also imply a general memory barrier after setting the state.
+The whole sequence above is available in various canned forms, all of which
+interpolate the memory barrier in the right place:
+
+        wait_event();
+        wait_event_interruptible();
+        wait_event_interruptible_exclusive();
+        wait_event_interruptible_timeout();
+        wait_event_killable();
+        wait_event_timeout();
+        wait_on_bit();
+        wait_on_bit_lock();
+
+
+Secondly, code that performs a wake up normally follows something like this:
+
+        event_indicated = 1;
+        wake_up(&event_wait_queue);
+
+or:
+
+        event_indicated = 1;
+        wake_up_process(event_daemon);
+
+A write memory barrier is implied by wake_up() and co. if and only if they wake
+something up.  The barrier occurs before the task state is cleared, and so sits
+between the STORE to indicate the event and the STORE to set TASK_RUNNING:
+
+        CPU 1                           CPU 2
+        =============================== ===============================
+        set_current_state();            STORE event_indicated
+          set_mb();                     wake_up();
+            STORE current->state          <write barrier>
+            <general barrier>             STORE current->state
+        LOAD event_indicated
+
+The available waker functions include:
+
+        complete();
+        wake_up();
+        wake_up_all();
+        wake_up_bit();
+        wake_up_interruptible();
+        wake_up_interruptible_all();
+        wake_up_interruptible_nr();
+        wake_up_interruptible_poll();
+        wake_up_interruptible_sync();
+        wake_up_interruptible_sync_poll();
+        wake_up_locked();
+        wake_up_locked_poll();
+        wake_up_nr();
+        wake_up_poll();
+        wake_up_process();
+
+
+[!] Note that the memory barriers implied by the sleeper and the waker do _not_
+order multiple stores before the wake-up with respect to loads of those stored
+values after the sleeper has called set_current_state().  For instance, if the
+sleeper does:
+
+        set_current_state(TASK_INTERRUPTIBLE);
+        if (event_indicated)
+                break;
+        __set_current_state(TASK_RUNNING);
+        do_something(my_data);
+
+and the waker does:
+
+        my_data = value;
+        event_indicated = 1;
+        wake_up(&event_wait_queue);
+
+there's no guarantee that the change to event_indicated will be perceived by
+the sleeper as coming after the change to my_data.  In such a circumstance, the
+code on both sides must interpolate its own memory barriers between the
+separate data accesses.  Thus the above sleeper ought to do:
+
+        set_current_state(TASK_INTERRUPTIBLE);
+        if (event_indicated) {
+                smp_rmb();
+                do_something(my_data);
+        }
+
+and the waker should do:
+
+        my_data = value;
+        smp_wmb();
+        event_indicated = 1;
+        wake_up(&event_wait_queue);
+
+
 MISCELLANEOUS FUNCTIONS
 -----------------------
 
@@ -1366,7 +1493,7 @@ WHERE ARE MEMORY BARRIERS NEEDED?
 
 Under normal operation, memory operation reordering is generally not going to
 be a problem as a single-threaded linear piece of code will still appear to
-work correctly, even if it's in an SMP kernel.  There are, however, three
+work correctly, even if it's in an SMP kernel.  There are, however, four
 circumstances in which reordering definitely _could_ be a problem:
 
  (*) Interprocessor interaction.
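
The hunk above documents a pattern rather than shipping code, so here is a minimal sketch (not part of the patch) of how the two sides fit together. The wait-queue head, event_indicated, my_data and do_something() are made-up driver-local names; only the prepare_to_wait()/schedule() loop, wake_up() and the explicit smp_wmb()/smp_rmb() pairing follow the documentation above.

#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/printk.h>

static DECLARE_WAIT_QUEUE_HEAD(event_wait_queue);
static int event_indicated;
static int my_data;

static void do_something(int data)              /* stand-in for real work */
{
        pr_info("event data %d\n", data);
}

static void sleeper(void)
{
        DEFINE_WAIT(wait);

        for (;;) {
                /* Sets the task state and, per the text above, implies a
                 * general barrier, so the check of event_indicated cannot
                 * slip ahead of the state change. */
                prepare_to_wait(&event_wait_queue, &wait, TASK_UNINTERRUPTIBLE);
                if (event_indicated)
                        break;
                schedule();
        }
        finish_wait(&event_wait_queue, &wait);  /* back to TASK_RUNNING, off the queue */

        /* The implied barriers do not order my_data against event_indicated;
         * pair explicitly with the waker's smp_wmb(). */
        smp_rmb();
        do_something(my_data);
}

static void waker(int value)
{
        my_data = value;
        smp_wmb();                      /* publish my_data before the flag */
        event_indicated = 1;
        wake_up(&event_wait_queue);     /* write barrier implied only if someone is woken */
}
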
diff --git a/kernel/sched.c b/kernel/sched.c
index b902e587a3a0..fd0c2cee3f35 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2458,6 +2458,17 @@ out:
 	return success;
 }
 
+/**
+ * wake_up_process - Wake up a specific process
+ * @p: The process to be woken up.
+ *
+ * Attempt to wake up the nominated process and move it to the set of runnable
+ * processes.  Returns 1 if the process was woken up, 0 if it was already
+ * running.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
+ */
 int wake_up_process(struct task_struct *p)
 {
 	return try_to_wake_up(p, TASK_ALL, 0);
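
As a hedged illustration of the kernel-doc just added (again, not from the patch): a kthread that sleeps in the raw set_current_state()/schedule() style and a producer that checks wake_up_process()'s return value. The names consumer, data_ready and payload are assumptions; the producer still orders its own data explicitly because the implied write barrier only materialises when a task is actually woken.

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/printk.h>

static struct task_struct *consumer;    /* assumed set up with kthread_run(consumer_fn, ...) */
static int data_ready;
static int payload;

static int consumer_fn(void *unused)
{
        for (;;) {
                set_current_state(TASK_UNINTERRUPTIBLE);  /* implies a general barrier */
                if (data_ready)
                        break;
                schedule();
        }
        __set_current_state(TASK_RUNNING);

        smp_rmb();                      /* pairs with the producer's smp_wmb() */
        pr_info("consumer saw %d\n", payload);
        return 0;
}

static void producer(int value)
{
        payload = value;
        smp_wmb();                      /* order payload before data_ready */
        data_ready = 1;

        /* Returns 1 if the consumer was asleep and has been woken, 0 if it
         * was already runnable (in which case no write barrier is implied
         * either). */
        if (!wake_up_process(consumer))
                pr_debug("consumer was already running\n");
}
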
@@ -5241,6 +5252,9 @@ void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
  * @mode: which threads
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
  * @key: is directly passed to the wakeup function
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
  */
 void __wake_up(wait_queue_head_t *q, unsigned int mode,
 			int nr_exclusive, void *key)
@@ -5279,6 +5293,9 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
  * with each other.  This can prevent needless bouncing between CPUs.
  *
  * On UP it can prevent extra preemption.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
  */
 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
 			int nr_exclusive, void *key)
@@ -5315,6 +5332,9 @@ EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
  * awakened in the same order in which they were queued.
  *
  * See also complete_all(), wait_for_completion() and related routines.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
  */
 void complete(struct completion *x)
 {
@@ -5332,6 +5352,9 @@ EXPORT_SYMBOL(complete);
  * @x: holds the state of this particular completion
  *
  * This will wake up all threads waiting on this particular completion event.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
  */
 void complete_all(struct completion *x)
 {
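
Finally, a sketch (not part of the patch) of the completion handshake that the complete()/complete_all() comments describe. setup_done and setup_result are invented names; in practice the handover of setup_result is ordered by the completion's internal wait-queue lock (release followed by acquire on the same lock), while the note added above only covers the barrier in the wake-up path.

#include <linux/completion.h>
#include <linux/kthread.h>

static DECLARE_COMPLETION(setup_done);
static int setup_result;

static int setup_thread(void *unused)
{
        setup_result = 42;              /* stand-in for real set-up work */

        /* Wakes at most one waiter; the write barrier documented above is
         * only implied if a waiter is actually asleep here. */
        complete(&setup_done);
        return 0;
}

static int wait_for_setup(void)
{
        wait_for_completion(&setup_done);
        return setup_result;
}
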
