author		David S. Miller <davem@davemloft.net>	2010-05-31 08:46:45 -0400
committer	David S. Miller <davem@davemloft.net>	2010-05-31 08:46:45 -0400
commit		64960848abd18d0bcde3f53ffa7ed0b631e6b25d (patch)
tree		8424a1c550a98ce09f127425fde9b7b5f2f5027a /ipc
parent		2903037400a26e7c0cc93ab75a7d62abfacdf485 (diff)
parent		67a3e12b05e055c0415c556a315a3d3eb637e29e (diff)
Merge branch 'master' of /home/davem/src/GIT/linux-2.6/
Diffstat (limited to 'ipc')
-rw-r--r--	ipc/msg.c	12
-rw-r--r--	ipc/sem.c	322
-rw-r--r--	ipc/shm.c	11
-rw-r--r--	ipc/util.c	4
4 files changed, 246 insertions, 103 deletions
```diff
diff --git a/ipc/msg.c b/ipc/msg.c
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -345,19 +345,19 @@ copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
 	out.msg_rtime = in->msg_rtime;
 	out.msg_ctime = in->msg_ctime;
 
-	if (in->msg_cbytes > USHORT_MAX)
-		out.msg_cbytes = USHORT_MAX;
+	if (in->msg_cbytes > USHRT_MAX)
+		out.msg_cbytes = USHRT_MAX;
 	else
 		out.msg_cbytes = in->msg_cbytes;
 	out.msg_lcbytes = in->msg_cbytes;
 
-	if (in->msg_qnum > USHORT_MAX)
-		out.msg_qnum = USHORT_MAX;
+	if (in->msg_qnum > USHRT_MAX)
+		out.msg_qnum = USHRT_MAX;
 	else
 		out.msg_qnum = in->msg_qnum;
 
-	if (in->msg_qbytes > USHORT_MAX)
-		out.msg_qbytes = USHORT_MAX;
+	if (in->msg_qbytes > USHRT_MAX)
+		out.msg_qbytes = USHRT_MAX;
 	else
 		out.msg_qbytes = in->msg_qbytes;
 	out.msg_lqbytes = in->msg_qbytes;
```
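The hunk above is part of the tree-wide rename of `USHORT_MAX` to the standard C name `USHRT_MAX`; the logic itself is unchanged. The pattern being preserved is a saturating clamp of a wide counter into the 16-bit fields of the legacy `msqid_ds` ABI, roughly as follows (a standalone sketch, not part of the patch):

```c
#include <limits.h>	/* USHRT_MAX, the standard C name the patch adopts */

/* Saturating clamp of a wide counter into a 16-bit legacy field;
 * the same if/else pattern appears three times in the hunk above. */
static unsigned short clamp_to_ushrt(unsigned long v)
{
	return (v > USHRT_MAX) ? USHRT_MAX : (unsigned short)v;
}
```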
```diff
diff --git a/ipc/sem.c b/ipc/sem.c
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -3,56 +3,6 @@
  * Copyright (C) 1992 Krishna Balasubramanian
  * Copyright (C) 1995 Eric Schenk, Bruno Haible
  *
- * IMPLEMENTATION NOTES ON CODE REWRITE (Eric Schenk, January 1995):
- * This code underwent a massive rewrite in order to solve some problems
- * with the original code. In particular the original code failed to
- * wake up processes that were waiting for semval to go to 0 if the
- * value went to 0 and was then incremented rapidly enough. In solving
- * this problem I have also modified the implementation so that it
- * processes pending operations in a FIFO manner, thus give a guarantee
- * that processes waiting for a lock on the semaphore won't starve
- * unless another locking process fails to unlock.
- * In addition the following two changes in behavior have been introduced:
- * - The original implementation of semop returned the value
- *   last semaphore element examined on success. This does not
- *   match the manual page specifications, and effectively
- *   allows the user to read the semaphore even if they do not
- *   have read permissions. The implementation now returns 0
- *   on success as stated in the manual page.
- * - There is some confusion over whether the set of undo adjustments
- *   to be performed at exit should be done in an atomic manner.
- *   That is, if we are attempting to decrement the semval should we queue
- *   up and wait until we can do so legally?
- *   The original implementation attempted to do this.
- *   The current implementation does not do so. This is because I don't
- *   think it is the right thing (TM) to do, and because I couldn't
- *   see a clean way to get the old behavior with the new design.
- *   The POSIX standard and SVID should be consulted to determine
- *   what behavior is mandated.
- *
- * Further notes on refinement (Christoph Rohland, December 1998):
- * - The POSIX standard says, that the undo adjustments simply should
- *   redo. So the current implementation is o.K.
- * - The previous code had two flaws:
- *   1) It actively gave the semaphore to the next waiting process
- *      sleeping on the semaphore. Since this process did not have the
- *      cpu this led to many unnecessary context switches and bad
- *      performance. Now we only check which process should be able to
- *      get the semaphore and if this process wants to reduce some
- *      semaphore value we simply wake it up without doing the
- *      operation. So it has to try to get it later. Thus e.g. the
- *      running process may reacquire the semaphore during the current
- *      time slice. If it only waits for zero or increases the semaphore,
- *      we do the operation in advance and wake it up.
- *   2) It did not wake up all zero waiting processes. We try to do
- *      better but only get the semops right which only wait for zero or
- *      increase. If there are decrement operations in the operations
- *      array we do the same as before.
- *
- * With the incarnation of O(1) scheduler, it becomes unnecessary to perform
- * check/retry algorithm for waking up blocked processes as the new scheduler
- * is better at handling thread switch than the old one.
- *
  * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
  *
  * SMP-threaded, sysctl's added
```
```diff
@@ -61,6 +11,8 @@
  * (c) 2001 Red Hat Inc
  * Lockless wakeup
  * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
+ * Further wakeup optimizations, documentation
+ * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
  *
  * support for audit of ipc object properties and permission changes
  * Dustin Kirkland <dustin.kirkland@us.ibm.com>
```
```diff
@@ -68,6 +20,57 @@
  * namespaces support
  * OpenVZ, SWsoft Inc.
  * Pavel Emelianov <xemul@openvz.org>
+ *
+ * Implementation notes: (May 2010)
+ * This file implements System V semaphores.
+ *
+ * User space visible behavior:
+ * - FIFO ordering for semop() operations (just FIFO, not starvation
+ *   protection)
+ * - multiple semaphore operations that alter the same semaphore in
+ *   one semop() are handled.
+ * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
+ *   SETALL calls.
+ * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
+ * - undo adjustments at process exit are limited to 0..SEMVMX.
+ * - namespace are supported.
+ * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
+ *   to /proc/sys/kernel/sem.
+ * - statistics about the usage are reported in /proc/sysvipc/sem.
+ *
+ * Internals:
+ * - scalability:
+ *   - all global variables are read-mostly.
+ *   - semop() calls and semctl(RMID) are synchronized by RCU.
+ *   - most operations do write operations (actually: spin_lock calls) to
+ *     the per-semaphore array structure.
+ *     Thus: Perfect SMP scaling between independent semaphore arrays.
+ *     If multiple semaphores in one array are used, then cache line
+ *     trashing on the semaphore array spinlock will limit the scaling.
+ * - semncnt and semzcnt are calculated on demand in count_semncnt() and
+ *   count_semzcnt()
+ * - the task that performs a successful semop() scans the list of all
+ *   sleeping tasks and completes any pending operations that can be fulfilled.
+ *   Semaphores are actively given to waiting tasks (necessary for FIFO).
+ *   (see update_queue())
+ * - To improve the scalability, the actual wake-up calls are performed after
+ *   dropping all locks. (see wake_up_sem_queue_prepare(),
+ *   wake_up_sem_queue_do())
+ * - All work is done by the waker, the woken up task does not have to do
+ *   anything - not even acquiring a lock or dropping a refcount.
+ * - A woken up task may not even touch the semaphore array anymore, it may
+ *   have been destroyed already by a semctl(RMID).
+ * - The synchronizations between wake-ups due to a timeout/signal and a
+ *   wake-up due to a completed semaphore operation is achieved by using an
+ *   intermediate state (IN_WAKEUP).
+ * - UNDO values are stored in an array (one per process and per
+ *   semaphore array, lazily allocated). For backwards compatibility, multiple
+ *   modes for the UNDO variables are supported (per process, per thread)
+ *   (see copy_semundo, CLONE_SYSVSEM)
+ * - There are two lists of the pending operations: a per-array list
+ *   and per-semaphore list (stored in the array). This allows to achieve FIFO
+ *   ordering without always scanning all pending operations.
+ *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
  */
 
 #include <linux/slab.h>
```
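The IN_WAKEUP intermediate state described in the notes above implies a specific read protocol on the sleeper's side: a woken task must spin past IN_WAKEUP before trusting `q->status`, and must not touch the semaphore array itself. A simplified sketch of that protocol (a hypothetical helper, not part of this patch; the real logic lives inline in `semtimedop()`):

```c
/* Hypothetical helper illustrating the sleeper's side of the
 * IN_WAKEUP handshake. The waker sets q->status to IN_WAKEUP,
 * calls wake_up_process(), and only later stores the final return
 * code, so a woken task may briefly observe IN_WAKEUP and must wait
 * it out. It reads nothing but q->status: the semaphore array may
 * already have been destroyed by semctl(RMID). */
static int get_queue_result(struct sem_queue *q)
{
	int error = q->status;

	while (error == IN_WAKEUP) {
		cpu_relax();	/* waker is between wake-up and final store */
		error = q->status;
	}
	return error;
}
```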
```diff
@@ -381,7 +384,6 @@ static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops,
 		sop--;
 	}
 
-	sma->sem_otime = get_seconds();
 	return 0;
 
 out_of_range:
@@ -404,25 +406,51 @@ undo:
 	return result;
 }
 
-/*
- * Wake up a process waiting on the sem queue with a given error.
- * The queue is invalid (may not be accessed) after the function returns.
+/** wake_up_sem_queue_prepare(q, error): Prepare wake-up
+ * @q: queue entry that must be signaled
+ * @error: Error value for the signal
+ *
+ * Prepare the wake-up of the queue entry q.
  */
-static void wake_up_sem_queue(struct sem_queue *q, int error)
+static void wake_up_sem_queue_prepare(struct list_head *pt,
+				struct sem_queue *q, int error)
 {
-	/*
-	 * Hold preempt off so that we don't get preempted and have the
-	 * wakee busy-wait until we're scheduled back on. We're holding
-	 * locks here so it may not strictly be needed, however if the
-	 * locks become preemptible then this prevents such a problem.
-	 */
-	preempt_disable();
+	if (list_empty(pt)) {
+		/*
+		 * Hold preempt off so that we don't get preempted and have the
+		 * wakee busy-wait until we're scheduled back on.
+		 */
+		preempt_disable();
+	}
 	q->status = IN_WAKEUP;
-	wake_up_process(q->sleeper);
-	/* hands-off: q can disappear immediately after writing q->status. */
-	smp_wmb();
-	q->status = error;
-	preempt_enable();
+	q->pid = error;
+
+	list_add_tail(&q->simple_list, pt);
+}
+
+/**
+ * wake_up_sem_queue_do(pt) - do the actual wake-up
+ * @pt: list of tasks to be woken up
+ *
+ * Do the actual wake-up.
+ * The function is called without any locks held, thus the semaphore array
+ * could be destroyed already and the tasks can disappear as soon as the
+ * status is set to the actual return code.
+ */
+static void wake_up_sem_queue_do(struct list_head *pt)
+{
+	struct sem_queue *q, *t;
+	int did_something;
+
+	did_something = !list_empty(pt);
+	list_for_each_entry_safe(q, t, pt, simple_list) {
+		wake_up_process(q->sleeper);
+		/* q can disappear immediately after writing q->status. */
+		smp_wmb();
+		q->status = q->pid;
+	}
+	if (did_something)
+		preempt_enable();
 }
 
 static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
```
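The split into prepare/do gives every call site the same shape: collect wake-ups on a local list while holding the array lock, then issue the actual `wake_up_process()` calls with no locks held. Schematically (a sketch assembled from the `freeary()` and `semtimedop()` call sites in the hunks below; the helper name and surrounding locking are illustrative):

```c
/* Sketch of the calling convention for the split wake-up (names as in
 * the patch; error handling elided). All queueing happens under the
 * semaphore array lock; wake_up_process() runs only after unlock. */
static void example_remove_and_wake(struct sem_array *sma,
				    struct sem_queue *q, int error)
{
	struct list_head tasks;

	INIT_LIST_HEAD(&tasks);
	/* array lock held here: complete or cancel the pending operation */
	unlink_queue(sma, q);
	wake_up_sem_queue_prepare(&tasks, q, error);	/* under the lock */
	sem_unlock(sma);
	wake_up_sem_queue_do(&tasks);			/* no locks held */
}
```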
```diff
@@ -434,22 +462,90 @@ static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
 	sma->complex_count--;
 }
 
+/** check_restart(sma, q)
+ * @sma: semaphore array
+ * @q: the operation that just completed
+ *
+ * update_queue is O(N^2) when it restarts scanning the whole queue of
+ * waiting operations. Therefore this function checks if the restart is
+ * really necessary. It is called after a previously waiting operation
+ * was completed.
+ */
+static int check_restart(struct sem_array *sma, struct sem_queue *q)
+{
+	struct sem *curr;
+	struct sem_queue *h;
+
+	/* if the operation didn't modify the array, then no restart */
+	if (q->alter == 0)
+		return 0;
+
+	/* pending complex operations are too difficult to analyse */
+	if (sma->complex_count)
+		return 1;
+
+	/* we were a sleeping complex operation. Too difficult */
+	if (q->nsops > 1)
+		return 1;
+
+	curr = sma->sem_base + q->sops[0].sem_num;
+
+	/* No-one waits on this queue */
+	if (list_empty(&curr->sem_pending))
+		return 0;
+
+	/* the new semaphore value */
+	if (curr->semval) {
+		/* It is impossible that someone waits for the new value:
+		 * - q is a previously sleeping simple operation that
+		 *   altered the array. It must be a decrement, because
+		 *   simple increments never sleep.
+		 * - The value is not 0, thus wait-for-zero won't proceed.
+		 * - If there are older (higher priority) decrements
+		 *   in the queue, then they have observed the original
+		 *   semval value and couldn't proceed. The operation
+		 *   decremented to value - thus they won't proceed either.
+		 */
+		BUG_ON(q->sops[0].sem_op >= 0);
+		return 0;
+	}
+	/*
+	 * semval is 0. Check if there are wait-for-zero semops.
+	 * They must be the first entries in the per-semaphore simple queue
+	 */
+	h = list_first_entry(&curr->sem_pending, struct sem_queue, simple_list);
+	BUG_ON(h->nsops != 1);
+	BUG_ON(h->sops[0].sem_num != q->sops[0].sem_num);
+
+	/* Yes, there is a wait-for-zero semop. Restart */
+	if (h->sops[0].sem_op == 0)
+		return 1;
+
+	/* Again - no-one is waiting for the new value. */
+	return 0;
+}
+
 
 /**
  * update_queue(sma, semnum): Look for tasks that can be completed.
  * @sma: semaphore array.
  * @semnum: semaphore that was modified.
+ * @pt: list head for the tasks that must be woken up.
  *
  * update_queue must be called after a semaphore in a semaphore array
  * was modified. If multiple semaphore were modified, then @semnum
  * must be set to -1.
+ * The tasks that must be woken up are added to @pt. The return code
+ * is stored in q->pid.
+ * The function return 1 if at least one semop was completed successfully.
  */
-static void update_queue(struct sem_array *sma, int semnum)
+static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt)
 {
 	struct sem_queue *q;
 	struct list_head *walk;
 	struct list_head *pending_list;
 	int offset;
+	int semop_completed = 0;
 
 	/* if there are complex operations around, then knowing the semaphore
 	 * that was modified doesn't help us. Assume that multiple semaphores
```
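`check_restart()` compresses a small decision table. A toy userspace model of that table (an assumption-laden sketch; it ignores the empty-queue early return and the `BUG_ON` sanity checks of the real function):

```c
/* Toy model of check_restart()'s decision: returns 1 when the scan in
 * update_queue() must restart from the head of the pending list. */
static int needs_restart(int altered, int complex_pending, int nsops,
			 int new_semval, int head_waits_for_zero)
{
	if (!altered)
		return 0;	/* nothing changed; nobody new can proceed */
	if (complex_pending || nsops > 1)
		return 1;	/* multi-semaphore cases: too hard to analyse */
	if (new_semval != 0)
		return 0;	/* increments never sleep, and older decrements
				 * already failed against a larger value */
	return head_waits_for_zero;	/* only a wait-for-zero can fire now */
}
```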
```diff
@@ -469,7 +565,7 @@ static void update_queue(struct sem_array *sma, int semnum)
 again:
 	walk = pending_list->next;
 	while (walk != pending_list) {
-		int error, alter;
+		int error, restart;
 
 		q = (struct sem_queue *)((char *)walk - offset);
 		walk = walk->next;
@@ -494,22 +590,58 @@ again:
 
 		unlink_queue(sma, q);
 
-		/*
-		 * The next operation that must be checked depends on the type
-		 * of the completed operation:
-		 * - if the operation modified the array, then restart from the
-		 *   head of the queue and check for threads that might be
-		 *   waiting for the new semaphore values.
-		 * - if the operation didn't modify the array, then just
-		 *   continue.
-		 */
-		alter = q->alter;
-		wake_up_sem_queue(q, error);
-		if (alter && !error)
+		if (error) {
+			restart = 0;
+		} else {
+			semop_completed = 1;
+			restart = check_restart(sma, q);
+		}
+
+		wake_up_sem_queue_prepare(pt, q, error);
+		if (restart)
 			goto again;
 	}
+	return semop_completed;
+}
+
+/**
+ * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue
+ * @sma: semaphore array
+ * @sops: operations that were performed
+ * @nsops: number of operations
+ * @otime: force setting otime
+ * @pt: list head of the tasks that must be woken up.
+ *
+ * do_smart_update() does the required called to update_queue, based on the
+ * actual changes that were performed on the semaphore array.
+ * Note that the function does not do the actual wake-up: the caller is
+ * responsible for calling wake_up_sem_queue_do(@pt).
+ * It is safe to perform this call after dropping all locks.
+ */
+static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
+			int otime, struct list_head *pt)
+{
+	int i;
+
+	if (sma->complex_count || sops == NULL) {
+		if (update_queue(sma, -1, pt))
+			otime = 1;
+		goto done;
+	}
+
+	for (i = 0; i < nsops; i++) {
+		if (sops[i].sem_op > 0 ||
+			(sops[i].sem_op < 0 &&
+				sma->sem_base[sops[i].sem_num].semval == 0))
+			if (update_queue(sma, sops[i].sem_num, pt))
+				otime = 1;
+	}
+done:
+	if (otime)
+		sma->sem_otime = get_seconds();
 }
 
+
 /* The following counts are associated to each semaphore:
  * semncnt    number of tasks waiting on semval being nonzero
  * semzcnt    number of tasks waiting on semval being zero
```
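The per-operation filter in `do_smart_update()` encodes which changes can unblock a waiter: an increment can satisfy a sleeping decrement, and a decrement that reaches zero can satisfy a wait-for-zero, while a decrement to a nonzero value can satisfy no one, so that semaphore's queue is skipped. A typical call site then looks like this (a fragment mirroring the `semtimedop()` hunk further down; declarations omitted):

```c
/* Sketch: wiring do_smart_update() into a semop() fast path. Runs
 * only after try_atomic_semop() succeeded; the collected wake-ups
 * are issued after the array lock is dropped. */
error = try_atomic_semop(sma, sops, nsops, un, task_tgid_vnr(current));
if (error == 0 && alter)
	do_smart_update(sma, sops, nsops, 1, &tasks);	/* otime forced */
sem_unlock(sma);
wake_up_sem_queue_do(&tasks);
```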
```diff
@@ -572,6 +704,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	struct sem_undo *un, *tu;
 	struct sem_queue *q, *tq;
 	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
+	struct list_head tasks;
 
 	/* Free the existing undo structures for this semaphore set. */
 	assert_spin_locked(&sma->sem_perm.lock);
@@ -585,15 +718,17 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	}
 
 	/* Wake up all pending processes and let them fail with EIDRM. */
+	INIT_LIST_HEAD(&tasks);
 	list_for_each_entry_safe(q, tq, &sma->sem_pending, list) {
 		unlink_queue(sma, q);
-		wake_up_sem_queue(q, -EIDRM);
+		wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
 	}
 
 	/* Remove the semaphore set from the IDR */
 	sem_rmid(ns, sma);
 	sem_unlock(sma);
 
+	wake_up_sem_queue_do(&tasks);
 	ns->used_sems -= sma->sem_nsems;
 	security_sem_free(sma);
 	ipc_rcu_putref(sma);
@@ -715,11 +850,13 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 	ushort fast_sem_io[SEMMSL_FAST];
 	ushort* sem_io = fast_sem_io;
 	int nsems;
+	struct list_head tasks;
 
 	sma = sem_lock_check(ns, semid);
 	if (IS_ERR(sma))
 		return PTR_ERR(sma);
 
+	INIT_LIST_HEAD(&tasks);
 	nsems = sma->sem_nsems;
 
 	err = -EACCES;
@@ -807,7 +944,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		}
 		sma->sem_ctime = get_seconds();
 		/* maybe some queued-up processes were waiting for this */
-		update_queue(sma, -1);
+		do_smart_update(sma, NULL, 0, 0, &tasks);
 		err = 0;
 		goto out_unlock;
 	}
@@ -849,13 +986,15 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		curr->sempid = task_tgid_vnr(current);
 		sma->sem_ctime = get_seconds();
 		/* maybe some queued-up processes were waiting for this */
-		update_queue(sma, semnum);
+		do_smart_update(sma, NULL, 0, 0, &tasks);
 		err = 0;
 		goto out_unlock;
 	}
 	}
 out_unlock:
 	sem_unlock(sma);
+	wake_up_sem_queue_do(&tasks);
+
 out_free:
 	if(sem_io != fast_sem_io)
 		ipc_free(sem_io, sizeof(ushort)*nsems);
@@ -1069,7 +1208,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	/* step 1: figure out the size of the semaphore array */
 	sma = sem_lock_check(ns, semid);
 	if (IS_ERR(sma))
-		return ERR_PTR(PTR_ERR(sma));
+		return ERR_CAST(sma);
 
 	nsems = sma->sem_nsems;
 	sem_getref_and_unlock(sma);
@@ -1129,6 +1268,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	struct sem_queue queue;
 	unsigned long jiffies_left = 0;
 	struct ipc_namespace *ns;
+	struct list_head tasks;
 
 	ns = current->nsproxy->ipc_ns;
 
@@ -1177,6 +1317,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	} else
 		un = NULL;
 
+	INIT_LIST_HEAD(&tasks);
+
 	sma = sem_lock_check(ns, semid);
 	if (IS_ERR(sma)) {
 		if (un)
@@ -1225,7 +1367,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
 	if (error <= 0) {
 		if (alter && error == 0)
-			update_queue(sma, (nsops == 1) ? sops[0].sem_num : -1);
+			do_smart_update(sma, sops, nsops, 1, &tasks);
 
 		goto out_unlock_free;
 	}
@@ -1302,6 +1444,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 
 out_unlock_free:
 	sem_unlock(sma);
+
+	wake_up_sem_queue_do(&tasks);
 out_free:
 	if(sops != fast_sops)
 		kfree(sops);
@@ -1362,6 +1506,7 @@ void exit_sem(struct task_struct *tsk)
 	for (;;) {
 		struct sem_array *sma;
 		struct sem_undo *un;
+		struct list_head tasks;
 		int semid;
 		int i;
 
@@ -1425,10 +1570,11 @@ void exit_sem(struct task_struct *tsk)
 				semaphore->sempid = task_tgid_vnr(current);
 			}
 		}
-		sma->sem_otime = get_seconds();
 		/* maybe some queued-up processes were waiting for this */
-		update_queue(sma, -1);
+		INIT_LIST_HEAD(&tasks);
+		do_smart_update(sma, NULL, 0, 1, &tasks);
 		sem_unlock(sma);
+		wake_up_sem_queue_do(&tasks);
 
 		call_rcu(&un->rcu, free_un);
 	}
```
```diff
diff --git a/ipc/shm.c b/ipc/shm.c
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -273,16 +273,13 @@ static int shm_release(struct inode *ino, struct file *file)
 	return 0;
 }
 
-static int shm_fsync(struct file *file, struct dentry *dentry, int datasync)
+static int shm_fsync(struct file *file, int datasync)
 {
-	int (*fsync) (struct file *, struct dentry *, int datasync);
 	struct shm_file_data *sfd = shm_file_data(file);
-	int ret = -EINVAL;
 
-	fsync = sfd->file->f_op->fsync;
-	if (fsync)
-		ret = fsync(sfd->file, sfd->file->f_path.dentry, datasync);
-	return ret;
+	if (!sfd->file->f_op->fsync)
+		return -EINVAL;
+	return sfd->file->f_op->fsync(sfd->file, datasync);
 }
 
 static unsigned long shm_get_unmapped_area(struct file *file,
```
```diff
diff --git a/ipc/util.c b/ipc/util.c
index 79ce84e890f7..69a0cc13d966 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -124,8 +124,8 @@ void ipc_init_ids(struct ipc_ids *ids)
 	ids->seq = 0;
 	{
 		int seq_limit = INT_MAX/SEQ_MULTIPLIER;
-		if (seq_limit > USHORT_MAX)
-			ids->seq_max = USHORT_MAX;
+		if (seq_limit > USHRT_MAX)
+			ids->seq_max = USHRT_MAX;
 		else
 			ids->seq_max = seq_limit;
 	}
```