author		Manfred Spraul <manfred@colorfullife.com>	2008-07-25 04:48:06 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-07-25 13:53:42 -0400
commit		380af1b33b3ff92df5cda96329b58f5d1b6b5a53
tree		9a47d66c18e4aae2093a708a7509c0f188ee0bd1
parent		a1193f8ec091cd8fd309cc2982abe4499f6f2b4d
ipc/sem.c: rewrite undo list locking
The attached patch:
- reverses the locking order of ulp->lock and sem_lock:
Previously, it was first ulp->lock, then inside sem_lock.
Now it's the other way around.
- converts the undo structure to rcu.
Benefits:
- With the old locking order, IPC_RMID could not kfree the undo structures.
The stale entries remained in the linked lists and were released later.
- The patch fixes a race in semtimedop(): if both an IPC_RMID and a semget()
  that recreates exactly the same id happen between find_alloc_undo() and
  sem_lock, then semtimedop() would access already kfree'd memory; a sketch
  of this window follows below.
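As an illustration only (not part of the original patch; the key, permissions
and loop structure are arbitrary assumptions), a minimal userspace sketch of
the window. Id reuse depends on the kernel's id sequence counter, so this
demonstrates the ordering rather than being a reliable trigger:

/* Hypothetical reproducer sketch: one task loops semop() with SEM_UNDO
 * (forcing find_alloc_undo() before sem_lock), the other destroys and
 * recreates the set, hoping the new array receives the same id.
 */
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>
#include <unistd.h>

int main(void)
{
	key_t key = 0x5ee0;			/* arbitrary key */
	int id = semget(key, 1, IPC_CREAT | 0600);

	if (fork() == 0) {
		for (;;) {			/* child */
			struct sembuf op = { 0, 1, SEM_UNDO };
			int cur = semget(key, 1, 0600);
			if (cur >= 0)
				semop(cur, &op, 1);	/* may see EIDRM */
		}
	}
	for (;;) {				/* parent */
		semctl(id, 0, IPC_RMID);
		id = semget(key, 1, IPC_CREAT | 0600);
	}
}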
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Reviewed-by: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	include/linux/sem.h |   6
-rw-r--r--	ipc/sem.c           | 147
2 files changed, 98 insertions, 55 deletions
diff --git a/include/linux/sem.h b/include/linux/sem.h
index d42599395d79..1b191c176bcd 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -78,6 +78,7 @@ struct seminfo {
 
 #ifdef __KERNEL__
 #include <asm/atomic.h>
+#include <linux/rcupdate.h>
 
 struct task_struct;
 
@@ -114,7 +115,10 @@ struct sem_queue {
  * when the process exits.
  */
 struct sem_undo {
-	struct list_head	list_proc;	/* per-process list: all undos from one process */
+	struct list_head	list_proc;	/* per-process list: all undos from one process. */
+						/* rcu protected */
+	struct rcu_head		rcu;		/* rcu struct for sem_undo() */
+	struct sem_undo_list	*ulp;		/* sem_undo_list for the process */
 	struct list_head	list_id;	/* per semaphore array list: all undos for one array */
 	int			semid;		/* semaphore set identifier */
 	short *			semadj;		/* array of adjustments, one per semaphore */
diff --git a/ipc/sem.c b/ipc/sem.c
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -504,27 +504,35 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
 	return semzcnt;
 }
 
+void free_un(struct rcu_head *head)
+{
+	struct sem_undo *un = container_of(head, struct sem_undo, rcu);
+	kfree(un);
+}
+
 /* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked
  * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex
  * remains locked on exit.
  */
 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 {
-	struct sem_undo *un;
-	struct sem_queue *q, *t;
+	struct sem_undo *un, *tu;
+	struct sem_queue *q, *tq;
 	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
 
-	/* Invalidate the existing undo structures for this semaphore set.
-	 * (They will be freed without any further action in exit_sem()
-	 * or during the next semop.)
-	 */
+	/* Free the existing undo structures for this semaphore set. */
 	assert_spin_locked(&sma->sem_perm.lock);
-	list_for_each_entry(un, &sma->list_id, list_id)
+	list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
+		list_del(&un->list_id);
+		spin_lock(&un->ulp->lock);
 		un->semid = -1;
+		list_del_rcu(&un->list_proc);
+		spin_unlock(&un->ulp->lock);
+		call_rcu(&un->rcu, free_un);
+	}
 
 	/* Wake up all pending processes and let them fail with EIDRM. */
-
-	list_for_each_entry_safe(q, t, &sma->sem_pending, list) {
+	list_for_each_entry_safe(q, tq, &sma->sem_pending, list) {
 		list_del(&q->list);
 
 		q->status = IN_WAKEUP;
@@ -948,16 +956,11 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp)
 
 static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
 {
-	struct sem_undo *walk, *tmp;
+	struct sem_undo *walk;
 
-	assert_spin_locked(&ulp->lock);
-	list_for_each_entry_safe(walk, tmp, &ulp->list_proc, list_proc) {
+	list_for_each_entry_rcu(walk, &ulp->list_proc, list_proc) {
 		if (walk->semid == semid)
 			return walk;
-		if (walk->semid == -1) {
-			list_del(&walk->list_proc);
-			kfree(walk);
-		}
 	}
 	return NULL;
 }
@@ -970,6 +973,8 @@ static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
  * The function looks up (and if not present creates) the undo structure.
  * The size of the undo structure depends on the size of the semaphore
  * array, thus the alloc path is not that straightforward.
+ * Lifetime-rules: sem_undo is rcu-protected, on success, the function
+ * performs a rcu_read_lock().
  */
 static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 {
@@ -983,11 +988,13 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	if (error)
 		return ERR_PTR(error);
 
+	rcu_read_lock();
 	spin_lock(&ulp->lock);
 	un = lookup_undo(ulp, semid);
 	spin_unlock(&ulp->lock);
 	if (likely(un!=NULL))
 		goto out;
+	rcu_read_unlock();
 
 	/* no undo structure around - allocate one. */
 	/* step 1: figure out the size of the semaphore array */
@@ -1005,38 +1012,38 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	/* step 3: Acquire the lock on the undo list pointer */
-	spin_lock(&ulp->lock);
-
-	/* step 4: check for races: someone else allocated the undo struct,
-	 * semaphore array was destroyed.
-	 */
-	un = lookup_undo(ulp, semid);
-	if (un) {
-		spin_unlock(&ulp->lock);
-		kfree(new);
-		sem_putref(sma);
-		goto out;
-	}
+	/* step 3: Acquire the lock on semaphore array */
 	sem_lock_and_putref(sma);
 	if (sma->sem_perm.deleted) {
 		sem_unlock(sma);
-		spin_unlock(&ulp->lock);
 		kfree(new);
 		un = ERR_PTR(-EIDRM);
 		goto out;
 	}
+	spin_lock(&ulp->lock);
+
+	/*
+	 * step 4: check for races: did someone else allocate the undo struct?
+	 */
+	un = lookup_undo(ulp, semid);
+	if (un) {
+		kfree(new);
+		goto success;
+	}
 	/* step 5: initialize & link new undo structure */
 	new->semadj = (short *) &new[1];
+	new->ulp = ulp;
 	new->semid = semid;
 	assert_spin_locked(&ulp->lock);
-	list_add(&new->list_proc, &ulp->list_proc);
+	list_add_rcu(&new->list_proc, &ulp->list_proc);
 	assert_spin_locked(&sma->sem_perm.lock);
 	list_add(&new->list_id, &sma->list_id);
+	un = new;
 
-	sem_unlock(sma);
+success:
 	spin_unlock(&ulp->lock);
-	un = new;
+	rcu_read_lock();
+	sem_unlock(sma);
 out:
 	return un;
 }
@@ -1103,6 +1110,8 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
 
 	sma = sem_lock_check(ns, semid);
 	if (IS_ERR(sma)) {
+		if (un)
+			rcu_read_unlock();
 		error = PTR_ERR(sma);
 		goto out_free;
 	}
@@ -1111,10 +1120,26 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
 	 * semid identifiers are not unique - find_alloc_undo may have
 	 * allocated an undo structure, it was invalidated by an RMID
 	 * and now a new array with received the same id. Check and fail.
+	 * This case can be detected checking un->semid. The existance of
+	 * "un" itself is guaranteed by rcu.
 	 */
 	error = -EIDRM;
-	if (un && un->semid == -1)
-		goto out_unlock_free;
+	if (un) {
+		if (un->semid == -1) {
+			rcu_read_unlock();
+			goto out_unlock_free;
+		} else {
+			/*
+			 * rcu lock can be released, "un" cannot disappear:
+			 * - sem_lock is acquired, thus IPC_RMID is
+			 *   impossible.
+			 * - exit_sem is impossible, it always operates on
+			 *   current (or a dead task).
+			 */
+
+			rcu_read_unlock();
+		}
+	}
 
 	error = -EFBIG;
 	if (max >= sma->sem_nsems)
@@ -1242,7 +1267,6 @@ int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
 void exit_sem(struct task_struct *tsk)
 {
 	struct sem_undo_list *ulp;
-	struct sem_undo *un, *tmp;
 
 	ulp = tsk->sysvsem.undo_list;
 	if (!ulp)
@@ -1252,28 +1276,47 @@ void exit_sem(struct task_struct *tsk)
 	if (!atomic_dec_and_test(&ulp->refcnt))
 		return;
 
-	spin_lock(&ulp->lock);
-
-	list_for_each_entry_safe(un, tmp, &ulp->list_proc, list_proc) {
+	for (;;) {
 		struct sem_array *sma;
+		struct sem_undo *un;
+		int semid;
 		int i;
 
-		if (un->semid == -1)
-			goto free;
+		rcu_read_lock();
+		un = list_entry(rcu_dereference(ulp->list_proc.next),
+					struct sem_undo, list_proc);
+		if (&un->list_proc == &ulp->list_proc)
+			semid = -1;
+		else
+			semid = un->semid;
+		rcu_read_unlock();
 
-		sma = sem_lock(tsk->nsproxy->ipc_ns, un->semid);
-		if (IS_ERR(sma))
-			goto free;
+		if (semid == -1)
+			break;
 
-		if (un->semid == -1)
-			goto unlock_free;
+		sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid);
 
-		BUG_ON(sem_checkid(sma, un->semid));
+		/* exit_sem raced with IPC_RMID, nothing to do */
+		if (IS_ERR(sma))
+			continue;
 
-		/* remove un from sma->list_id */
+		un = lookup_undo(ulp, semid);
+		if (un == NULL) {
+			/* exit_sem raced with IPC_RMID+semget() that created
+			 * exactly the same semid. Nothing to do.
+			 */
+			sem_unlock(sma);
+			continue;
+		}
+
+		/* remove un from the linked lists */
 		assert_spin_locked(&sma->sem_perm.lock);
 		list_del(&un->list_id);
 
+		spin_lock(&ulp->lock);
+		list_del_rcu(&un->list_proc);
+		spin_unlock(&ulp->lock);
+
 		/* perform adjustments registered in un */
 		for (i = 0; i < sma->sem_nsems; i++) {
 			struct sem * semaphore = &sma->sem_base[i];
@@ -1302,14 +1345,10 @@ void exit_sem(struct task_struct *tsk)
 		sma->sem_otime = get_seconds();
 		/* maybe some queued-up processes were waiting for this */
 		update_queue(sma);
-unlock_free:
 		sem_unlock(sma);
-free:
-		assert_spin_locked(&ulp->lock);
-		list_del(&un->list_proc);
-		kfree(un);
+
+		call_rcu(&un->rcu, free_un);
 	}
-	spin_unlock(&ulp->lock);
 	kfree(ulp);
 }
 
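Distilled to its core, the lifetime scheme the patch adopts for sem_undo is
the classic rcu-protected-list pattern: readers traverse under
rcu_read_lock(), writers unlink under a spinlock and defer the kfree() by a
grace period via call_rcu(). A minimal kernel-style sketch, for illustration
only - the struct and function names below are invented, not from the commit:

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct item {
	struct list_head node;		/* modified under writer_lock */
	struct rcu_head rcu;
	int id;
};

static LIST_HEAD(items);
static DEFINE_SPINLOCK(writer_lock);

static void item_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct item, rcu));
}

/* Reader side - caller holds rcu_read_lock(), like the new lookup_undo(). */
static struct item *item_lookup(int id)
{
	struct item *it;

	list_for_each_entry_rcu(it, &items, node)
		if (it->id == id)
			return it;
	return NULL;
}

/* Writer side - unlink under the lock, free after a grace period,
 * like freeary() and exit_sem() after this patch.
 */
static void item_remove(struct item *it)
{
	spin_lock(&writer_lock);
	list_del_rcu(&it->node);
	spin_unlock(&writer_lock);
	call_rcu(&it->rcu, item_free_rcu);
}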