aboutsummaryrefslogtreecommitdiffstats
path: root/ipc
diff options
context:
space:
mode:
author Davidlohr Bueso <davidlohr.bueso@hp.com> 2013-04-30 22:15:29 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2013-05-01 11:12:58 -0400
commit 16df3674efe39f3ab63e7052f1244dd3d50e7f84 (patch)
tree 45ecd49ba72e0b84dbaac338d79173ba1ea4c972 /ipc
parent 444d0f621b64716f7868dcbde448e0c66ece4e61 (diff)
ipc,sem: do not hold ipc lock more than necessary
Instead of holding the ipc lock for permissions and security checks, among others, only acquire it when necessary.

Some numbers....

1) With Rik's semop-multi.c microbenchmark we can see the following results:

Baseline (3.9-rc1):
cpus 4, threads: 256, semaphores: 128, test duration: 30 secs
total operations: 151452270, ops/sec 5048409

+ 59.40% a.out [kernel.kallsyms] [k] _raw_spin_lock
+ 6.14% a.out [kernel.kallsyms] [k] sys_semtimedop
+ 3.84% a.out [kernel.kallsyms] [k] avc_has_perm_flags
+ 3.64% a.out [kernel.kallsyms] [k] __audit_syscall_exit
+ 2.06% a.out [kernel.kallsyms] [k] copy_user_enhanced_fast_string
+ 1.86% a.out [kernel.kallsyms] [k] ipc_lock

With this patchset:
cpus 4, threads: 256, semaphores: 128, test duration: 30 secs
total operations: 273156400, ops/sec 9105213

+ 18.54% a.out [kernel.kallsyms] [k] _raw_spin_lock
+ 11.72% a.out [kernel.kallsyms] [k] sys_semtimedop
+ 7.70% a.out [kernel.kallsyms] [k] ipc_has_perm.isra.21
+ 6.58% a.out [kernel.kallsyms] [k] avc_has_perm_flags
+ 6.54% a.out [kernel.kallsyms] [k] __audit_syscall_exit
+ 4.71% a.out [kernel.kallsyms] [k] ipc_obtain_object_check

2) While on an Oracle swingbench DSS (data mining) workload the improvements are not as exciting as with Rik's benchmark, we can see some positive numbers.
For an 8 socket machine the following are the percentages of %sys time incurred in the ipc lock:

Baseline (3.9-rc1):
100 swingbench users: 8,74%
400 swingbench users: 21,86%
800 swingbench users: 84,35%

With this patchset:
100 swingbench users: 8,11%
400 swingbench users: 19,93%
800 swingbench users: 77,69%

[riel@redhat.com: fix two locking bugs]
[sasha.levin@oracle.com: prevent releasing RCU read lock twice in semctl_main]
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Davidlohr Bueso <davidlohr.bueso@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Chegu Vinod <chegu_vinod@hp.com>
Acked-by: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Jason Low <jason.low2@hp.com>
Cc: Emmanuel Benisty <benisty.e@gmail.com>
Cc: Peter Hurley <peter@hurleysoftware.com>
Cc: Stanislav Kinsbursky <skinsbursky@parallels.com>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'ipc')
-rw-r--r-- ipc/sem.c 161
-rw-r--r-- ipc/util.h 5
2 files changed, 118 insertions, 48 deletions
diff --git a/ipc/sem.c b/ipc/sem.c
index 5b167d00efa6..cd1093cf7e8f 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -204,13 +204,34 @@ static inline struct sem_array *sem_lock(struct ipc_namespace *ns, int id)
204 return container_of(ipcp, struct sem_array, sem_perm); 204 return container_of(ipcp, struct sem_array, sem_perm);
205} 205}
206 206
207static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id)
208{
209 struct kern_ipc_perm *ipcp = ipc_obtain_object(&sem_ids(ns), id);
210
211 if (IS_ERR(ipcp))
212 return ERR_CAST(ipcp);
213
214 return container_of(ipcp, struct sem_array, sem_perm);
215}
216
207static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns, 217static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns,
208 int id) 218 int id)
209{ 219{
210 struct kern_ipc_perm *ipcp = ipc_lock_check(&sem_ids(ns), id); 220 struct kern_ipc_perm *ipcp = ipc_lock_check(&sem_ids(ns), id);
211 221
212 if (IS_ERR(ipcp)) 222 if (IS_ERR(ipcp))
213 return (struct sem_array *)ipcp; 223 return ERR_CAST(ipcp);
224
225 return container_of(ipcp, struct sem_array, sem_perm);
226}
227
228static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns,
229 int id)
230{
231 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id);
232
233 if (IS_ERR(ipcp))
234 return ERR_CAST(ipcp);
214 235
215 return container_of(ipcp, struct sem_array, sem_perm); 236 return container_of(ipcp, struct sem_array, sem_perm);
216} 237}
@@ -234,6 +255,16 @@ static inline void sem_putref(struct sem_array *sma)
234 ipc_unlock(&(sma)->sem_perm); 255 ipc_unlock(&(sma)->sem_perm);
235} 256}
236 257
258/*
259 * Call inside the rcu read section.
260 */
261static inline void sem_getref(struct sem_array *sma)
262{
263 spin_lock(&(sma)->sem_perm.lock);
264 ipc_rcu_getref(sma);
265 ipc_unlock(&(sma)->sem_perm);
266}
267
237static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) 268static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
238{ 269{
239 ipc_rmid(&sem_ids(ns), &s->sem_perm); 270 ipc_rmid(&sem_ids(ns), &s->sem_perm);
@@ -842,18 +873,25 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
842 case SEM_STAT: 873 case SEM_STAT:
843 { 874 {
844 struct semid64_ds tbuf; 875 struct semid64_ds tbuf;
845 int id; 876 int id = 0;
877
878 memset(&tbuf, 0, sizeof(tbuf));
846 879
847 if (cmd == SEM_STAT) { 880 if (cmd == SEM_STAT) {
848 sma = sem_lock(ns, semid); 881 rcu_read_lock();
849 if (IS_ERR(sma)) 882 sma = sem_obtain_object(ns, semid);
850 return PTR_ERR(sma); 883 if (IS_ERR(sma)) {
884 err = PTR_ERR(sma);
885 goto out_unlock;
886 }
851 id = sma->sem_perm.id; 887 id = sma->sem_perm.id;
852 } else { 888 } else {
853 sma = sem_lock_check(ns, semid); 889 rcu_read_lock();
854 if (IS_ERR(sma)) 890 sma = sem_obtain_object_check(ns, semid);
855 return PTR_ERR(sma); 891 if (IS_ERR(sma)) {
856 id = 0; 892 err = PTR_ERR(sma);
893 goto out_unlock;
894 }
857 } 895 }
858 896
859 err = -EACCES; 897 err = -EACCES;
@@ -864,13 +902,11 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
864 if (err) 902 if (err)
865 goto out_unlock; 903 goto out_unlock;
866 904
867 memset(&tbuf, 0, sizeof(tbuf));
868
869 kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm); 905 kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
870 tbuf.sem_otime = sma->sem_otime; 906 tbuf.sem_otime = sma->sem_otime;
871 tbuf.sem_ctime = sma->sem_ctime; 907 tbuf.sem_ctime = sma->sem_ctime;
872 tbuf.sem_nsems = sma->sem_nsems; 908 tbuf.sem_nsems = sma->sem_nsems;
873 sem_unlock(sma); 909 rcu_read_unlock();
874 if (copy_semid_to_user(p, &tbuf, version)) 910 if (copy_semid_to_user(p, &tbuf, version))
875 return -EFAULT; 911 return -EFAULT;
876 return id; 912 return id;
@@ -879,7 +915,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
879 return -EINVAL; 915 return -EINVAL;
880 } 916 }
881out_unlock: 917out_unlock:
882 sem_unlock(sma); 918 rcu_read_unlock();
883 return err; 919 return err;
884} 920}
885 921
@@ -947,27 +983,34 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
947{ 983{
948 struct sem_array *sma; 984 struct sem_array *sma;
949 struct sem* curr; 985 struct sem* curr;
950 int err; 986 int err, nsems;
951 ushort fast_sem_io[SEMMSL_FAST]; 987 ushort fast_sem_io[SEMMSL_FAST];
952 ushort* sem_io = fast_sem_io; 988 ushort* sem_io = fast_sem_io;
953 int nsems;
954 struct list_head tasks; 989 struct list_head tasks;
955 990
956 sma = sem_lock_check(ns, semid); 991 INIT_LIST_HEAD(&tasks);
957 if (IS_ERR(sma)) 992
993 rcu_read_lock();
994 sma = sem_obtain_object_check(ns, semid);
995 if (IS_ERR(sma)) {
996 rcu_read_unlock();
958 return PTR_ERR(sma); 997 return PTR_ERR(sma);
998 }
959 999
960 INIT_LIST_HEAD(&tasks);
961 nsems = sma->sem_nsems; 1000 nsems = sma->sem_nsems;
962 1001
963 err = -EACCES; 1002 err = -EACCES;
964 if (ipcperms(ns, &sma->sem_perm, 1003 if (ipcperms(ns, &sma->sem_perm,
965 cmd == SETALL ? S_IWUGO : S_IRUGO)) 1004 cmd == SETALL ? S_IWUGO : S_IRUGO)) {
966 goto out_unlock; 1005 rcu_read_unlock();
1006 goto out_wakeup;
1007 }
967 1008
968 err = security_sem_semctl(sma, cmd); 1009 err = security_sem_semctl(sma, cmd);
969 if (err) 1010 if (err) {
970 goto out_unlock; 1011 rcu_read_unlock();
1012 goto out_wakeup;
1013 }
971 1014
972 err = -EACCES; 1015 err = -EACCES;
973 switch (cmd) { 1016 switch (cmd) {
@@ -977,7 +1020,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
977 int i; 1020 int i;
978 1021
979 if(nsems > SEMMSL_FAST) { 1022 if(nsems > SEMMSL_FAST) {
980 sem_getref_and_unlock(sma); 1023 sem_getref(sma);
981 1024
982 sem_io = ipc_alloc(sizeof(ushort)*nsems); 1025 sem_io = ipc_alloc(sizeof(ushort)*nsems);
983 if(sem_io == NULL) { 1026 if(sem_io == NULL) {
@@ -993,6 +1036,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
993 } 1036 }
994 } 1037 }
995 1038
1039 spin_lock(&sma->sem_perm.lock);
996 for (i = 0; i < sma->sem_nsems; i++) 1040 for (i = 0; i < sma->sem_nsems; i++)
997 sem_io[i] = sma->sem_base[i].semval; 1041 sem_io[i] = sma->sem_base[i].semval;
998 sem_unlock(sma); 1042 sem_unlock(sma);
@@ -1006,7 +1050,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
1006 int i; 1050 int i;
1007 struct sem_undo *un; 1051 struct sem_undo *un;
1008 1052
1009 sem_getref_and_unlock(sma); 1053 ipc_rcu_getref(sma);
1054 rcu_read_unlock();
1010 1055
1011 if(nsems > SEMMSL_FAST) { 1056 if(nsems > SEMMSL_FAST) {
1012 sem_io = ipc_alloc(sizeof(ushort)*nsems); 1057 sem_io = ipc_alloc(sizeof(ushort)*nsems);
@@ -1053,9 +1098,12 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
1053 /* GETVAL, GETPID, GETNCTN, GETZCNT: fall-through */ 1098 /* GETVAL, GETPID, GETNCTN, GETZCNT: fall-through */
1054 } 1099 }
1055 err = -EINVAL; 1100 err = -EINVAL;
1056 if(semnum < 0 || semnum >= nsems) 1101 if (semnum < 0 || semnum >= nsems) {
1057 goto out_unlock; 1102 rcu_read_unlock();
1103 goto out_wakeup;
1104 }
1058 1105
1106 spin_lock(&sma->sem_perm.lock);
1059 curr = &sma->sem_base[semnum]; 1107 curr = &sma->sem_base[semnum];
1060 1108
1061 switch (cmd) { 1109 switch (cmd) {
@@ -1072,10 +1120,11 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
1072 err = count_semzcnt(sma,semnum); 1120 err = count_semzcnt(sma,semnum);
1073 goto out_unlock; 1121 goto out_unlock;
1074 } 1122 }
1123
1075out_unlock: 1124out_unlock:
1076 sem_unlock(sma); 1125 sem_unlock(sma);
1126out_wakeup:
1077 wake_up_sem_queue_do(&tasks); 1127 wake_up_sem_queue_do(&tasks);
1078
1079out_free: 1128out_free:
1080 if(sem_io != fast_sem_io) 1129 if(sem_io != fast_sem_io)
1081 ipc_free(sem_io, sizeof(ushort)*nsems); 1130 ipc_free(sem_io, sizeof(ushort)*nsems);
@@ -1126,29 +1175,35 @@ static int semctl_down(struct ipc_namespace *ns, int semid,
1126 return -EFAULT; 1175 return -EFAULT;
1127 } 1176 }
1128 1177
1129 ipcp = ipcctl_pre_down(ns, &sem_ids(ns), semid, cmd, 1178 ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
1130 &semid64.sem_perm, 0); 1179 &semid64.sem_perm, 0);
1131 if (IS_ERR(ipcp)) 1180 if (IS_ERR(ipcp))
1132 return PTR_ERR(ipcp); 1181 return PTR_ERR(ipcp);
1133 1182
1134 sma = container_of(ipcp, struct sem_array, sem_perm); 1183 sma = container_of(ipcp, struct sem_array, sem_perm);
1135 1184
1136 err = security_sem_semctl(sma, cmd); 1185 err = security_sem_semctl(sma, cmd);
1137 if (err) 1186 if (err) {
1187 rcu_read_unlock();
1138 goto out_unlock; 1188 goto out_unlock;
1189 }
1139 1190
1140 switch(cmd){ 1191 switch(cmd){
1141 case IPC_RMID: 1192 case IPC_RMID:
1193 ipc_lock_object(&sma->sem_perm);
1142 freeary(ns, ipcp); 1194 freeary(ns, ipcp);
1143 goto out_up; 1195 goto out_up;
1144 case IPC_SET: 1196 case IPC_SET:
1197 ipc_lock_object(&sma->sem_perm);
1145 err = ipc_update_perm(&semid64.sem_perm, ipcp); 1198 err = ipc_update_perm(&semid64.sem_perm, ipcp);
1146 if (err) 1199 if (err)
1147 goto out_unlock; 1200 goto out_unlock;
1148 sma->sem_ctime = get_seconds(); 1201 sma->sem_ctime = get_seconds();
1149 break; 1202 break;
1150 default: 1203 default:
1204 rcu_read_unlock();
1151 err = -EINVAL; 1205 err = -EINVAL;
1206 goto out_up;
1152 } 1207 }
1153 1208
1154out_unlock: 1209out_unlock:
@@ -1277,16 +1332,18 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
1277 spin_unlock(&ulp->lock); 1332 spin_unlock(&ulp->lock);
1278 if (likely(un!=NULL)) 1333 if (likely(un!=NULL))
1279 goto out; 1334 goto out;
1280 rcu_read_unlock();
1281 1335
1282 /* no undo structure around - allocate one. */ 1336 /* no undo structure around - allocate one. */
1283 /* step 1: figure out the size of the semaphore array */ 1337 /* step 1: figure out the size of the semaphore array */
1284 sma = sem_lock_check(ns, semid); 1338 sma = sem_obtain_object_check(ns, semid);
1285 if (IS_ERR(sma)) 1339 if (IS_ERR(sma)) {
1340 rcu_read_unlock();
1286 return ERR_CAST(sma); 1341 return ERR_CAST(sma);
1342 }
1287 1343
1288 nsems = sma->sem_nsems; 1344 nsems = sma->sem_nsems;
1289 sem_getref_and_unlock(sma); 1345 ipc_rcu_getref(sma);
1346 rcu_read_unlock();
1290 1347
1291 /* step 2: allocate new undo structure */ 1348 /* step 2: allocate new undo structure */
1292 new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); 1349 new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
@@ -1421,7 +1478,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
1421 1478
1422 INIT_LIST_HEAD(&tasks); 1479 INIT_LIST_HEAD(&tasks);
1423 1480
1424 sma = sem_lock_check(ns, semid); 1481 rcu_read_lock();
1482 sma = sem_obtain_object_check(ns, semid);
1425 if (IS_ERR(sma)) { 1483 if (IS_ERR(sma)) {
1426 if (un) 1484 if (un)
1427 rcu_read_unlock(); 1485 rcu_read_unlock();
@@ -1429,6 +1487,24 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
1429 goto out_free; 1487 goto out_free;
1430 } 1488 }
1431 1489
1490 error = -EFBIG;
1491 if (max >= sma->sem_nsems) {
1492 rcu_read_unlock();
1493 goto out_wakeup;
1494 }
1495
1496 error = -EACCES;
1497 if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) {
1498 rcu_read_unlock();
1499 goto out_wakeup;
1500 }
1501
1502 error = security_sem_semop(sma, sops, nsops, alter);
1503 if (error) {
1504 rcu_read_unlock();
1505 goto out_wakeup;
1506 }
1507
1432 /* 1508 /*
1433 * semid identifiers are not unique - find_alloc_undo may have 1509 * semid identifiers are not unique - find_alloc_undo may have
1434 * allocated an undo structure, it was invalidated by an RMID 1510 * allocated an undo structure, it was invalidated by an RMID
@@ -1437,6 +1513,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
1437 * "un" itself is guaranteed by rcu. 1513 * "un" itself is guaranteed by rcu.
1438 */ 1514 */
1439 error = -EIDRM; 1515 error = -EIDRM;
1516 ipc_lock_object(&sma->sem_perm);
1440 if (un) { 1517 if (un) {
1441 if (un->semid == -1) { 1518 if (un->semid == -1) {
1442 rcu_read_unlock(); 1519 rcu_read_unlock();
@@ -1454,18 +1531,6 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
1454 } 1531 }
1455 } 1532 }
1456 1533
1457 error = -EFBIG;
1458 if (max >= sma->sem_nsems)
1459 goto out_unlock_free;
1460
1461 error = -EACCES;
1462 if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO))
1463 goto out_unlock_free;
1464
1465 error = security_sem_semop(sma, sops, nsops, alter);
1466 if (error)
1467 goto out_unlock_free;
1468
1469 error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current)); 1534 error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
1470 if (error <= 0) { 1535 if (error <= 0) {
1471 if (alter && error == 0) 1536 if (alter && error == 0)
@@ -1568,7 +1633,7 @@ sleep_again:
1568 1633
1569out_unlock_free: 1634out_unlock_free:
1570 sem_unlock(sma); 1635 sem_unlock(sma);
1571 1636out_wakeup:
1572 wake_up_sem_queue_do(&tasks); 1637 wake_up_sem_queue_do(&tasks);
1573out_free: 1638out_free:
1574 if(sops != fast_sops) 1639 if(sops != fast_sops)
diff --git a/ipc/util.h b/ipc/util.h
index 13d92fea15a3..c36b9977c957 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -171,6 +171,11 @@ static inline void ipc_unlock(struct kern_ipc_perm *perm)
171 rcu_read_unlock(); 171 rcu_read_unlock();
172} 172}
173 173
174static inline void ipc_lock_object(struct kern_ipc_perm *perm)
175{
176 spin_lock(&perm->lock);
177}
178
174struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id); 179struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id);
175struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id); 180struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id);
176int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids, 181int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,