diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2017-04-08 16:50:24 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2017-04-17 12:52:22 -0400 |
commit | e99ca56ce03dd90991025878152bae8b53484147 (patch) | |
tree | 51c7a65ac9f03ed4f242b042ae7d7416bc4f1b0f /fs/select.c | |
parent | 2611dc1939569718c65ffd59c8fb9ba7474d026c (diff) |
move compat select-related syscalls to fs/select.c
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/select.c')
-rw-r--r-- | fs/select.c | 421 |
1 files changed, 419 insertions, 2 deletions
diff --git a/fs/select.c b/fs/select.c index e2112270d75a..dd70937ddb60 100644 --- a/fs/select.c +++ b/fs/select.c | |||
@@ -338,6 +338,53 @@ sticky: | |||
338 | return ret; | 338 | return ret; |
339 | } | 339 | } |
340 | 340 | ||
341 | /* | ||
342 | * Scalable version of the fd_set. | ||
343 | */ | ||
344 | |||
345 | typedef struct { | ||
346 | unsigned long *in, *out, *ex; | ||
347 | unsigned long *res_in, *res_out, *res_ex; | ||
348 | } fd_set_bits; | ||
349 | |||
350 | /* | ||
351 | * How many longwords for "nr" bits? | ||
352 | */ | ||
353 | #define FDS_BITPERLONG (8*sizeof(long)) | ||
354 | #define FDS_LONGS(nr) (((nr)+FDS_BITPERLONG-1)/FDS_BITPERLONG) | ||
355 | #define FDS_BYTES(nr) (FDS_LONGS(nr)*sizeof(long)) | ||
356 | |||
357 | /* | ||
358 | * We do a VERIFY_WRITE here even though we are only reading this time: | ||
359 | * we'll write to it eventually.. | ||
360 | * | ||
361 | * Use "unsigned long" accesses to let user-mode fd_set's be long-aligned. | ||
362 | */ | ||
363 | static inline | ||
364 | int get_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset) | ||
365 | { | ||
366 | nr = FDS_BYTES(nr); | ||
367 | if (ufdset) | ||
368 | return copy_from_user(fdset, ufdset, nr) ? -EFAULT : 0; | ||
369 | |||
370 | memset(fdset, 0, nr); | ||
371 | return 0; | ||
372 | } | ||
373 | |||
374 | static inline unsigned long __must_check | ||
375 | set_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset) | ||
376 | { | ||
377 | if (ufdset) | ||
378 | return __copy_to_user(ufdset, fdset, FDS_BYTES(nr)); | ||
379 | return 0; | ||
380 | } | ||
381 | |||
382 | static inline | ||
383 | void zero_fd_set(unsigned long nr, unsigned long *fdset) | ||
384 | { | ||
385 | memset(fdset, 0, FDS_BYTES(nr)); | ||
386 | } | ||
387 | |||
341 | #define FDS_IN(fds, n) (fds->in + n) | 388 | #define FDS_IN(fds, n) (fds->in + n) |
342 | #define FDS_OUT(fds, n) (fds->out + n) | 389 | #define FDS_OUT(fds, n) (fds->out + n) |
343 | #define FDS_EX(fds, n) (fds->ex + n) | 390 | #define FDS_EX(fds, n) (fds->ex + n) |
@@ -401,7 +448,7 @@ static inline void wait_key_set(poll_table *wait, unsigned long in, | |||
401 | wait->_key |= POLLOUT_SET; | 448 | wait->_key |= POLLOUT_SET; |
402 | } | 449 | } |
403 | 450 | ||
404 | int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) | 451 | static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) |
405 | { | 452 | { |
406 | ktime_t expire, *to = NULL; | 453 | ktime_t expire, *to = NULL; |
407 | struct poll_wqueues table; | 454 | struct poll_wqueues table; |
@@ -881,7 +928,7 @@ static int do_poll(struct poll_list *list, struct poll_wqueues *wait, | |||
881 | #define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \ | 928 | #define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \ |
882 | sizeof(struct pollfd)) | 929 | sizeof(struct pollfd)) |
883 | 930 | ||
884 | int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, | 931 | static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, |
885 | struct timespec64 *end_time) | 932 | struct timespec64 *end_time) |
886 | { | 933 | { |
887 | struct poll_wqueues table; | 934 | struct poll_wqueues table; |
@@ -1053,3 +1100,373 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, | |||
1053 | 1100 | ||
1054 | return ret; | 1101 | return ret; |
1055 | } | 1102 | } |
1103 | |||
1104 | #ifdef CONFIG_COMPAT | ||
1105 | #define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) | ||
1106 | |||
1107 | static | ||
1108 | int compat_poll_select_copy_remaining(struct timespec *end_time, void __user *p, | ||
1109 | int timeval, int ret) | ||
1110 | { | ||
1111 | struct timespec ts; | ||
1112 | |||
1113 | if (!p) | ||
1114 | return ret; | ||
1115 | |||
1116 | if (current->personality & STICKY_TIMEOUTS) | ||
1117 | goto sticky; | ||
1118 | |||
1119 | /* No update for zero timeout */ | ||
1120 | if (!end_time->tv_sec && !end_time->tv_nsec) | ||
1121 | return ret; | ||
1122 | |||
1123 | ktime_get_ts(&ts); | ||
1124 | ts = timespec_sub(*end_time, ts); | ||
1125 | if (ts.tv_sec < 0) | ||
1126 | ts.tv_sec = ts.tv_nsec = 0; | ||
1127 | |||
1128 | if (timeval) { | ||
1129 | struct compat_timeval rtv; | ||
1130 | |||
1131 | rtv.tv_sec = ts.tv_sec; | ||
1132 | rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC; | ||
1133 | |||
1134 | if (!copy_to_user(p, &rtv, sizeof(rtv))) | ||
1135 | return ret; | ||
1136 | } else { | ||
1137 | struct compat_timespec rts; | ||
1138 | |||
1139 | rts.tv_sec = ts.tv_sec; | ||
1140 | rts.tv_nsec = ts.tv_nsec; | ||
1141 | |||
1142 | if (!copy_to_user(p, &rts, sizeof(rts))) | ||
1143 | return ret; | ||
1144 | } | ||
1145 | /* | ||
1146 | * If an application puts its timeval in read-only memory, we | ||
1147 | * don't want the Linux-specific update to the timeval to | ||
1148 | * cause a fault after the select has completed | ||
1149 | * successfully. However, because we're not updating the | ||
1150 | * timeval, we can't restart the system call. | ||
1151 | */ | ||
1152 | |||
1153 | sticky: | ||
1154 | if (ret == -ERESTARTNOHAND) | ||
1155 | ret = -EINTR; | ||
1156 | return ret; | ||
1157 | } | ||
1158 | |||
1159 | /* | ||
1160 | * Ooo, nasty. We need here to frob 32-bit unsigned longs to | ||
1161 | * 64-bit unsigned longs. | ||
1162 | */ | ||
1163 | static | ||
1164 | int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, | ||
1165 | unsigned long *fdset) | ||
1166 | { | ||
1167 | nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS); | ||
1168 | if (ufdset) { | ||
1169 | unsigned long odd; | ||
1170 | |||
1171 | if (!access_ok(VERIFY_WRITE, ufdset, nr*sizeof(compat_ulong_t))) | ||
1172 | return -EFAULT; | ||
1173 | |||
1174 | odd = nr & 1UL; | ||
1175 | nr &= ~1UL; | ||
1176 | while (nr) { | ||
1177 | unsigned long h, l; | ||
1178 | if (__get_user(l, ufdset) || __get_user(h, ufdset+1)) | ||
1179 | return -EFAULT; | ||
1180 | ufdset += 2; | ||
1181 | *fdset++ = h << 32 | l; | ||
1182 | nr -= 2; | ||
1183 | } | ||
1184 | if (odd && __get_user(*fdset, ufdset)) | ||
1185 | return -EFAULT; | ||
1186 | } else { | ||
1187 | /* Tricky, must clear full unsigned long in the | ||
1188 | * kernel fdset at the end, this makes sure that | ||
1189 | * actually happens. | ||
1190 | */ | ||
1191 | memset(fdset, 0, ((nr + 1) & ~1)*sizeof(compat_ulong_t)); | ||
1192 | } | ||
1193 | return 0; | ||
1194 | } | ||
1195 | |||
1196 | static | ||
1197 | int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, | ||
1198 | unsigned long *fdset) | ||
1199 | { | ||
1200 | unsigned long odd; | ||
1201 | nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS); | ||
1202 | |||
1203 | if (!ufdset) | ||
1204 | return 0; | ||
1205 | |||
1206 | odd = nr & 1UL; | ||
1207 | nr &= ~1UL; | ||
1208 | while (nr) { | ||
1209 | unsigned long h, l; | ||
1210 | l = *fdset++; | ||
1211 | h = l >> 32; | ||
1212 | if (__put_user(l, ufdset) || __put_user(h, ufdset+1)) | ||
1213 | return -EFAULT; | ||
1214 | ufdset += 2; | ||
1215 | nr -= 2; | ||
1216 | } | ||
1217 | if (odd && __put_user(*fdset, ufdset)) | ||
1218 | return -EFAULT; | ||
1219 | return 0; | ||
1220 | } | ||
1221 | |||
1222 | |||
1223 | /* | ||
1224 | * This is a virtual copy of sys_select from fs/select.c and probably | ||
1225 | * should be compared to it from time to time | ||
1226 | */ | ||
1227 | |||
1228 | /* | ||
1229 | * We can actually return ERESTARTSYS instead of EINTR, but I'd | ||
1230 | * like to be certain this leads to no problems. So I return | ||
1231 | * EINTR just for safety. | ||
1232 | * | ||
1233 | * Update: ERESTARTSYS breaks at least the xview clock binary, so | ||
1234 | * I'm trying ERESTARTNOHAND which restart only when you want to. | ||
1235 | */ | ||
1236 | static int compat_core_sys_select(int n, compat_ulong_t __user *inp, | ||
1237 | compat_ulong_t __user *outp, compat_ulong_t __user *exp, | ||
1238 | struct timespec *end_time) | ||
1239 | { | ||
1240 | fd_set_bits fds; | ||
1241 | void *bits; | ||
1242 | int size, max_fds, ret = -EINVAL; | ||
1243 | struct fdtable *fdt; | ||
1244 | long stack_fds[SELECT_STACK_ALLOC/sizeof(long)]; | ||
1245 | |||
1246 | if (n < 0) | ||
1247 | goto out_nofds; | ||
1248 | |||
1249 | /* max_fds can increase, so grab it once to avoid race */ | ||
1250 | rcu_read_lock(); | ||
1251 | fdt = files_fdtable(current->files); | ||
1252 | max_fds = fdt->max_fds; | ||
1253 | rcu_read_unlock(); | ||
1254 | if (n > max_fds) | ||
1255 | n = max_fds; | ||
1256 | |||
1257 | /* | ||
1258 | * We need 6 bitmaps (in/out/ex for both incoming and outgoing), | ||
1259 | * since we used fdset we need to allocate memory in units of | ||
1260 | * long-words. | ||
1261 | */ | ||
1262 | size = FDS_BYTES(n); | ||
1263 | bits = stack_fds; | ||
1264 | if (size > sizeof(stack_fds) / 6) { | ||
1265 | bits = kmalloc(6 * size, GFP_KERNEL); | ||
1266 | ret = -ENOMEM; | ||
1267 | if (!bits) | ||
1268 | goto out_nofds; | ||
1269 | } | ||
1270 | fds.in = (unsigned long *) bits; | ||
1271 | fds.out = (unsigned long *) (bits + size); | ||
1272 | fds.ex = (unsigned long *) (bits + 2*size); | ||
1273 | fds.res_in = (unsigned long *) (bits + 3*size); | ||
1274 | fds.res_out = (unsigned long *) (bits + 4*size); | ||
1275 | fds.res_ex = (unsigned long *) (bits + 5*size); | ||
1276 | |||
1277 | if ((ret = compat_get_fd_set(n, inp, fds.in)) || | ||
1278 | (ret = compat_get_fd_set(n, outp, fds.out)) || | ||
1279 | (ret = compat_get_fd_set(n, exp, fds.ex))) | ||
1280 | goto out; | ||
1281 | zero_fd_set(n, fds.res_in); | ||
1282 | zero_fd_set(n, fds.res_out); | ||
1283 | zero_fd_set(n, fds.res_ex); | ||
1284 | |||
1285 | ret = do_select(n, &fds, end_time); | ||
1286 | |||
1287 | if (ret < 0) | ||
1288 | goto out; | ||
1289 | if (!ret) { | ||
1290 | ret = -ERESTARTNOHAND; | ||
1291 | if (signal_pending(current)) | ||
1292 | goto out; | ||
1293 | ret = 0; | ||
1294 | } | ||
1295 | |||
1296 | if (compat_set_fd_set(n, inp, fds.res_in) || | ||
1297 | compat_set_fd_set(n, outp, fds.res_out) || | ||
1298 | compat_set_fd_set(n, exp, fds.res_ex)) | ||
1299 | ret = -EFAULT; | ||
1300 | out: | ||
1301 | if (bits != stack_fds) | ||
1302 | kfree(bits); | ||
1303 | out_nofds: | ||
1304 | return ret; | ||
1305 | } | ||
1306 | |||
1307 | COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp, | ||
1308 | compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, | ||
1309 | struct compat_timeval __user *, tvp) | ||
1310 | { | ||
1311 | struct timespec end_time, *to = NULL; | ||
1312 | struct compat_timeval tv; | ||
1313 | int ret; | ||
1314 | |||
1315 | if (tvp) { | ||
1316 | if (copy_from_user(&tv, tvp, sizeof(tv))) | ||
1317 | return -EFAULT; | ||
1318 | |||
1319 | to = &end_time; | ||
1320 | if (poll_select_set_timeout(to, | ||
1321 | tv.tv_sec + (tv.tv_usec / USEC_PER_SEC), | ||
1322 | (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC)) | ||
1323 | return -EINVAL; | ||
1324 | } | ||
1325 | |||
1326 | ret = compat_core_sys_select(n, inp, outp, exp, to); | ||
1327 | ret = compat_poll_select_copy_remaining(&end_time, tvp, 1, ret); | ||
1328 | |||
1329 | return ret; | ||
1330 | } | ||
1331 | |||
1332 | struct compat_sel_arg_struct { | ||
1333 | compat_ulong_t n; | ||
1334 | compat_uptr_t inp; | ||
1335 | compat_uptr_t outp; | ||
1336 | compat_uptr_t exp; | ||
1337 | compat_uptr_t tvp; | ||
1338 | }; | ||
1339 | |||
1340 | COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg) | ||
1341 | { | ||
1342 | struct compat_sel_arg_struct a; | ||
1343 | |||
1344 | if (copy_from_user(&a, arg, sizeof(a))) | ||
1345 | return -EFAULT; | ||
1346 | return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp), | ||
1347 | compat_ptr(a.exp), compat_ptr(a.tvp)); | ||
1348 | } | ||
1349 | |||
1350 | static long do_compat_pselect(int n, compat_ulong_t __user *inp, | ||
1351 | compat_ulong_t __user *outp, compat_ulong_t __user *exp, | ||
1352 | struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask, | ||
1353 | compat_size_t sigsetsize) | ||
1354 | { | ||
1355 | compat_sigset_t ss32; | ||
1356 | sigset_t ksigmask, sigsaved; | ||
1357 | struct compat_timespec ts; | ||
1358 | struct timespec end_time, *to = NULL; | ||
1359 | int ret; | ||
1360 | |||
1361 | if (tsp) { | ||
1362 | if (copy_from_user(&ts, tsp, sizeof(ts))) | ||
1363 | return -EFAULT; | ||
1364 | |||
1365 | to = &end_time; | ||
1366 | if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) | ||
1367 | return -EINVAL; | ||
1368 | } | ||
1369 | |||
1370 | if (sigmask) { | ||
1371 | if (sigsetsize != sizeof(compat_sigset_t)) | ||
1372 | return -EINVAL; | ||
1373 | if (copy_from_user(&ss32, sigmask, sizeof(ss32))) | ||
1374 | return -EFAULT; | ||
1375 | sigset_from_compat(&ksigmask, &ss32); | ||
1376 | |||
1377 | sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); | ||
1378 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); | ||
1379 | } | ||
1380 | |||
1381 | ret = compat_core_sys_select(n, inp, outp, exp, to); | ||
1382 | ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret); | ||
1383 | |||
1384 | if (ret == -ERESTARTNOHAND) { | ||
1385 | /* | ||
1386 | * Don't restore the signal mask yet. Let do_signal() deliver | ||
1387 | * the signal on the way back to userspace, before the signal | ||
1388 | * mask is restored. | ||
1389 | */ | ||
1390 | if (sigmask) { | ||
1391 | memcpy(&current->saved_sigmask, &sigsaved, | |
1392 | sizeof(sigsaved)); | ||
1393 | set_restore_sigmask(); | ||
1394 | } | ||
1395 | } else if (sigmask) | ||
1396 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | ||
1397 | |||
1398 | return ret; | ||
1399 | } | ||
1400 | |||
1401 | COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp, | ||
1402 | compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, | ||
1403 | struct compat_timespec __user *, tsp, void __user *, sig) | ||
1404 | { | ||
1405 | compat_size_t sigsetsize = 0; | ||
1406 | compat_uptr_t up = 0; | ||
1407 | |||
1408 | if (sig) { | ||
1409 | if (!access_ok(VERIFY_READ, sig, | ||
1410 | sizeof(compat_uptr_t)+sizeof(compat_size_t)) || | ||
1411 | __get_user(up, (compat_uptr_t __user *)sig) || | ||
1412 | __get_user(sigsetsize, | ||
1413 | (compat_size_t __user *)(sig+sizeof(up)))) | ||
1414 | return -EFAULT; | ||
1415 | } | ||
1416 | return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(up), | ||
1417 | sigsetsize); | ||
1418 | } | ||
1419 | |||
1420 | COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, | ||
1421 | unsigned int, nfds, struct compat_timespec __user *, tsp, | ||
1422 | const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) | ||
1423 | { | ||
1424 | compat_sigset_t ss32; | ||
1425 | sigset_t ksigmask, sigsaved; | ||
1426 | struct compat_timespec ts; | ||
1427 | struct timespec end_time, *to = NULL; | ||
1428 | int ret; | ||
1429 | |||
1430 | if (tsp) { | ||
1431 | if (copy_from_user(&ts, tsp, sizeof(ts))) | ||
1432 | return -EFAULT; | ||
1433 | |||
1434 | to = &end_time; | ||
1435 | if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) | ||
1436 | return -EINVAL; | ||
1437 | } | ||
1438 | |||
1439 | if (sigmask) { | ||
1440 | if (sigsetsize != sizeof(compat_sigset_t)) | ||
1441 | return -EINVAL; | ||
1442 | if (copy_from_user(&ss32, sigmask, sizeof(ss32))) | ||
1443 | return -EFAULT; | ||
1444 | sigset_from_compat(&ksigmask, &ss32); | ||
1445 | |||
1446 | sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); | ||
1447 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); | ||
1448 | } | ||
1449 | |||
1450 | ret = do_sys_poll(ufds, nfds, to); | ||
1451 | |||
1452 | /* We can restart this syscall, usually */ | ||
1453 | if (ret == -EINTR) { | ||
1454 | /* | ||
1455 | * Don't restore the signal mask yet. Let do_signal() deliver | ||
1456 | * the signal on the way back to userspace, before the signal | ||
1457 | * mask is restored. | ||
1458 | */ | ||
1459 | if (sigmask) { | ||
1460 | memcpy(&current->saved_sigmask, &sigsaved, | |
1461 | sizeof(sigsaved)); | ||
1462 | set_restore_sigmask(); | ||
1463 | } | ||
1464 | ret = -ERESTARTNOHAND; | ||
1465 | } else if (sigmask) | ||
1466 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | ||
1467 | |||
1468 | ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret); | ||
1469 | |||
1470 | return ret; | ||
1471 | } | ||
1472 | #endif | ||