summary refs log tree commit diff stats
path: root/fs/select.c
diff options
context:
space:
mode:
author Al Viro <viro@zeniv.linux.org.uk> 2017-04-08 16:50:24 -0400
committer Al Viro <viro@zeniv.linux.org.uk> 2017-04-17 12:52:22 -0400
commit e99ca56ce03dd90991025878152bae8b53484147 (patch)
tree 51c7a65ac9f03ed4f242b042ae7d7416bc4f1b0f /fs/select.c
parent 2611dc1939569718c65ffd59c8fb9ba7474d026c (diff)
move compat select-related syscalls to fs/select.c
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/select.c')
-rw-r--r-- fs/select.c 421
1 files changed, 419 insertions, 2 deletions
diff --git a/fs/select.c b/fs/select.c
index e2112270d75a..dd70937ddb60 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -338,6 +338,53 @@ sticky:
338 return ret; 338 return ret;
339} 339}
340 340
/*
 * Scalable version of the fd_set: six bitmap pointers, three for the
 * sets handed in by the caller (in/out/ex) and three for the matching
 * result sets (res_in/res_out/res_ex).
 */
typedef struct {
	unsigned long *in;
	unsigned long *out;
	unsigned long *ex;
	unsigned long *res_in;
	unsigned long *res_out;
	unsigned long *res_ex;
} fd_set_bits;
349
/*
 * Bitmap sizing helpers: how many longwords (FDS_LONGS) or bytes
 * (FDS_BYTES) are needed to hold "nr" bits, rounded up to a whole long.
 */
#define FDS_BITPERLONG	(sizeof(long) * 8)
#define FDS_LONGS(nr)	(((nr) + FDS_BITPERLONG - 1) / FDS_BITPERLONG)
#define FDS_BYTES(nr)	(sizeof(long) * FDS_LONGS(nr))
356
357/*
358 * We do a VERIFY_WRITE here even though we are only reading this time:
359 * we'll write to it eventually..
360 *
361 * Use "unsigned long" accesses to let user-mode fd_set's be long-aligned.
362 */
363static inline
364int get_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
365{
366 nr = FDS_BYTES(nr);
367 if (ufdset)
368 return copy_from_user(fdset, ufdset, nr) ? -EFAULT : 0;
369
370 memset(fdset, 0, nr);
371 return 0;
372}
373
374static inline unsigned long __must_check
375set_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
376{
377 if (ufdset)
378 return __copy_to_user(ufdset, fdset, FDS_BYTES(nr));
379 return 0;
380}
381
/*
 * Clear a kernel fd bitmap large enough for "nr" bits, i.e. FDS_BYTES(nr)
 * bytes starting at @fdset.
 */
static inline
void zero_fd_set(unsigned long nr, unsigned long *fdset)
{
	const unsigned long bytes = FDS_BYTES(nr);

	memset(fdset, 0, bytes);
}
387
/*
 * Address of longword "n" inside the in/out/ex bitmap of an fd_set_bits.
 * Both arguments are parenthesized so that expressions such as
 * "cond ? a : b" or "base + 1" expand the way the caller intended.
 */
#define FDS_IN(fds, n)		((fds)->in + (n))
#define FDS_OUT(fds, n)		((fds)->out + (n))
#define FDS_EX(fds, n)		((fds)->ex + (n))
@@ -401,7 +448,7 @@ static inline void wait_key_set(poll_table *wait, unsigned long in,
401 wait->_key |= POLLOUT_SET; 448 wait->_key |= POLLOUT_SET;
402} 449}
403 450
404int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) 451static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
405{ 452{
406 ktime_t expire, *to = NULL; 453 ktime_t expire, *to = NULL;
407 struct poll_wqueues table; 454 struct poll_wqueues table;
@@ -881,7 +928,7 @@ static int do_poll(struct poll_list *list, struct poll_wqueues *wait,
881#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \ 928#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \
882 sizeof(struct pollfd)) 929 sizeof(struct pollfd))
883 930
884int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, 931static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
885 struct timespec64 *end_time) 932 struct timespec64 *end_time)
886{ 933{
887 struct poll_wqueues table; 934 struct poll_wqueues table;
@@ -1053,3 +1100,373 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
1053 1100
1054 return ret; 1101 return ret;
1055} 1102}
1103
1104#ifdef CONFIG_COMPAT
/* Number of bits held by one compat_ulong_t word of a compat fd bitmap. */
#define __COMPAT_NFDBITS	(sizeof(compat_ulong_t) * 8)
1106
1107static
1108int compat_poll_select_copy_remaining(struct timespec *end_time, void __user *p,
1109 int timeval, int ret)
1110{
1111 struct timespec ts;
1112
1113 if (!p)
1114 return ret;
1115
1116 if (current->personality & STICKY_TIMEOUTS)
1117 goto sticky;
1118
1119 /* No update for zero timeout */
1120 if (!end_time->tv_sec && !end_time->tv_nsec)
1121 return ret;
1122
1123 ktime_get_ts(&ts);
1124 ts = timespec_sub(*end_time, ts);
1125 if (ts.tv_sec < 0)
1126 ts.tv_sec = ts.tv_nsec = 0;
1127
1128 if (timeval) {
1129 struct compat_timeval rtv;
1130
1131 rtv.tv_sec = ts.tv_sec;
1132 rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC;
1133
1134 if (!copy_to_user(p, &rtv, sizeof(rtv)))
1135 return ret;
1136 } else {
1137 struct compat_timespec rts;
1138
1139 rts.tv_sec = ts.tv_sec;
1140 rts.tv_nsec = ts.tv_nsec;
1141
1142 if (!copy_to_user(p, &rts, sizeof(rts)))
1143 return ret;
1144 }
1145 /*
1146 * If an application puts its timeval in read-only memory, we
1147 * don't want the Linux-specific update to the timeval to
1148 * cause a fault after the select has completed
1149 * successfully. However, because we're not updating the
1150 * timeval, we can't restart the system call.
1151 */
1152
1153sticky:
1154 if (ret == -ERESTARTNOHAND)
1155 ret = -EINTR;
1156 return ret;
1157}
1158
1159/*
1160 * Ooo, nasty. We need here to frob 32-bit unsigned longs to
1161 * 64-bit unsigned longs.
1162 */
1163static
1164int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
1165 unsigned long *fdset)
1166{
1167 nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS);
1168 if (ufdset) {
1169 unsigned long odd;
1170
1171 if (!access_ok(VERIFY_WRITE, ufdset, nr*sizeof(compat_ulong_t)))
1172 return -EFAULT;
1173
1174 odd = nr & 1UL;
1175 nr &= ~1UL;
1176 while (nr) {
1177 unsigned long h, l;
1178 if (__get_user(l, ufdset) || __get_user(h, ufdset+1))
1179 return -EFAULT;
1180 ufdset += 2;
1181 *fdset++ = h << 32 | l;
1182 nr -= 2;
1183 }
1184 if (odd && __get_user(*fdset, ufdset))
1185 return -EFAULT;
1186 } else {
1187 /* Tricky, must clear full unsigned long in the
1188 * kernel fdset at the end, this makes sure that
1189 * actually happens.
1190 */
1191 memset(fdset, 0, ((nr + 1) & ~1)*sizeof(compat_ulong_t));
1192 }
1193 return 0;
1194}
1195
1196static
1197int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
1198 unsigned long *fdset)
1199{
1200 unsigned long odd;
1201 nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS);
1202
1203 if (!ufdset)
1204 return 0;
1205
1206 odd = nr & 1UL;
1207 nr &= ~1UL;
1208 while (nr) {
1209 unsigned long h, l;
1210 l = *fdset++;
1211 h = l >> 32;
1212 if (__put_user(l, ufdset) || __put_user(h, ufdset+1))
1213 return -EFAULT;
1214 ufdset += 2;
1215 nr -= 2;
1216 }
1217 if (odd && __put_user(*fdset, ufdset))
1218 return -EFAULT;
1219 return 0;
1220}
1221
1222
1223/*
1224 * This is a virtual copy of sys_select from fs/select.c and probably
1225 * should be compared to it from time to time
1226 */
1227
1228/*
1229 * We can actually return ERESTARTSYS instead of EINTR, but I'd
1230 * like to be certain this leads to no problems. So I return
1231 * EINTR just for safety.
1232 *
1233 * Update: ERESTARTSYS breaks at least the xview clock binary, so
1234 * I'm trying ERESTARTNOHAND which restart only when you want to.
1235 */
1236static int compat_core_sys_select(int n, compat_ulong_t __user *inp,
1237 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1238 struct timespec *end_time)
1239{
1240 fd_set_bits fds;
1241 void *bits;
1242 int size, max_fds, ret = -EINVAL;
1243 struct fdtable *fdt;
1244 long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
1245
1246 if (n < 0)
1247 goto out_nofds;
1248
1249 /* max_fds can increase, so grab it once to avoid race */
1250 rcu_read_lock();
1251 fdt = files_fdtable(current->files);
1252 max_fds = fdt->max_fds;
1253 rcu_read_unlock();
1254 if (n > max_fds)
1255 n = max_fds;
1256
1257 /*
1258 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
1259 * since we used fdset we need to allocate memory in units of
1260 * long-words.
1261 */
1262 size = FDS_BYTES(n);
1263 bits = stack_fds;
1264 if (size > sizeof(stack_fds) / 6) {
1265 bits = kmalloc(6 * size, GFP_KERNEL);
1266 ret = -ENOMEM;
1267 if (!bits)
1268 goto out_nofds;
1269 }
1270 fds.in = (unsigned long *) bits;
1271 fds.out = (unsigned long *) (bits + size);
1272 fds.ex = (unsigned long *) (bits + 2*size);
1273 fds.res_in = (unsigned long *) (bits + 3*size);
1274 fds.res_out = (unsigned long *) (bits + 4*size);
1275 fds.res_ex = (unsigned long *) (bits + 5*size);
1276
1277 if ((ret = compat_get_fd_set(n, inp, fds.in)) ||
1278 (ret = compat_get_fd_set(n, outp, fds.out)) ||
1279 (ret = compat_get_fd_set(n, exp, fds.ex)))
1280 goto out;
1281 zero_fd_set(n, fds.res_in);
1282 zero_fd_set(n, fds.res_out);
1283 zero_fd_set(n, fds.res_ex);
1284
1285 ret = do_select(n, &fds, end_time);
1286
1287 if (ret < 0)
1288 goto out;
1289 if (!ret) {
1290 ret = -ERESTARTNOHAND;
1291 if (signal_pending(current))
1292 goto out;
1293 ret = 0;
1294 }
1295
1296 if (compat_set_fd_set(n, inp, fds.res_in) ||
1297 compat_set_fd_set(n, outp, fds.res_out) ||
1298 compat_set_fd_set(n, exp, fds.res_ex))
1299 ret = -EFAULT;
1300out:
1301 if (bits != stack_fds)
1302 kfree(bits);
1303out_nofds:
1304 return ret;
1305}
1306
1307COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
1308 compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
1309 struct compat_timeval __user *, tvp)
1310{
1311 struct timespec end_time, *to = NULL;
1312 struct compat_timeval tv;
1313 int ret;
1314
1315 if (tvp) {
1316 if (copy_from_user(&tv, tvp, sizeof(tv)))
1317 return -EFAULT;
1318
1319 to = &end_time;
1320 if (poll_select_set_timeout(to,
1321 tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
1322 (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
1323 return -EINVAL;
1324 }
1325
1326 ret = compat_core_sys_select(n, inp, outp, exp, to);
1327 ret = compat_poll_select_copy_remaining(&end_time, tvp, 1, ret);
1328
1329 return ret;
1330}
1331
1332struct compat_sel_arg_struct {
1333 compat_ulong_t n;
1334 compat_uptr_t inp;
1335 compat_uptr_t outp;
1336 compat_uptr_t exp;
1337 compat_uptr_t tvp;
1338};
1339
1340COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg)
1341{
1342 struct compat_sel_arg_struct a;
1343
1344 if (copy_from_user(&a, arg, sizeof(a)))
1345 return -EFAULT;
1346 return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp),
1347 compat_ptr(a.exp), compat_ptr(a.tvp));
1348}
1349
1350static long do_compat_pselect(int n, compat_ulong_t __user *inp,
1351 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1352 struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask,
1353 compat_size_t sigsetsize)
1354{
1355 compat_sigset_t ss32;
1356 sigset_t ksigmask, sigsaved;
1357 struct compat_timespec ts;
1358 struct timespec end_time, *to = NULL;
1359 int ret;
1360
1361 if (tsp) {
1362 if (copy_from_user(&ts, tsp, sizeof(ts)))
1363 return -EFAULT;
1364
1365 to = &end_time;
1366 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
1367 return -EINVAL;
1368 }
1369
1370 if (sigmask) {
1371 if (sigsetsize != sizeof(compat_sigset_t))
1372 return -EINVAL;
1373 if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
1374 return -EFAULT;
1375 sigset_from_compat(&ksigmask, &ss32);
1376
1377 sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
1378 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1379 }
1380
1381 ret = compat_core_sys_select(n, inp, outp, exp, to);
1382 ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret);
1383
1384 if (ret == -ERESTARTNOHAND) {
1385 /*
1386 * Don't restore the signal mask yet. Let do_signal() deliver
1387 * the signal on the way back to userspace, before the signal
1388 * mask is restored.
1389 */
1390 if (sigmask) {
1391 memcpy(&current->saved_sigmask, &sigsaved,
1392 sizeof(sigsaved));
1393 set_restore_sigmask();
1394 }
1395 } else if (sigmask)
1396 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1397
1398 return ret;
1399}
1400
1401COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp,
1402 compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
1403 struct compat_timespec __user *, tsp, void __user *, sig)
1404{
1405 compat_size_t sigsetsize = 0;
1406 compat_uptr_t up = 0;
1407
1408 if (sig) {
1409 if (!access_ok(VERIFY_READ, sig,
1410 sizeof(compat_uptr_t)+sizeof(compat_size_t)) ||
1411 __get_user(up, (compat_uptr_t __user *)sig) ||
1412 __get_user(sigsetsize,
1413 (compat_size_t __user *)(sig+sizeof(up))))
1414 return -EFAULT;
1415 }
1416 return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(up),
1417 sigsetsize);
1418}
1419
1420COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
1421 unsigned int, nfds, struct compat_timespec __user *, tsp,
1422 const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
1423{
1424 compat_sigset_t ss32;
1425 sigset_t ksigmask, sigsaved;
1426 struct compat_timespec ts;
1427 struct timespec end_time, *to = NULL;
1428 int ret;
1429
1430 if (tsp) {
1431 if (copy_from_user(&ts, tsp, sizeof(ts)))
1432 return -EFAULT;
1433
1434 to = &end_time;
1435 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
1436 return -EINVAL;
1437 }
1438
1439 if (sigmask) {
1440 if (sigsetsize != sizeof(compat_sigset_t))
1441 return -EINVAL;
1442 if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
1443 return -EFAULT;
1444 sigset_from_compat(&ksigmask, &ss32);
1445
1446 sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
1447 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1448 }
1449
1450 ret = do_sys_poll(ufds, nfds, to);
1451
1452 /* We can restart this syscall, usually */
1453 if (ret == -EINTR) {
1454 /*
1455 * Don't restore the signal mask yet. Let do_signal() deliver
1456 * the signal on the way back to userspace, before the signal
1457 * mask is restored.
1458 */
1459 if (sigmask) {
1460 memcpy(&current->saved_sigmask, &sigsaved,
1461 sizeof(sigsaved));
1462 set_restore_sigmask();
1463 }
1464 ret = -ERESTARTNOHAND;
1465 } else if (sigmask)
1466 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1467
1468 ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret);
1469
1470 return ret;
1471}
1472#endif