diff options
Diffstat (limited to 'kernel/sys.c')
-rw-r--r-- | kernel/sys.c | 489 |
1 files changed, 342 insertions, 147 deletions
diff --git a/kernel/sys.c b/kernel/sys.c index ce8129192a26..dfce4debd138 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -62,28 +62,28 @@ | |||
62 | #include <asm/unistd.h> | 62 | #include <asm/unistd.h> |
63 | 63 | ||
64 | #ifndef SET_UNALIGN_CTL | 64 | #ifndef SET_UNALIGN_CTL |
65 | # define SET_UNALIGN_CTL(a,b) (-EINVAL) | 65 | # define SET_UNALIGN_CTL(a, b) (-EINVAL) |
66 | #endif | 66 | #endif |
67 | #ifndef GET_UNALIGN_CTL | 67 | #ifndef GET_UNALIGN_CTL |
68 | # define GET_UNALIGN_CTL(a,b) (-EINVAL) | 68 | # define GET_UNALIGN_CTL(a, b) (-EINVAL) |
69 | #endif | 69 | #endif |
70 | #ifndef SET_FPEMU_CTL | 70 | #ifndef SET_FPEMU_CTL |
71 | # define SET_FPEMU_CTL(a,b) (-EINVAL) | 71 | # define SET_FPEMU_CTL(a, b) (-EINVAL) |
72 | #endif | 72 | #endif |
73 | #ifndef GET_FPEMU_CTL | 73 | #ifndef GET_FPEMU_CTL |
74 | # define GET_FPEMU_CTL(a,b) (-EINVAL) | 74 | # define GET_FPEMU_CTL(a, b) (-EINVAL) |
75 | #endif | 75 | #endif |
76 | #ifndef SET_FPEXC_CTL | 76 | #ifndef SET_FPEXC_CTL |
77 | # define SET_FPEXC_CTL(a,b) (-EINVAL) | 77 | # define SET_FPEXC_CTL(a, b) (-EINVAL) |
78 | #endif | 78 | #endif |
79 | #ifndef GET_FPEXC_CTL | 79 | #ifndef GET_FPEXC_CTL |
80 | # define GET_FPEXC_CTL(a,b) (-EINVAL) | 80 | # define GET_FPEXC_CTL(a, b) (-EINVAL) |
81 | #endif | 81 | #endif |
82 | #ifndef GET_ENDIAN | 82 | #ifndef GET_ENDIAN |
83 | # define GET_ENDIAN(a,b) (-EINVAL) | 83 | # define GET_ENDIAN(a, b) (-EINVAL) |
84 | #endif | 84 | #endif |
85 | #ifndef SET_ENDIAN | 85 | #ifndef SET_ENDIAN |
86 | # define SET_ENDIAN(a,b) (-EINVAL) | 86 | # define SET_ENDIAN(a, b) (-EINVAL) |
87 | #endif | 87 | #endif |
88 | #ifndef GET_TSC_CTL | 88 | #ifndef GET_TSC_CTL |
89 | # define GET_TSC_CTL(a) (-EINVAL) | 89 | # define GET_TSC_CTL(a) (-EINVAL) |
@@ -182,39 +182,40 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) | |||
182 | rcu_read_lock(); | 182 | rcu_read_lock(); |
183 | read_lock(&tasklist_lock); | 183 | read_lock(&tasklist_lock); |
184 | switch (which) { | 184 | switch (which) { |
185 | case PRIO_PROCESS: | 185 | case PRIO_PROCESS: |
186 | if (who) | 186 | if (who) |
187 | p = find_task_by_vpid(who); | 187 | p = find_task_by_vpid(who); |
188 | else | 188 | else |
189 | p = current; | 189 | p = current; |
190 | if (p) | 190 | if (p) |
191 | error = set_one_prio(p, niceval, error); | 191 | error = set_one_prio(p, niceval, error); |
192 | break; | 192 | break; |
193 | case PRIO_PGRP: | 193 | case PRIO_PGRP: |
194 | if (who) | 194 | if (who) |
195 | pgrp = find_vpid(who); | 195 | pgrp = find_vpid(who); |
196 | else | 196 | else |
197 | pgrp = task_pgrp(current); | 197 | pgrp = task_pgrp(current); |
198 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { | 198 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { |
199 | error = set_one_prio(p, niceval, error); | 199 | error = set_one_prio(p, niceval, error); |
200 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); | 200 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); |
201 | break; | 201 | break; |
202 | case PRIO_USER: | 202 | case PRIO_USER: |
203 | uid = make_kuid(cred->user_ns, who); | 203 | uid = make_kuid(cred->user_ns, who); |
204 | user = cred->user; | 204 | user = cred->user; |
205 | if (!who) | 205 | if (!who) |
206 | uid = cred->uid; | 206 | uid = cred->uid; |
207 | else if (!uid_eq(uid, cred->uid) && | 207 | else if (!uid_eq(uid, cred->uid)) { |
208 | !(user = find_user(uid))) | 208 | user = find_user(uid); |
209 | if (!user) | ||
209 | goto out_unlock; /* No processes for this user */ | 210 | goto out_unlock; /* No processes for this user */ |
210 | 211 | } | |
211 | do_each_thread(g, p) { | 212 | do_each_thread(g, p) { |
212 | if (uid_eq(task_uid(p), uid)) | 213 | if (uid_eq(task_uid(p), uid)) |
213 | error = set_one_prio(p, niceval, error); | 214 | error = set_one_prio(p, niceval, error); |
214 | } while_each_thread(g, p); | 215 | } while_each_thread(g, p); |
215 | if (!uid_eq(uid, cred->uid)) | 216 | if (!uid_eq(uid, cred->uid)) |
216 | free_uid(user); /* For find_user() */ | 217 | free_uid(user); /* For find_user() */ |
217 | break; | 218 | break; |
218 | } | 219 | } |
219 | out_unlock: | 220 | out_unlock: |
220 | read_unlock(&tasklist_lock); | 221 | read_unlock(&tasklist_lock); |
@@ -244,47 +245,48 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) | |||
244 | rcu_read_lock(); | 245 | rcu_read_lock(); |
245 | read_lock(&tasklist_lock); | 246 | read_lock(&tasklist_lock); |
246 | switch (which) { | 247 | switch (which) { |
247 | case PRIO_PROCESS: | 248 | case PRIO_PROCESS: |
248 | if (who) | 249 | if (who) |
249 | p = find_task_by_vpid(who); | 250 | p = find_task_by_vpid(who); |
250 | else | 251 | else |
251 | p = current; | 252 | p = current; |
252 | if (p) { | 253 | if (p) { |
254 | niceval = nice_to_rlimit(task_nice(p)); | ||
255 | if (niceval > retval) | ||
256 | retval = niceval; | ||
257 | } | ||
258 | break; | ||
259 | case PRIO_PGRP: | ||
260 | if (who) | ||
261 | pgrp = find_vpid(who); | ||
262 | else | ||
263 | pgrp = task_pgrp(current); | ||
264 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { | ||
265 | niceval = nice_to_rlimit(task_nice(p)); | ||
266 | if (niceval > retval) | ||
267 | retval = niceval; | ||
268 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); | ||
269 | break; | ||
270 | case PRIO_USER: | ||
271 | uid = make_kuid(cred->user_ns, who); | ||
272 | user = cred->user; | ||
273 | if (!who) | ||
274 | uid = cred->uid; | ||
275 | else if (!uid_eq(uid, cred->uid)) { | ||
276 | user = find_user(uid); | ||
277 | if (!user) | ||
278 | goto out_unlock; /* No processes for this user */ | ||
279 | } | ||
280 | do_each_thread(g, p) { | ||
281 | if (uid_eq(task_uid(p), uid)) { | ||
253 | niceval = nice_to_rlimit(task_nice(p)); | 282 | niceval = nice_to_rlimit(task_nice(p)); |
254 | if (niceval > retval) | 283 | if (niceval > retval) |
255 | retval = niceval; | 284 | retval = niceval; |
256 | } | 285 | } |
257 | break; | 286 | } while_each_thread(g, p); |
258 | case PRIO_PGRP: | 287 | if (!uid_eq(uid, cred->uid)) |
259 | if (who) | 288 | free_uid(user); /* for find_user() */ |
260 | pgrp = find_vpid(who); | 289 | break; |
261 | else | ||
262 | pgrp = task_pgrp(current); | ||
263 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { | ||
264 | niceval = nice_to_rlimit(task_nice(p)); | ||
265 | if (niceval > retval) | ||
266 | retval = niceval; | ||
267 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); | ||
268 | break; | ||
269 | case PRIO_USER: | ||
270 | uid = make_kuid(cred->user_ns, who); | ||
271 | user = cred->user; | ||
272 | if (!who) | ||
273 | uid = cred->uid; | ||
274 | else if (!uid_eq(uid, cred->uid) && | ||
275 | !(user = find_user(uid))) | ||
276 | goto out_unlock; /* No processes for this user */ | ||
277 | |||
278 | do_each_thread(g, p) { | ||
279 | if (uid_eq(task_uid(p), uid)) { | ||
280 | niceval = nice_to_rlimit(task_nice(p)); | ||
281 | if (niceval > retval) | ||
282 | retval = niceval; | ||
283 | } | ||
284 | } while_each_thread(g, p); | ||
285 | if (!uid_eq(uid, cred->uid)) | ||
286 | free_uid(user); /* for find_user() */ | ||
287 | break; | ||
288 | } | 290 | } |
289 | out_unlock: | 291 | out_unlock: |
290 | read_unlock(&tasklist_lock); | 292 | read_unlock(&tasklist_lock); |
@@ -306,7 +308,7 @@ out_unlock: | |||
306 | * | 308 | * |
307 | * The general idea is that a program which uses just setregid() will be | 309 | * The general idea is that a program which uses just setregid() will be |
308 | * 100% compatible with BSD. A program which uses just setgid() will be | 310 | * 100% compatible with BSD. A program which uses just setgid() will be |
309 | * 100% compatible with POSIX with saved IDs. | 311 | * 100% compatible with POSIX with saved IDs. |
310 | * | 312 | * |
311 | * SMP: There are no races, the GIDs are checked only by filesystem | 313 | * SMP: There are no races, the GIDs are checked only by filesystem |
312 | * operations (as far as semantic preservation is concerned). | 314 | * operations (as far as semantic preservation is concerned). |
@@ -364,7 +366,7 @@ error: | |||
364 | } | 366 | } |
365 | 367 | ||
366 | /* | 368 | /* |
367 | * setgid() is implemented like SysV w/ SAVED_IDS | 369 | * setgid() is implemented like SysV w/ SAVED_IDS |
368 | * | 370 | * |
369 | * SMP: Same implicit races as above. | 371 | * SMP: Same implicit races as above. |
370 | */ | 372 | */ |
@@ -442,7 +444,7 @@ static int set_user(struct cred *new) | |||
442 | * | 444 | * |
443 | * The general idea is that a program which uses just setreuid() will be | 445 | * The general idea is that a program which uses just setreuid() will be |
444 | * 100% compatible with BSD. A program which uses just setuid() will be | 446 | * 100% compatible with BSD. A program which uses just setuid() will be |
445 | * 100% compatible with POSIX with saved IDs. | 447 | * 100% compatible with POSIX with saved IDs. |
446 | */ | 448 | */ |
447 | SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) | 449 | SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) |
448 | { | 450 | { |
@@ -503,17 +505,17 @@ error: | |||
503 | abort_creds(new); | 505 | abort_creds(new); |
504 | return retval; | 506 | return retval; |
505 | } | 507 | } |
506 | 508 | ||
507 | /* | 509 | /* |
508 | * setuid() is implemented like SysV with SAVED_IDS | 510 | * setuid() is implemented like SysV with SAVED_IDS |
509 | * | 511 | * |
510 | * Note that SAVED_ID's is deficient in that a setuid root program | 512 | * Note that SAVED_ID's is deficient in that a setuid root program |
511 | * like sendmail, for example, cannot set its uid to be a normal | 513 | * like sendmail, for example, cannot set its uid to be a normal |
512 | * user and then switch back, because if you're root, setuid() sets | 514 | * user and then switch back, because if you're root, setuid() sets |
513 | * the saved uid too. If you don't like this, blame the bright people | 515 | * the saved uid too. If you don't like this, blame the bright people |
514 | * in the POSIX committee and/or USG. Note that the BSD-style setreuid() | 516 | * in the POSIX committee and/or USG. Note that the BSD-style setreuid() |
515 | * will allow a root program to temporarily drop privileges and be able to | 517 | * will allow a root program to temporarily drop privileges and be able to |
516 | * regain them by swapping the real and effective uid. | 518 | * regain them by swapping the real and effective uid. |
517 | */ | 519 | */ |
518 | SYSCALL_DEFINE1(setuid, uid_t, uid) | 520 | SYSCALL_DEFINE1(setuid, uid_t, uid) |
519 | { | 521 | { |
@@ -637,10 +639,12 @@ SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t _ | |||
637 | euid = from_kuid_munged(cred->user_ns, cred->euid); | 639 | euid = from_kuid_munged(cred->user_ns, cred->euid); |
638 | suid = from_kuid_munged(cred->user_ns, cred->suid); | 640 | suid = from_kuid_munged(cred->user_ns, cred->suid); |
639 | 641 | ||
640 | if (!(retval = put_user(ruid, ruidp)) && | 642 | retval = put_user(ruid, ruidp); |
641 | !(retval = put_user(euid, euidp))) | 643 | if (!retval) { |
642 | retval = put_user(suid, suidp); | 644 | retval = put_user(euid, euidp); |
643 | 645 | if (!retval) | |
646 | return put_user(suid, suidp); | ||
647 | } | ||
644 | return retval; | 648 | return retval; |
645 | } | 649 | } |
646 | 650 | ||
@@ -709,9 +713,12 @@ SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t _ | |||
709 | egid = from_kgid_munged(cred->user_ns, cred->egid); | 713 | egid = from_kgid_munged(cred->user_ns, cred->egid); |
710 | sgid = from_kgid_munged(cred->user_ns, cred->sgid); | 714 | sgid = from_kgid_munged(cred->user_ns, cred->sgid); |
711 | 715 | ||
712 | if (!(retval = put_user(rgid, rgidp)) && | 716 | retval = put_user(rgid, rgidp); |
713 | !(retval = put_user(egid, egidp))) | 717 | if (!retval) { |
714 | retval = put_user(sgid, sgidp); | 718 | retval = put_user(egid, egidp); |
719 | if (!retval) | ||
720 | retval = put_user(sgid, sgidp); | ||
721 | } | ||
715 | 722 | ||
716 | return retval; | 723 | return retval; |
717 | } | 724 | } |
@@ -1284,7 +1291,6 @@ SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim) | |||
1284 | /* | 1291 | /* |
1285 | * Back compatibility for getrlimit. Needed for some apps. | 1292 | * Back compatibility for getrlimit. Needed for some apps. |
1286 | */ | 1293 | */ |
1287 | |||
1288 | SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, | 1294 | SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, |
1289 | struct rlimit __user *, rlim) | 1295 | struct rlimit __user *, rlim) |
1290 | { | 1296 | { |
@@ -1299,7 +1305,7 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, | |||
1299 | x.rlim_cur = 0x7FFFFFFF; | 1305 | x.rlim_cur = 0x7FFFFFFF; |
1300 | if (x.rlim_max > 0x7FFFFFFF) | 1306 | if (x.rlim_max > 0x7FFFFFFF) |
1301 | x.rlim_max = 0x7FFFFFFF; | 1307 | x.rlim_max = 0x7FFFFFFF; |
1302 | return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0; | 1308 | return copy_to_user(rlim, &x, sizeof(x)) ? -EFAULT : 0; |
1303 | } | 1309 | } |
1304 | 1310 | ||
1305 | #endif | 1311 | #endif |
@@ -1527,7 +1533,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1527 | cputime_t tgutime, tgstime, utime, stime; | 1533 | cputime_t tgutime, tgstime, utime, stime; |
1528 | unsigned long maxrss = 0; | 1534 | unsigned long maxrss = 0; |
1529 | 1535 | ||
1530 | memset((char *) r, 0, sizeof *r); | 1536 | memset((char *)r, 0, sizeof (*r)); |
1531 | utime = stime = 0; | 1537 | utime = stime = 0; |
1532 | 1538 | ||
1533 | if (who == RUSAGE_THREAD) { | 1539 | if (who == RUSAGE_THREAD) { |
@@ -1541,41 +1547,41 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1541 | return; | 1547 | return; |
1542 | 1548 | ||
1543 | switch (who) { | 1549 | switch (who) { |
1544 | case RUSAGE_BOTH: | 1550 | case RUSAGE_BOTH: |
1545 | case RUSAGE_CHILDREN: | 1551 | case RUSAGE_CHILDREN: |
1546 | utime = p->signal->cutime; | 1552 | utime = p->signal->cutime; |
1547 | stime = p->signal->cstime; | 1553 | stime = p->signal->cstime; |
1548 | r->ru_nvcsw = p->signal->cnvcsw; | 1554 | r->ru_nvcsw = p->signal->cnvcsw; |
1549 | r->ru_nivcsw = p->signal->cnivcsw; | 1555 | r->ru_nivcsw = p->signal->cnivcsw; |
1550 | r->ru_minflt = p->signal->cmin_flt; | 1556 | r->ru_minflt = p->signal->cmin_flt; |
1551 | r->ru_majflt = p->signal->cmaj_flt; | 1557 | r->ru_majflt = p->signal->cmaj_flt; |
1552 | r->ru_inblock = p->signal->cinblock; | 1558 | r->ru_inblock = p->signal->cinblock; |
1553 | r->ru_oublock = p->signal->coublock; | 1559 | r->ru_oublock = p->signal->coublock; |
1554 | maxrss = p->signal->cmaxrss; | 1560 | maxrss = p->signal->cmaxrss; |
1555 | 1561 | ||
1556 | if (who == RUSAGE_CHILDREN) | 1562 | if (who == RUSAGE_CHILDREN) |
1557 | break; | ||
1558 | |||
1559 | case RUSAGE_SELF: | ||
1560 | thread_group_cputime_adjusted(p, &tgutime, &tgstime); | ||
1561 | utime += tgutime; | ||
1562 | stime += tgstime; | ||
1563 | r->ru_nvcsw += p->signal->nvcsw; | ||
1564 | r->ru_nivcsw += p->signal->nivcsw; | ||
1565 | r->ru_minflt += p->signal->min_flt; | ||
1566 | r->ru_majflt += p->signal->maj_flt; | ||
1567 | r->ru_inblock += p->signal->inblock; | ||
1568 | r->ru_oublock += p->signal->oublock; | ||
1569 | if (maxrss < p->signal->maxrss) | ||
1570 | maxrss = p->signal->maxrss; | ||
1571 | t = p; | ||
1572 | do { | ||
1573 | accumulate_thread_rusage(t, r); | ||
1574 | } while_each_thread(p, t); | ||
1575 | break; | 1563 | break; |
1576 | 1564 | ||
1577 | default: | 1565 | case RUSAGE_SELF: |
1578 | BUG(); | 1566 | thread_group_cputime_adjusted(p, &tgutime, &tgstime); |
1567 | utime += tgutime; | ||
1568 | stime += tgstime; | ||
1569 | r->ru_nvcsw += p->signal->nvcsw; | ||
1570 | r->ru_nivcsw += p->signal->nivcsw; | ||
1571 | r->ru_minflt += p->signal->min_flt; | ||
1572 | r->ru_majflt += p->signal->maj_flt; | ||
1573 | r->ru_inblock += p->signal->inblock; | ||
1574 | r->ru_oublock += p->signal->oublock; | ||
1575 | if (maxrss < p->signal->maxrss) | ||
1576 | maxrss = p->signal->maxrss; | ||
1577 | t = p; | ||
1578 | do { | ||
1579 | accumulate_thread_rusage(t, r); | ||
1580 | } while_each_thread(p, t); | ||
1581 | break; | ||
1582 | |||
1583 | default: | ||
1584 | BUG(); | ||
1579 | } | 1585 | } |
1580 | unlock_task_sighand(p, &flags); | 1586 | unlock_task_sighand(p, &flags); |
1581 | 1587 | ||
@@ -1585,6 +1591,7 @@ out: | |||
1585 | 1591 | ||
1586 | if (who != RUSAGE_CHILDREN) { | 1592 | if (who != RUSAGE_CHILDREN) { |
1587 | struct mm_struct *mm = get_task_mm(p); | 1593 | struct mm_struct *mm = get_task_mm(p); |
1594 | |||
1588 | if (mm) { | 1595 | if (mm) { |
1589 | setmax_mm_hiwater_rss(&maxrss, mm); | 1596 | setmax_mm_hiwater_rss(&maxrss, mm); |
1590 | mmput(mm); | 1597 | mmput(mm); |
@@ -1596,6 +1603,7 @@ out: | |||
1596 | int getrusage(struct task_struct *p, int who, struct rusage __user *ru) | 1603 | int getrusage(struct task_struct *p, int who, struct rusage __user *ru) |
1597 | { | 1604 | { |
1598 | struct rusage r; | 1605 | struct rusage r; |
1606 | |||
1599 | k_getrusage(p, who, &r); | 1607 | k_getrusage(p, who, &r); |
1600 | return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; | 1608 | return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; |
1601 | } | 1609 | } |
@@ -1628,12 +1636,14 @@ SYSCALL_DEFINE1(umask, int, mask) | |||
1628 | return mask; | 1636 | return mask; |
1629 | } | 1637 | } |
1630 | 1638 | ||
1631 | static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) | 1639 | static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd) |
1632 | { | 1640 | { |
1633 | struct fd exe; | 1641 | struct fd exe; |
1634 | struct inode *inode; | 1642 | struct inode *inode; |
1635 | int err; | 1643 | int err; |
1636 | 1644 | ||
1645 | VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm); | ||
1646 | |||
1637 | exe = fdget(fd); | 1647 | exe = fdget(fd); |
1638 | if (!exe.file) | 1648 | if (!exe.file) |
1639 | return -EBADF; | 1649 | return -EBADF; |
@@ -1654,8 +1664,6 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) | |||
1654 | if (err) | 1664 | if (err) |
1655 | goto exit; | 1665 | goto exit; |
1656 | 1666 | ||
1657 | down_write(&mm->mmap_sem); | ||
1658 | |||
1659 | /* | 1667 | /* |
1660 | * Forbid mm->exe_file change if old file still mapped. | 1668 | * Forbid mm->exe_file change if old file still mapped. |
1661 | */ | 1669 | */ |
@@ -1667,7 +1675,7 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) | |||
1667 | if (vma->vm_file && | 1675 | if (vma->vm_file && |
1668 | path_equal(&vma->vm_file->f_path, | 1676 | path_equal(&vma->vm_file->f_path, |
1669 | &mm->exe_file->f_path)) | 1677 | &mm->exe_file->f_path)) |
1670 | goto exit_unlock; | 1678 | goto exit; |
1671 | } | 1679 | } |
1672 | 1680 | ||
1673 | /* | 1681 | /* |
@@ -1678,34 +1686,222 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) | |||
1678 | */ | 1686 | */ |
1679 | err = -EPERM; | 1687 | err = -EPERM; |
1680 | if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags)) | 1688 | if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags)) |
1681 | goto exit_unlock; | 1689 | goto exit; |
1682 | 1690 | ||
1683 | err = 0; | 1691 | err = 0; |
1684 | set_mm_exe_file(mm, exe.file); /* this grabs a reference to exe.file */ | 1692 | set_mm_exe_file(mm, exe.file); /* this grabs a reference to exe.file */ |
1685 | exit_unlock: | ||
1686 | up_write(&mm->mmap_sem); | ||
1687 | |||
1688 | exit: | 1693 | exit: |
1689 | fdput(exe); | 1694 | fdput(exe); |
1690 | return err; | 1695 | return err; |
1691 | } | 1696 | } |
1692 | 1697 | ||
1698 | #ifdef CONFIG_CHECKPOINT_RESTORE | ||
1699 | /* | ||
1700 | * WARNING: we don't require any capability here so be very careful | ||
1701 | * in what is allowed for modification from userspace. | ||
1702 | */ | ||
1703 | static int validate_prctl_map(struct prctl_mm_map *prctl_map) | ||
1704 | { | ||
1705 | unsigned long mmap_max_addr = TASK_SIZE; | ||
1706 | struct mm_struct *mm = current->mm; | ||
1707 | int error = -EINVAL, i; | ||
1708 | |||
1709 | static const unsigned char offsets[] = { | ||
1710 | offsetof(struct prctl_mm_map, start_code), | ||
1711 | offsetof(struct prctl_mm_map, end_code), | ||
1712 | offsetof(struct prctl_mm_map, start_data), | ||
1713 | offsetof(struct prctl_mm_map, end_data), | ||
1714 | offsetof(struct prctl_mm_map, start_brk), | ||
1715 | offsetof(struct prctl_mm_map, brk), | ||
1716 | offsetof(struct prctl_mm_map, start_stack), | ||
1717 | offsetof(struct prctl_mm_map, arg_start), | ||
1718 | offsetof(struct prctl_mm_map, arg_end), | ||
1719 | offsetof(struct prctl_mm_map, env_start), | ||
1720 | offsetof(struct prctl_mm_map, env_end), | ||
1721 | }; | ||
1722 | |||
1723 | /* | ||
1724 | * Make sure the members are not somewhere outside | ||
1725 | * of allowed address space. | ||
1726 | */ | ||
1727 | for (i = 0; i < ARRAY_SIZE(offsets); i++) { | ||
1728 | u64 val = *(u64 *)((char *)prctl_map + offsets[i]); | ||
1729 | |||
1730 | if ((unsigned long)val >= mmap_max_addr || | ||
1731 | (unsigned long)val < mmap_min_addr) | ||
1732 | goto out; | ||
1733 | } | ||
1734 | |||
1735 | /* | ||
1736 | * Make sure the pairs are ordered. | ||
1737 | */ | ||
1738 | #define __prctl_check_order(__m1, __op, __m2) \ | ||
1739 | ((unsigned long)prctl_map->__m1 __op \ | ||
1740 | (unsigned long)prctl_map->__m2) ? 0 : -EINVAL | ||
1741 | error = __prctl_check_order(start_code, <, end_code); | ||
1742 | error |= __prctl_check_order(start_data, <, end_data); | ||
1743 | error |= __prctl_check_order(start_brk, <=, brk); | ||
1744 | error |= __prctl_check_order(arg_start, <=, arg_end); | ||
1745 | error |= __prctl_check_order(env_start, <=, env_end); | ||
1746 | if (error) | ||
1747 | goto out; | ||
1748 | #undef __prctl_check_order | ||
1749 | |||
1750 | error = -EINVAL; | ||
1751 | |||
1752 | /* | ||
1753 | * @brk should be after @end_data in traditional maps. | ||
1754 | */ | ||
1755 | if (prctl_map->start_brk <= prctl_map->end_data || | ||
1756 | prctl_map->brk <= prctl_map->end_data) | ||
1757 | goto out; | ||
1758 | |||
1759 | /* | ||
1760 | * Nor should we allow the limits to be overridden if they are set. | ||
1761 | */ | ||
1762 | if (check_data_rlimit(rlimit(RLIMIT_DATA), prctl_map->brk, | ||
1763 | prctl_map->start_brk, prctl_map->end_data, | ||
1764 | prctl_map->start_data)) | ||
1765 | goto out; | ||
1766 | |||
1767 | /* | ||
1768 | * Someone is trying to cheat the auxv vector. | ||
1769 | */ | ||
1770 | if (prctl_map->auxv_size) { | ||
1771 | if (!prctl_map->auxv || prctl_map->auxv_size > sizeof(mm->saved_auxv)) | ||
1772 | goto out; | ||
1773 | } | ||
1774 | |||
1775 | /* | ||
1776 | * Finally, make sure the caller has the rights to | ||
1777 | * change /proc/pid/exe link: only local root should | ||
1778 | * be allowed to. | ||
1779 | */ | ||
1780 | if (prctl_map->exe_fd != (u32)-1) { | ||
1781 | struct user_namespace *ns = current_user_ns(); | ||
1782 | const struct cred *cred = current_cred(); | ||
1783 | |||
1784 | if (!uid_eq(cred->uid, make_kuid(ns, 0)) || | ||
1785 | !gid_eq(cred->gid, make_kgid(ns, 0))) | ||
1786 | goto out; | ||
1787 | } | ||
1788 | |||
1789 | error = 0; | ||
1790 | out: | ||
1791 | return error; | ||
1792 | } | ||
1793 | |||
1794 | static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size) | ||
1795 | { | ||
1796 | struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, }; | ||
1797 | unsigned long user_auxv[AT_VECTOR_SIZE]; | ||
1798 | struct mm_struct *mm = current->mm; | ||
1799 | int error; | ||
1800 | |||
1801 | BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv)); | ||
1802 | BUILD_BUG_ON(sizeof(struct prctl_mm_map) > 256); | ||
1803 | |||
1804 | if (opt == PR_SET_MM_MAP_SIZE) | ||
1805 | return put_user((unsigned int)sizeof(prctl_map), | ||
1806 | (unsigned int __user *)addr); | ||
1807 | |||
1808 | if (data_size != sizeof(prctl_map)) | ||
1809 | return -EINVAL; | ||
1810 | |||
1811 | if (copy_from_user(&prctl_map, addr, sizeof(prctl_map))) | ||
1812 | return -EFAULT; | ||
1813 | |||
1814 | error = validate_prctl_map(&prctl_map); | ||
1815 | if (error) | ||
1816 | return error; | ||
1817 | |||
1818 | if (prctl_map.auxv_size) { | ||
1819 | memset(user_auxv, 0, sizeof(user_auxv)); | ||
1820 | if (copy_from_user(user_auxv, | ||
1821 | (const void __user *)prctl_map.auxv, | ||
1822 | prctl_map.auxv_size)) | ||
1823 | return -EFAULT; | ||
1824 | |||
1825 | /* Last entry must be AT_NULL as specification requires */ | ||
1826 | user_auxv[AT_VECTOR_SIZE - 2] = AT_NULL; | ||
1827 | user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL; | ||
1828 | } | ||
1829 | |||
1830 | down_write(&mm->mmap_sem); | ||
1831 | if (prctl_map.exe_fd != (u32)-1) | ||
1832 | error = prctl_set_mm_exe_file_locked(mm, prctl_map.exe_fd); | ||
1833 | downgrade_write(&mm->mmap_sem); | ||
1834 | if (error) | ||
1835 | goto out; | ||
1836 | |||
1837 | /* | ||
1838 | * We don't validate if these members are pointing to | ||
1839 | * real present VMAs because the application may have the corresponding | ||
1840 | * VMAs already unmapped and the kernel uses these members for statistics | ||
1841 | * output in procfs mostly, except | ||
1842 | * | ||
1843 | * - @start_brk/@brk which are used in do_brk but the kernel looks up | ||
1844 | * VMAs when updating these members so anything wrong written | ||
1845 | * here causes the kernel to swear at the userspace program but won't lead | ||
1846 | * to any problem in the kernel itself | ||
1847 | */ | ||
1848 | |||
1849 | mm->start_code = prctl_map.start_code; | ||
1850 | mm->end_code = prctl_map.end_code; | ||
1851 | mm->start_data = prctl_map.start_data; | ||
1852 | mm->end_data = prctl_map.end_data; | ||
1853 | mm->start_brk = prctl_map.start_brk; | ||
1854 | mm->brk = prctl_map.brk; | ||
1855 | mm->start_stack = prctl_map.start_stack; | ||
1856 | mm->arg_start = prctl_map.arg_start; | ||
1857 | mm->arg_end = prctl_map.arg_end; | ||
1858 | mm->env_start = prctl_map.env_start; | ||
1859 | mm->env_end = prctl_map.env_end; | ||
1860 | |||
1861 | /* | ||
1862 | * Note this update of @saved_auxv is lockless thus | ||
1863 | * if someone reads this member in procfs while we're | ||
1864 | * updating -- it may get partly updated results. It's | ||
1865 | * known and acceptable trade off: we leave it as is to | ||
1866 | * not introduce additional locks here making the kernel | ||
1867 | * more complex. | ||
1868 | */ | ||
1869 | if (prctl_map.auxv_size) | ||
1870 | memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv)); | ||
1871 | |||
1872 | error = 0; | ||
1873 | out: | ||
1874 | up_read(&mm->mmap_sem); | ||
1875 | return error; | ||
1876 | } | ||
1877 | #endif /* CONFIG_CHECKPOINT_RESTORE */ | ||
1878 | |||
1693 | static int prctl_set_mm(int opt, unsigned long addr, | 1879 | static int prctl_set_mm(int opt, unsigned long addr, |
1694 | unsigned long arg4, unsigned long arg5) | 1880 | unsigned long arg4, unsigned long arg5) |
1695 | { | 1881 | { |
1696 | unsigned long rlim = rlimit(RLIMIT_DATA); | ||
1697 | struct mm_struct *mm = current->mm; | 1882 | struct mm_struct *mm = current->mm; |
1698 | struct vm_area_struct *vma; | 1883 | struct vm_area_struct *vma; |
1699 | int error; | 1884 | int error; |
1700 | 1885 | ||
1701 | if (arg5 || (arg4 && opt != PR_SET_MM_AUXV)) | 1886 | if (arg5 || (arg4 && (opt != PR_SET_MM_AUXV && |
1887 | opt != PR_SET_MM_MAP && | ||
1888 | opt != PR_SET_MM_MAP_SIZE))) | ||
1702 | return -EINVAL; | 1889 | return -EINVAL; |
1703 | 1890 | ||
1891 | #ifdef CONFIG_CHECKPOINT_RESTORE | ||
1892 | if (opt == PR_SET_MM_MAP || opt == PR_SET_MM_MAP_SIZE) | ||
1893 | return prctl_set_mm_map(opt, (const void __user *)addr, arg4); | ||
1894 | #endif | ||
1895 | |||
1704 | if (!capable(CAP_SYS_RESOURCE)) | 1896 | if (!capable(CAP_SYS_RESOURCE)) |
1705 | return -EPERM; | 1897 | return -EPERM; |
1706 | 1898 | ||
1707 | if (opt == PR_SET_MM_EXE_FILE) | 1899 | if (opt == PR_SET_MM_EXE_FILE) { |
1708 | return prctl_set_mm_exe_file(mm, (unsigned int)addr); | 1900 | down_write(&mm->mmap_sem); |
1901 | error = prctl_set_mm_exe_file_locked(mm, (unsigned int)addr); | ||
1902 | up_write(&mm->mmap_sem); | ||
1903 | return error; | ||
1904 | } | ||
1709 | 1905 | ||
1710 | if (addr >= TASK_SIZE || addr < mmap_min_addr) | 1906 | if (addr >= TASK_SIZE || addr < mmap_min_addr) |
1711 | return -EINVAL; | 1907 | return -EINVAL; |
@@ -1733,9 +1929,8 @@ static int prctl_set_mm(int opt, unsigned long addr, | |||
1733 | if (addr <= mm->end_data) | 1929 | if (addr <= mm->end_data) |
1734 | goto out; | 1930 | goto out; |
1735 | 1931 | ||
1736 | if (rlim < RLIM_INFINITY && | 1932 | if (check_data_rlimit(rlimit(RLIMIT_DATA), mm->brk, addr, |
1737 | (mm->brk - addr) + | 1933 | mm->end_data, mm->start_data)) |
1738 | (mm->end_data - mm->start_data) > rlim) | ||
1739 | goto out; | 1934 | goto out; |
1740 | 1935 | ||
1741 | mm->start_brk = addr; | 1936 | mm->start_brk = addr; |
@@ -1745,9 +1940,8 @@ static int prctl_set_mm(int opt, unsigned long addr, | |||
1745 | if (addr <= mm->end_data) | 1940 | if (addr <= mm->end_data) |
1746 | goto out; | 1941 | goto out; |
1747 | 1942 | ||
1748 | if (rlim < RLIM_INFINITY && | 1943 | if (check_data_rlimit(rlimit(RLIMIT_DATA), addr, mm->start_brk, |
1749 | (addr - mm->start_brk) + | 1944 | mm->end_data, mm->start_data)) |
1750 | (mm->end_data - mm->start_data) > rlim) | ||
1751 | goto out; | 1945 | goto out; |
1752 | 1946 | ||
1753 | mm->brk = addr; | 1947 | mm->brk = addr; |
@@ -2023,6 +2217,7 @@ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, | |||
2023 | { | 2217 | { |
2024 | int err = 0; | 2218 | int err = 0; |
2025 | int cpu = raw_smp_processor_id(); | 2219 | int cpu = raw_smp_processor_id(); |
2220 | |||
2026 | if (cpup) | 2221 | if (cpup) |
2027 | err |= put_user(cpu, cpup); | 2222 | err |= put_user(cpu, cpup); |
2028 | if (nodep) | 2223 | if (nodep) |
@@ -2135,7 +2330,7 @@ COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info) | |||
2135 | /* Check to see if any memory value is too large for 32-bit and scale | 2330 | /* Check to see if any memory value is too large for 32-bit and scale |
2136 | * down if needed | 2331 | * down if needed |
2137 | */ | 2332 | */ |
2138 | if ((s.totalram >> 32) || (s.totalswap >> 32)) { | 2333 | if (upper_32_bits(s.totalram) || upper_32_bits(s.totalswap)) { |
2139 | int bitcount = 0; | 2334 | int bitcount = 0; |
2140 | 2335 | ||
2141 | while (s.mem_unit < PAGE_SIZE) { | 2336 | while (s.mem_unit < PAGE_SIZE) { |