path: root/kernel/sys.c
Diffstat (limited to 'kernel/sys.c')
-rw-r--r--  kernel/sys.c | 514
1 files changed, 352 insertions(+), 162 deletions(-)
diff --git a/kernel/sys.c b/kernel/sys.c
index f91218a5463e..7ef7f6054c28 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -95,99 +95,304 @@ int cad_pid = 1;
  * and the like.
  */
 
-static struct notifier_block *reboot_notifier_list;
-static DEFINE_RWLOCK(notifier_lock);
+static BLOCKING_NOTIFIER_HEAD(reboot_notifier_list);
+
+/*
+ * Notifier chain core routines.  The exported routines below
+ * are layered on top of these, with appropriate locking added.
+ */
+
+static int notifier_chain_register(struct notifier_block **nl,
+                struct notifier_block *n)
+{
+        while ((*nl) != NULL) {
+                if (n->priority > (*nl)->priority)
+                        break;
+                nl = &((*nl)->next);
+        }
+        n->next = *nl;
+        rcu_assign_pointer(*nl, n);
+        return 0;
+}
+
+static int notifier_chain_unregister(struct notifier_block **nl,
+                struct notifier_block *n)
+{
+        while ((*nl) != NULL) {
+                if ((*nl) == n) {
+                        rcu_assign_pointer(*nl, n->next);
+                        return 0;
+                }
+                nl = &((*nl)->next);
+        }
+        return -ENOENT;
+}
+
+static int __kprobes notifier_call_chain(struct notifier_block **nl,
+                unsigned long val, void *v)
+{
+        int ret = NOTIFY_DONE;
+        struct notifier_block *nb;
+
+        nb = rcu_dereference(*nl);
+        while (nb) {
+                ret = nb->notifier_call(nb, val, v);
+                if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)
+                        break;
+                nb = rcu_dereference(nb->next);
+        }
+        return ret;
+}
+
+/*
+ * Atomic notifier chain routines.  Registration and unregistration
+ * use a mutex, and call_chain is synchronized by RCU (no locks).
+ */
 
 /**
- * notifier_chain_register - Add notifier to a notifier chain
- * @list: Pointer to root list pointer
+ * atomic_notifier_chain_register - Add notifier to an atomic notifier chain
+ * @nh: Pointer to head of the atomic notifier chain
  * @n: New entry in notifier chain
  *
- * Adds a notifier to a notifier chain.
+ * Adds a notifier to an atomic notifier chain.
  *
  * Currently always returns zero.
  */
+
+int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
+                struct notifier_block *n)
+{
+        unsigned long flags;
+        int ret;
+
+        spin_lock_irqsave(&nh->lock, flags);
+        ret = notifier_chain_register(&nh->head, n);
+        spin_unlock_irqrestore(&nh->lock, flags);
+        return ret;
+}
+
+EXPORT_SYMBOL_GPL(atomic_notifier_chain_register);
+
+/**
+ * atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain
+ * @nh: Pointer to head of the atomic notifier chain
+ * @n: Entry to remove from notifier chain
+ *
+ * Removes a notifier from an atomic notifier chain.
+ *
+ * Returns zero on success or %-ENOENT on failure.
+ */
+int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
+                struct notifier_block *n)
+{
+        unsigned long flags;
+        int ret;
+
+        spin_lock_irqsave(&nh->lock, flags);
+        ret = notifier_chain_unregister(&nh->head, n);
+        spin_unlock_irqrestore(&nh->lock, flags);
+        synchronize_rcu();
+        return ret;
+}
+
+EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
+
+/**
+ * atomic_notifier_call_chain - Call functions in an atomic notifier chain
+ * @nh: Pointer to head of the atomic notifier chain
+ * @val: Value passed unmodified to notifier function
+ * @v: Pointer passed unmodified to notifier function
+ *
+ * Calls each function in a notifier chain in turn.  The functions
+ * run in an atomic context, so they must not block.
+ * This routine uses RCU to synchronize with changes to the chain.
+ *
+ * If the return value of the notifier can be and'ed
+ * with %NOTIFY_STOP_MASK then atomic_notifier_call_chain
+ * will return immediately, with the return value of
+ * the notifier function which halted execution.
+ * Otherwise the return value is the return value
+ * of the last notifier function called.
+ */
 
-int notifier_chain_register(struct notifier_block **list, struct notifier_block *n)
+int atomic_notifier_call_chain(struct atomic_notifier_head *nh,
+                unsigned long val, void *v)
 {
-        write_lock(&notifier_lock);
-        while(*list)
-        {
-                if(n->priority > (*list)->priority)
-                        break;
-                list= &((*list)->next);
-        }
-        n->next = *list;
-        *list=n;
-        write_unlock(&notifier_lock);
-        return 0;
+        int ret;
+
+        rcu_read_lock();
+        ret = notifier_call_chain(&nh->head, val, v);
+        rcu_read_unlock();
+        return ret;
 }
 
-EXPORT_SYMBOL(notifier_chain_register);
+EXPORT_SYMBOL_GPL(atomic_notifier_call_chain);
+
+/*
+ * Blocking notifier chain routines.  All access to the chain is
+ * synchronized by an rwsem.
+ */
 
 /**
- * notifier_chain_unregister - Remove notifier from a notifier chain
- * @nl: Pointer to root list pointer
+ * blocking_notifier_chain_register - Add notifier to a blocking notifier chain
+ * @nh: Pointer to head of the blocking notifier chain
  * @n: New entry in notifier chain
  *
- * Removes a notifier from a notifier chain.
+ * Adds a notifier to a blocking notifier chain.
+ * Must be called in process context.
  *
- * Returns zero on success, or %-ENOENT on failure.
+ * Currently always returns zero.
  */
 
-int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n)
+int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
+                struct notifier_block *n)
 {
-        write_lock(&notifier_lock);
-        while((*nl)!=NULL)
-        {
-                if((*nl)==n)
-                {
-                        *nl=n->next;
-                        write_unlock(&notifier_lock);
-                        return 0;
-                }
-                nl=&((*nl)->next);
-        }
-        write_unlock(&notifier_lock);
-        return -ENOENT;
+        int ret;
+
+        /*
+         * This code gets used during boot-up, when task switching is
+         * not yet working and interrupts must remain disabled.  At
+         * such times we must not call down_write().
+         */
+        if (unlikely(system_state == SYSTEM_BOOTING))
+                return notifier_chain_register(&nh->head, n);
+
+        down_write(&nh->rwsem);
+        ret = notifier_chain_register(&nh->head, n);
+        up_write(&nh->rwsem);
+        return ret;
 }
 
-EXPORT_SYMBOL(notifier_chain_unregister);
+EXPORT_SYMBOL_GPL(blocking_notifier_chain_register);
 
 /**
- * notifier_call_chain - Call functions in a notifier chain
- * @n: Pointer to root pointer of notifier chain
+ * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain
+ * @nh: Pointer to head of the blocking notifier chain
+ * @n: Entry to remove from notifier chain
+ *
+ * Removes a notifier from a blocking notifier chain.
+ * Must be called from process context.
+ *
+ * Returns zero on success or %-ENOENT on failure.
+ */
+int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh,
+                struct notifier_block *n)
+{
+        int ret;
+
+        /*
+         * This code gets used during boot-up, when task switching is
+         * not yet working and interrupts must remain disabled.  At
+         * such times we must not call down_write().
+         */
+        if (unlikely(system_state == SYSTEM_BOOTING))
+                return notifier_chain_unregister(&nh->head, n);
+
+        down_write(&nh->rwsem);
+        ret = notifier_chain_unregister(&nh->head, n);
+        up_write(&nh->rwsem);
+        return ret;
+}
+
+EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister);
+
+/**
+ * blocking_notifier_call_chain - Call functions in a blocking notifier chain
+ * @nh: Pointer to head of the blocking notifier chain
  * @val: Value passed unmodified to notifier function
  * @v: Pointer passed unmodified to notifier function
  *
- * Calls each function in a notifier chain in turn.
+ * Calls each function in a notifier chain in turn.  The functions
+ * run in a process context, so they are allowed to block.
  *
- * If the return value of the notifier can be and'd
- * with %NOTIFY_STOP_MASK, then notifier_call_chain
+ * If the return value of the notifier can be and'ed
+ * with %NOTIFY_STOP_MASK then blocking_notifier_call_chain
  * will return immediately, with the return value of
  * the notifier function which halted execution.
- * Otherwise, the return value is the return value
+ * Otherwise the return value is the return value
  * of the last notifier function called.
  */
 
-int __kprobes notifier_call_chain(struct notifier_block **n, unsigned long val, void *v)
+int blocking_notifier_call_chain(struct blocking_notifier_head *nh,
+                unsigned long val, void *v)
 {
-        int ret=NOTIFY_DONE;
-        struct notifier_block *nb = *n;
+        int ret;
 
-        while(nb)
-        {
-                ret=nb->notifier_call(nb,val,v);
-                if(ret&NOTIFY_STOP_MASK)
-                {
-                        return ret;
-                }
-                nb=nb->next;
-        }
-        return ret;
+        down_read(&nh->rwsem);
+        ret = notifier_call_chain(&nh->head, val, v);
+        up_read(&nh->rwsem);
+        return ret;
 }
 
-EXPORT_SYMBOL(notifier_call_chain);
+EXPORT_SYMBOL_GPL(blocking_notifier_call_chain);
+
+/*
+ * Raw notifier chain routines.  There is no protection;
+ * the caller must provide it.  Use at your own risk!
+ */
+
+/**
+ * raw_notifier_chain_register - Add notifier to a raw notifier chain
+ * @nh: Pointer to head of the raw notifier chain
+ * @n: New entry in notifier chain
+ *
+ * Adds a notifier to a raw notifier chain.
+ * All locking must be provided by the caller.
+ *
+ * Currently always returns zero.
+ */
+
+int raw_notifier_chain_register(struct raw_notifier_head *nh,
+                struct notifier_block *n)
+{
+        return notifier_chain_register(&nh->head, n);
+}
+
+EXPORT_SYMBOL_GPL(raw_notifier_chain_register);
+
+/**
+ * raw_notifier_chain_unregister - Remove notifier from a raw notifier chain
+ * @nh: Pointer to head of the raw notifier chain
+ * @n: Entry to remove from notifier chain
+ *
+ * Removes a notifier from a raw notifier chain.
+ * All locking must be provided by the caller.
+ *
+ * Returns zero on success or %-ENOENT on failure.
+ */
+int raw_notifier_chain_unregister(struct raw_notifier_head *nh,
+                struct notifier_block *n)
+{
+        return notifier_chain_unregister(&nh->head, n);
+}
+
+EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister);
+
+/**
+ * raw_notifier_call_chain - Call functions in a raw notifier chain
+ * @nh: Pointer to head of the raw notifier chain
+ * @val: Value passed unmodified to notifier function
+ * @v: Pointer passed unmodified to notifier function
+ *
+ * Calls each function in a notifier chain in turn.  The functions
+ * run in an undefined context.
+ * All locking must be provided by the caller.
+ *
+ * If the return value of the notifier can be and'ed
+ * with %NOTIFY_STOP_MASK then raw_notifier_call_chain
+ * will return immediately, with the return value of
+ * the notifier function which halted execution.
+ * Otherwise the return value is the return value
+ * of the last notifier function called.
+ */
+
+int raw_notifier_call_chain(struct raw_notifier_head *nh,
+                unsigned long val, void *v)
+{
+        return notifier_call_chain(&nh->head, val, v);
+}
+
+EXPORT_SYMBOL_GPL(raw_notifier_call_chain);
 
 /**
  * register_reboot_notifier - Register function to be called at reboot time
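
Usage note (commentary, not part of the patch): the hunk above replaces the single rwlock-protected chain with three flavours -- atomic (spinlock plus RCU walk), blocking (rwsem) and raw (caller-provided locking). A minimal sketch of a client of the new blocking variant follows; the chain and callback names (example_chain, example_event) are hypothetical, not anything added by this commit.

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/notifier.h>

/* Hypothetical private chain; BLOCKING_NOTIFIER_HEAD statically initializes the rwsem and head. */
static BLOCKING_NOTIFIER_HEAD(example_chain);

static int example_event(struct notifier_block *nb, unsigned long val, void *data)
{
        printk(KERN_INFO "example: event %lu\n", val);
        return NOTIFY_OK;               /* keep walking the rest of the chain */
}

static struct notifier_block example_nb = {
        .notifier_call  = example_event,
        .priority       = 0,            /* higher-priority entries run first */
};

static int __init example_init(void)
{
        /* Registration may sleep: the chain is protected by an rwsem, not a spinlock. */
        return blocking_notifier_chain_register(&example_chain, &example_nb);
}

static void __exit example_exit(void)
{
        blocking_notifier_chain_unregister(&example_chain, &example_nb);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");

Some other code path would then call blocking_notifier_call_chain(&example_chain, some_event_code, NULL) from process context to run every registered callback in priority order.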
@@ -196,13 +401,13 @@ EXPORT_SYMBOL(notifier_call_chain);
  * Registers a function with the list of functions
  * to be called at reboot time.
  *
- * Currently always returns zero, as notifier_chain_register
+ * Currently always returns zero, as blocking_notifier_chain_register
  * always returns zero.
  */
 
 int register_reboot_notifier(struct notifier_block * nb)
 {
-        return notifier_chain_register(&reboot_notifier_list, nb);
+        return blocking_notifier_chain_register(&reboot_notifier_list, nb);
 }
 
 EXPORT_SYMBOL(register_reboot_notifier);
@@ -219,23 +424,11 @@ EXPORT_SYMBOL(register_reboot_notifier);
 
 int unregister_reboot_notifier(struct notifier_block * nb)
 {
-        return notifier_chain_unregister(&reboot_notifier_list, nb);
+        return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
 }
 
 EXPORT_SYMBOL(unregister_reboot_notifier);
 
-#ifndef CONFIG_SECURITY
-int capable(int cap)
-{
-        if (cap_raised(current->cap_effective, cap)) {
-                current->flags |= PF_SUPERPRIV;
-                return 1;
-        }
-        return 0;
-}
-EXPORT_SYMBOL(capable);
-#endif
-
 static int set_one_prio(struct task_struct *p, int niceval, int error)
 {
         int no_nice;
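
Commentary, not part of the patch: callers of the reboot notifier interface are unaffected by the conversion above; register_reboot_notifier() keeps its old signature and only the chain behind it becomes a blocking notifier chain (so callbacks may now sleep). A hypothetical caller might look like this (the example_reboot* names are made up for illustration):

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/reboot.h>

static int example_reboot(struct notifier_block *nb, unsigned long code, void *cmd)
{
        /* code is SYS_RESTART, SYS_HALT or SYS_POWER_OFF; cmd may carry the restart command string */
        if (code == SYS_RESTART)
                printk(KERN_INFO "example: restart requested (cmd=%s)\n",
                       cmd ? (char *)cmd : "none");
        return NOTIFY_DONE;
}

static struct notifier_block example_reboot_nb = {
        .notifier_call = example_reboot,
};

static int __init example_reboot_init(void)
{
        return register_reboot_notifier(&example_reboot_nb);
}

static void __exit example_reboot_exit(void)
{
        unregister_reboot_notifier(&example_reboot_nb);
}

module_init(example_reboot_init);
module_exit(example_reboot_exit);
MODULE_LICENSE("GPL");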
@@ -392,7 +585,7 @@ EXPORT_SYMBOL_GPL(emergency_restart);
 
 void kernel_restart_prepare(char *cmd)
 {
-        notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
+        blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
         system_state = SYSTEM_RESTART;
         device_shutdown();
 }
@@ -442,7 +635,7 @@ EXPORT_SYMBOL_GPL(kernel_kexec);
 
 void kernel_shutdown_prepare(enum system_states state)
 {
-        notifier_call_chain(&reboot_notifier_list,
+        blocking_notifier_call_chain(&reboot_notifier_list,
                 (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
         system_state = state;
         device_shutdown();
@@ -1009,69 +1202,24 @@ asmlinkage long sys_times(struct tms __user * tbuf)
  */
         if (tbuf) {
                 struct tms tmp;
+                struct task_struct *tsk = current;
+                struct task_struct *t;
                 cputime_t utime, stime, cutime, cstime;
 
-#ifdef CONFIG_SMP
-                if (thread_group_empty(current)) {
-                        /*
-                         * Single thread case without the use of any locks.
-                         *
-                         * We may race with release_task if two threads are
-                         * executing. However, release task first adds up the
-                         * counters (__exit_signal) before removing the task
-                         * from the process tasklist (__unhash_process).
-                         * __exit_signal also acquires and releases the
-                         * siglock which results in the proper memory ordering
-                         * so that the list modifications are always visible
-                         * after the counters have been updated.
-                         *
-                         * If the counters have been updated by the second thread
-                         * but the thread has not yet been removed from the list
-                         * then the other branch will be executing which will
-                         * block on tasklist_lock until the exit handling of the
-                         * other task is finished.
-                         *
-                         * This also implies that the sighand->siglock cannot
-                         * be held by another processor. So we can also
-                         * skip acquiring that lock.
-                         */
-                        utime = cputime_add(current->signal->utime, current->utime);
-                        stime = cputime_add(current->signal->utime, current->stime);
-                        cutime = current->signal->cutime;
-                        cstime = current->signal->cstime;
-                } else
-#endif
-                {
+                spin_lock_irq(&tsk->sighand->siglock);
+                utime = tsk->signal->utime;
+                stime = tsk->signal->stime;
+                t = tsk;
+                do {
+                        utime = cputime_add(utime, t->utime);
+                        stime = cputime_add(stime, t->stime);
+                        t = next_thread(t);
+                } while (t != tsk);
 
-                        /* Process with multiple threads */
-                        struct task_struct *tsk = current;
-                        struct task_struct *t;
-
-                        read_lock(&tasklist_lock);
-                        utime = tsk->signal->utime;
-                        stime = tsk->signal->stime;
-                        t = tsk;
-                        do {
-                                utime = cputime_add(utime, t->utime);
-                                stime = cputime_add(stime, t->stime);
-                                t = next_thread(t);
-                        } while (t != tsk);
+                cutime = tsk->signal->cutime;
+                cstime = tsk->signal->cstime;
+                spin_unlock_irq(&tsk->sighand->siglock);
 
-                        /*
-                         * While we have tasklist_lock read-locked, no dying thread
-                         * can be updating current->signal->[us]time. Instead,
-                         * we got their counts included in the live thread loop.
-                         * However, another thread can come in right now and
-                         * do a wait call that updates current->signal->c[us]time.
-                         * To make sure we always see that pair updated atomically,
-                         * we take the siglock around fetching them.
-                         */
-                        spin_lock_irq(&tsk->sighand->siglock);
-                        cutime = tsk->signal->cutime;
-                        cstime = tsk->signal->cstime;
-                        spin_unlock_irq(&tsk->sighand->siglock);
-                        read_unlock(&tasklist_lock);
-                }
                 tmp.tms_utime = cputime_to_clock_t(utime);
                 tmp.tms_stime = cputime_to_clock_t(stime);
                 tmp.tms_cutime = cputime_to_clock_t(cutime);
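
Commentary, not part of the patch: the rewritten sys_times() drops the CONFIG_SMP special case and always sums the per-thread utime/stime of the whole thread group under the siglock. The userspace view of times(2) is unchanged; for reference, a minimal caller (values are reported in clock ticks and converted here via sysconf(_SC_CLK_TCK)):

#include <stdio.h>
#include <sys/times.h>
#include <unistd.h>

int main(void)
{
        struct tms t;
        long hz = sysconf(_SC_CLK_TCK);         /* clock ticks per second */

        if (times(&t) == (clock_t)-1)
                return 1;
        printf("user %.2fs  system %.2fs  children user %.2fs  children system %.2fs\n",
               (double)t.tms_utime / hz, (double)t.tms_stime / hz,
               (double)t.tms_cutime / hz, (double)t.tms_cstime / hz);
        return 0;
}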
@@ -1227,7 +1375,7 @@ asmlinkage long sys_setsid(void)
         struct pid *pid;
         int err = -EPERM;
 
-        down(&tty_sem);
+        mutex_lock(&tty_mutex);
         write_lock_irq(&tasklist_lock);
 
         pid = find_pid(PIDTYPE_PGID, group_leader->pid);
@@ -1241,7 +1389,7 @@ asmlinkage long sys_setsid(void)
                 err = process_group(group_leader);
 out:
         write_unlock_irq(&tasklist_lock);
-        up(&tty_sem);
+        mutex_unlock(&tty_mutex);
         return err;
 }
 
@@ -1375,7 +1523,7 @@ static void groups_sort(struct group_info *group_info)
 /* a simple bsearch */
 int groups_search(struct group_info *group_info, gid_t grp)
 {
-        int left, right;
+        unsigned int left, right;
 
         if (!group_info)
                 return 0;
@@ -1383,7 +1531,7 @@ int groups_search(struct group_info *group_info, gid_t grp)
         left = 0;
         right = group_info->ngroups;
         while (left < right) {
-                int mid = (left+right)/2;
+                unsigned int mid = (left+right)/2;
                 int cmp = grp - GROUP_AT(group_info, mid);
                 if (cmp > 0)
                         left = mid + 1;
@@ -1433,7 +1581,6 @@ asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist)
                 return -EINVAL;
 
         /* no need to grab task_lock here; it cannot change */
-        get_group_info(current->group_info);
         i = current->group_info->ngroups;
         if (gidsetsize) {
                 if (i > gidsetsize) {
@@ -1446,7 +1593,6 @@ asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist)
                 }
         }
 out:
-        put_group_info(current->group_info);
         return i;
 }
 
@@ -1487,9 +1633,7 @@ int in_group_p(gid_t grp)
 {
         int retval = 1;
         if (grp != current->fsgid) {
-                get_group_info(current->group_info);
                 retval = groups_search(current->group_info, grp);
-                put_group_info(current->group_info);
         }
         return retval;
 }
@@ -1500,9 +1644,7 @@ int in_egroup_p(gid_t grp)
 {
         int retval = 1;
         if (grp != current->egid) {
-                get_group_info(current->group_info);
                 retval = groups_search(current->group_info, grp);
-                put_group_info(current->group_info);
         }
         return retval;
 }
@@ -1630,20 +1772,21 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r
 asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
 {
         struct rlimit new_rlim, *old_rlim;
+        unsigned long it_prof_secs;
         int retval;
 
         if (resource >= RLIM_NLIMITS)
                 return -EINVAL;
-        if(copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+        if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
                 return -EFAULT;
         if (new_rlim.rlim_cur > new_rlim.rlim_max)
                 return -EINVAL;
         old_rlim = current->signal->rlim + resource;
         if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
                         !capable(CAP_SYS_RESOURCE))
                 return -EPERM;
         if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN)
                 return -EPERM;
 
         retval = security_task_setrlimit(resource, &new_rlim);
         if (retval)
@@ -1653,19 +1796,40 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
         *old_rlim = new_rlim;
         task_unlock(current->group_leader);
 
-        if (resource == RLIMIT_CPU && new_rlim.rlim_cur != RLIM_INFINITY &&
-            (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
-             new_rlim.rlim_cur <= cputime_to_secs(
-                     current->signal->it_prof_expires))) {
-                cputime_t cputime = secs_to_cputime(new_rlim.rlim_cur);
+        if (resource != RLIMIT_CPU)
+                goto out;
+
+        /*
+         * RLIMIT_CPU handling.  Note that the kernel fails to return an error
+         * code if it rejected the user's attempt to set RLIMIT_CPU.  This is a
+         * very long-standing error, and fixing it now risks breakage of
+         * applications, so we live with it
+         */
+        if (new_rlim.rlim_cur == RLIM_INFINITY)
+                goto out;
+
+        it_prof_secs = cputime_to_secs(current->signal->it_prof_expires);
+        if (it_prof_secs == 0 || new_rlim.rlim_cur <= it_prof_secs) {
+                unsigned long rlim_cur = new_rlim.rlim_cur;
+                cputime_t cputime;
+
+                if (rlim_cur == 0) {
+                        /*
+                         * The caller is asking for an immediate RLIMIT_CPU
+                         * expiry.  But we use the zero value to mean "it was
+                         * never set".  So let's cheat and make it one second
+                         * instead
+                         */
+                        rlim_cur = 1;
+                }
+                cputime = secs_to_cputime(rlim_cur);
                 read_lock(&tasklist_lock);
                 spin_lock_irq(&current->sighand->siglock);
-                set_process_cpu_timer(current, CPUCLOCK_PROF,
-                                      &cputime, NULL);
+                set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
                 spin_unlock_irq(&current->sighand->siglock);
                 read_unlock(&tasklist_lock);
         }
-
+out:
         return 0;
 }
 
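
Commentary, not part of the patch: as the new comment above explains, an RLIMIT_CPU soft limit of 0 is quietly treated as 1 second (0 means "never set" internally), and an invalid RLIMIT_CPU request may still report success. A minimal userspace caller for reference:

#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
        struct rlimit rl = { .rlim_cur = 5, .rlim_max = 10 };   /* seconds of CPU time */

        if (setrlimit(RLIMIT_CPU, &rl) != 0) {
                perror("setrlimit");
                return 1;
        }
        if (getrlimit(RLIMIT_CPU, &rl) == 0)
                printf("RLIMIT_CPU soft=%lu hard=%lu\n",
                       (unsigned long)rl.rlim_cur, (unsigned long)rl.rlim_max);
        return 0;
}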
@@ -1677,9 +1841,6 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
  * a lot simpler!  (Which we're not doing right now because we're not
  * measuring them yet).
  *
- * This expects to be called with tasklist_lock read-locked or better,
- * and the siglock not locked.  It may momentarily take the siglock.
- *
  * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
  * races with threads incrementing their own counters.  But since word
  * reads are atomic, we either get new values or old values and we don't
@@ -1687,6 +1848,25 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
  * the c* fields from p->signal from races with exit.c updating those
  * fields when reaping, so a sample either gets all the additions of a
  * given child after it's reaped, or none so this sample is before reaping.
+ *
+ * tasklist_lock locking optimisation:
+ * If we are current and single threaded, we do not need to take the tasklist
+ * lock or the siglock.  No one else can take our signal_struct away,
+ * no one else can reap the children to update signal->c* counters, and
+ * no one else can race with the signal-> fields.
+ * If we do not take the tasklist_lock, the signal-> fields could be read
+ * out of order while another thread was just exiting.  So we place a
+ * read memory barrier when we avoid the lock.  On the writer side,
+ * write memory barrier is implied in __exit_signal as __exit_signal releases
+ * the siglock spinlock after updating the signal-> fields.
+ *
+ * We don't really need the siglock when we access the non c* fields
+ * of the signal_struct (for RUSAGE_SELF) even in multithreaded
+ * case, since we take the tasklist lock for read and the non c* signal->
+ * fields are updated only in __exit_signal, which is called with
+ * tasklist_lock taken for write, hence these two threads cannot execute
+ * concurrently.
+ *
  */
 
 static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
@@ -1694,13 +1874,23 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
         struct task_struct *t;
         unsigned long flags;
         cputime_t utime, stime;
+        int need_lock = 0;
 
         memset((char *) r, 0, sizeof *r);
+        utime = stime = cputime_zero;
 
-        if (unlikely(!p->signal))
-                return;
+        if (p != current || !thread_group_empty(p))
+                need_lock = 1;
 
-        utime = stime = cputime_zero;
+        if (need_lock) {
+                read_lock(&tasklist_lock);
+                if (unlikely(!p->signal)) {
+                        read_unlock(&tasklist_lock);
+                        return;
+                }
+        } else
+                /* See locking comments above */
+                smp_rmb();
 
         switch (who) {
                 case RUSAGE_BOTH:
@@ -1740,6 +1930,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
                         BUG();
         }
 
+        if (need_lock)
+                read_unlock(&tasklist_lock);
         cputime_to_timeval(utime, &r->ru_utime);
         cputime_to_timeval(stime, &r->ru_stime);
 }
@@ -1747,9 +1939,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
 {
         struct rusage r;
-        read_lock(&tasklist_lock);
         k_getrusage(p, who, &r);
-        read_unlock(&tasklist_lock);
         return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
 }
 
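Commentary, not part of the patch: getrusage() no longer takes tasklist_lock itself; k_getrusage() now takes it only when the target is another task or a multithreaded group, per the locking comments added above. The syscall's behaviour as seen from userspace is unchanged, e.g.:

#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
        struct rusage ru;

        if (getrusage(RUSAGE_SELF, &ru) != 0) {
                perror("getrusage");
                return 1;
        }
        printf("user %ld.%06lds  system %ld.%06lds\n",
               (long)ru.ru_utime.tv_sec, (long)ru.ru_utime.tv_usec,
               (long)ru.ru_stime.tv_sec, (long)ru.ru_stime.tv_usec);
        return 0;
}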