Diffstat (limited to 'kernel/sys.c')
-rw-r--r--  kernel/sys.c  514
1 files changed, 352 insertions, 162 deletions
diff --git a/kernel/sys.c b/kernel/sys.c
index f91218a5463e..7ef7f6054c28 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
| @@ -95,99 +95,304 @@ int cad_pid = 1; | |||
| 95 | * and the like. | 95 | * and the like. | 
| 96 | */ | 96 | */ | 
| 97 | 97 | ||
| 98 | static struct notifier_block *reboot_notifier_list; | 98 | static BLOCKING_NOTIFIER_HEAD(reboot_notifier_list); | 
| 99 | static DEFINE_RWLOCK(notifier_lock); | 99 | |
| 100 | /* | ||
| 101 | * Notifier chain core routines. The exported routines below | ||
| 102 | * are layered on top of these, with appropriate locking added. | ||
| 103 | */ | ||
| 104 | |||
| 105 | static int notifier_chain_register(struct notifier_block **nl, | ||
| 106 | struct notifier_block *n) | ||
| 107 | { | ||
| 108 | while ((*nl) != NULL) { | ||
| 109 | if (n->priority > (*nl)->priority) | ||
| 110 | break; | ||
| 111 | nl = &((*nl)->next); | ||
| 112 | } | ||
| 113 | n->next = *nl; | ||
| 114 | rcu_assign_pointer(*nl, n); | ||
| 115 | return 0; | ||
| 116 | } | ||
| 117 | |||
| 118 | static int notifier_chain_unregister(struct notifier_block **nl, | ||
| 119 | struct notifier_block *n) | ||
| 120 | { | ||
| 121 | while ((*nl) != NULL) { | ||
| 122 | if ((*nl) == n) { | ||
| 123 | rcu_assign_pointer(*nl, n->next); | ||
| 124 | return 0; | ||
| 125 | } | ||
| 126 | nl = &((*nl)->next); | ||
| 127 | } | ||
| 128 | return -ENOENT; | ||
| 129 | } | ||
| 130 | |||
| 131 | static int __kprobes notifier_call_chain(struct notifier_block **nl, | ||
| 132 | unsigned long val, void *v) | ||
| 133 | { | ||
| 134 | int ret = NOTIFY_DONE; | ||
| 135 | struct notifier_block *nb; | ||
| 136 | |||
| 137 | nb = rcu_dereference(*nl); | ||
| 138 | while (nb) { | ||
| 139 | ret = nb->notifier_call(nb, val, v); | ||
| 140 | if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK) | ||
| 141 | break; | ||
| 142 | nb = rcu_dereference(nb->next); | ||
| 143 | } | ||
| 144 | return ret; | ||
| 145 | } | ||
| 146 | |||
| 147 | /* | ||
| 148 | * Atomic notifier chain routines. Registration and unregistration | ||
| 149 | * use a spinlock, and call_chain is synchronized by RCU (no locks). | ||
| 150 | */ | ||
| 100 | 151 | ||
| 101 | /** | 152 | /** | 
| 102 | * notifier_chain_register - Add notifier to a notifier chain | 153 | * atomic_notifier_chain_register - Add notifier to an atomic notifier chain | 
| 103 | * @list: Pointer to root list pointer | 154 | * @nh: Pointer to head of the atomic notifier chain | 
| 104 | * @n: New entry in notifier chain | 155 | * @n: New entry in notifier chain | 
| 105 | * | 156 | * | 
| 106 | * Adds a notifier to a notifier chain. | 157 | * Adds a notifier to an atomic notifier chain. | 
| 107 | * | 158 | * | 
| 108 | * Currently always returns zero. | 159 | * Currently always returns zero. | 
| 109 | */ | 160 | */ | 
| 161 | |||
| 162 | int atomic_notifier_chain_register(struct atomic_notifier_head *nh, | ||
| 163 | struct notifier_block *n) | ||
| 164 | { | ||
| 165 | unsigned long flags; | ||
| 166 | int ret; | ||
| 167 | |||
| 168 | spin_lock_irqsave(&nh->lock, flags); | ||
| 169 | ret = notifier_chain_register(&nh->head, n); | ||
| 170 | spin_unlock_irqrestore(&nh->lock, flags); | ||
| 171 | return ret; | ||
| 172 | } | ||
| 173 | |||
| 174 | EXPORT_SYMBOL_GPL(atomic_notifier_chain_register); | ||
| 175 | |||
| 176 | /** | ||
| 177 | * atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain | ||
| 178 | * @nh: Pointer to head of the atomic notifier chain | ||
| 179 | * @n: Entry to remove from notifier chain | ||
| 180 | * | ||
| 181 | * Removes a notifier from an atomic notifier chain. | ||
| 182 | * | ||
| 183 | * Returns zero on success or %-ENOENT on failure. | ||
| 184 | */ | ||
| 185 | int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, | ||
| 186 | struct notifier_block *n) | ||
| 187 | { | ||
| 188 | unsigned long flags; | ||
| 189 | int ret; | ||
| 190 | |||
| 191 | spin_lock_irqsave(&nh->lock, flags); | ||
| 192 | ret = notifier_chain_unregister(&nh->head, n); | ||
| 193 | spin_unlock_irqrestore(&nh->lock, flags); | ||
| 194 | synchronize_rcu(); | ||
| 195 | return ret; | ||
| 196 | } | ||
| 197 | |||
| 198 | EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister); | ||
| 199 | |||
| 200 | /** | ||
| 201 | * atomic_notifier_call_chain - Call functions in an atomic notifier chain | ||
| 202 | * @nh: Pointer to head of the atomic notifier chain | ||
| 203 | * @val: Value passed unmodified to notifier function | ||
| 204 | * @v: Pointer passed unmodified to notifier function | ||
| 205 | * | ||
| 206 | * Calls each function in a notifier chain in turn. The functions | ||
| 207 | * run in an atomic context, so they must not block. | ||
| 208 | * This routine uses RCU to synchronize with changes to the chain. | ||
| 209 | * | ||
| 210 | * If the return value of the notifier can be and'ed | ||
| 211 | * with %NOTIFY_STOP_MASK then atomic_notifier_call_chain | ||
| 212 | * will return immediately, with the return value of | ||
| 213 | * the notifier function which halted execution. | ||
| 214 | * Otherwise the return value is the return value | ||
| 215 | * of the last notifier function called. | ||
| 216 | */ | ||
| 110 | 217 | ||
| 111 | int notifier_chain_register(struct notifier_block **list, struct notifier_block *n) | 218 | int atomic_notifier_call_chain(struct atomic_notifier_head *nh, | 
| 219 | unsigned long val, void *v) | ||
| 112 | { | 220 | { | 
| 113 | write_lock(&notifier_lock); | 221 | int ret; | 
| 114 | while(*list) | 222 | |
| 115 | { | 223 | rcu_read_lock(); | 
| 116 | if(n->priority > (*list)->priority) | 224 | ret = notifier_call_chain(&nh->head, val, v); | 
| 117 | break; | 225 | rcu_read_unlock(); | 
| 118 | list= &((*list)->next); | 226 | return ret; | 
| 119 | } | ||
| 120 | n->next = *list; | ||
| 121 | *list=n; | ||
| 122 | write_unlock(&notifier_lock); | ||
| 123 | return 0; | ||
| 124 | } | 227 | } | 
| 125 | 228 | ||
| 126 | EXPORT_SYMBOL(notifier_chain_register); | 229 | EXPORT_SYMBOL_GPL(atomic_notifier_call_chain); | 
| 230 | |||
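Illustration (not part of this patch): a client of the new atomic variant might look like the sketch below. The chain and callback names are hypothetical, and ATOMIC_NOTIFIER_HEAD() is assumed to come from the companion include/linux/notifier.h update, which is not shown in this file's diff.

#include <linux/notifier.h>

/* Hypothetical chain and callback, for illustration only. */
static ATOMIC_NOTIFIER_HEAD(example_chain);

static int example_event(struct notifier_block *nb, unsigned long action,
                          void *data)
{
        /* Runs under rcu_read_lock() from atomic_notifier_call_chain():
         * must not sleep. */
        return NOTIFY_OK;
}

static struct notifier_block example_nb = {
        .notifier_call  = example_event,
        .priority       = 0,    /* higher-priority entries run first */
};

static void example_setup(void)
{
        /* Takes the chain's internal spinlock with interrupts disabled. */
        atomic_notifier_chain_register(&example_chain, &example_nb);
}

static void example_fire(void *data)
{
        /* Safe from interrupt or other atomic context. */
        atomic_notifier_call_chain(&example_chain, 0, data);
}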
| 231 | /* | ||
| 232 | * Blocking notifier chain routines. All access to the chain is | ||
| 233 | * synchronized by an rwsem. | ||
| 234 | */ | ||
| 127 | 235 | ||
| 128 | /** | 236 | /** | 
| 129 | * notifier_chain_unregister - Remove notifier from a notifier chain | 237 | * blocking_notifier_chain_register - Add notifier to a blocking notifier chain | 
| 130 | * @nl: Pointer to root list pointer | 238 | * @nh: Pointer to head of the blocking notifier chain | 
| 131 | * @n: New entry in notifier chain | 239 | * @n: New entry in notifier chain | 
| 132 | * | 240 | * | 
| 133 | * Removes a notifier from a notifier chain. | 241 | * Adds a notifier to a blocking notifier chain. | 
| 242 | * Must be called in process context. | ||
| 134 | * | 243 | * | 
| 135 | * Returns zero on success, or %-ENOENT on failure. | 244 | * Currently always returns zero. | 
| 136 | */ | 245 | */ | 
| 137 | 246 | ||
| 138 | int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n) | 247 | int blocking_notifier_chain_register(struct blocking_notifier_head *nh, | 
| 248 | struct notifier_block *n) | ||
| 139 | { | 249 | { | 
| 140 | write_lock(&notifier_lock); | 250 | int ret; | 
| 141 | while((*nl)!=NULL) | 251 | |
| 142 | { | 252 | /* | 
| 143 | if((*nl)==n) | 253 | * This code gets used during boot-up, when task switching is | 
| 144 | { | 254 | * not yet working and interrupts must remain disabled. At | 
| 145 | *nl=n->next; | 255 | * such times we must not call down_write(). | 
| 146 | write_unlock(&notifier_lock); | 256 | */ | 
| 147 | return 0; | 257 | if (unlikely(system_state == SYSTEM_BOOTING)) | 
| 148 | } | 258 | return notifier_chain_register(&nh->head, n); | 
| 149 | nl=&((*nl)->next); | 259 | |
| 150 | } | 260 | down_write(&nh->rwsem); | 
| 151 | write_unlock(&notifier_lock); | 261 | ret = notifier_chain_register(&nh->head, n); | 
| 152 | return -ENOENT; | 262 | up_write(&nh->rwsem); | 
| 263 | return ret; | ||
| 153 | } | 264 | } | 
| 154 | 265 | ||
| 155 | EXPORT_SYMBOL(notifier_chain_unregister); | 266 | EXPORT_SYMBOL_GPL(blocking_notifier_chain_register); | 
| 156 | 267 | ||
| 157 | /** | 268 | /** | 
| 158 | * notifier_call_chain - Call functions in a notifier chain | 269 | * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain | 
| 159 | * @n: Pointer to root pointer of notifier chain | 270 | * @nh: Pointer to head of the blocking notifier chain | 
| 271 | * @n: Entry to remove from notifier chain | ||
| 272 | * | ||
| 273 | * Removes a notifier from a blocking notifier chain. | ||
| 274 | * Must be called from process context. | ||
| 275 | * | ||
| 276 | * Returns zero on success or %-ENOENT on failure. | ||
| 277 | */ | ||
| 278 | int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, | ||
| 279 | struct notifier_block *n) | ||
| 280 | { | ||
| 281 | int ret; | ||
| 282 | |||
| 283 | /* | ||
| 284 | * This code gets used during boot-up, when task switching is | ||
| 285 | * not yet working and interrupts must remain disabled. At | ||
| 286 | * such times we must not call down_write(). | ||
| 287 | */ | ||
| 288 | if (unlikely(system_state == SYSTEM_BOOTING)) | ||
| 289 | return notifier_chain_unregister(&nh->head, n); | ||
| 290 | |||
| 291 | down_write(&nh->rwsem); | ||
| 292 | ret = notifier_chain_unregister(&nh->head, n); | ||
| 293 | up_write(&nh->rwsem); | ||
| 294 | return ret; | ||
| 295 | } | ||
| 296 | |||
| 297 | EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister); | ||
| 298 | |||
| 299 | /** | ||
| 300 | * blocking_notifier_call_chain - Call functions in a blocking notifier chain | ||
| 301 | * @nh: Pointer to head of the blocking notifier chain | ||
| 160 | * @val: Value passed unmodified to notifier function | 302 | * @val: Value passed unmodified to notifier function | 
| 161 | * @v: Pointer passed unmodified to notifier function | 303 | * @v: Pointer passed unmodified to notifier function | 
| 162 | * | 304 | * | 
| 163 | * Calls each function in a notifier chain in turn. | 305 | * Calls each function in a notifier chain in turn. The functions | 
| 306 | * run in a process context, so they are allowed to block. | ||
| 164 | * | 307 | * | 
| 165 | * If the return value of the notifier can be and'd | 308 | * If the return value of the notifier can be and'ed | 
| 166 | * with %NOTIFY_STOP_MASK, then notifier_call_chain | 309 | * with %NOTIFY_STOP_MASK then blocking_notifier_call_chain | 
| 167 | * will return immediately, with the return value of | 310 | * will return immediately, with the return value of | 
| 168 | * the notifier function which halted execution. | 311 | * the notifier function which halted execution. | 
| 169 | * Otherwise, the return value is the return value | 312 | * Otherwise the return value is the return value | 
| 170 | * of the last notifier function called. | 313 | * of the last notifier function called. | 
| 171 | */ | 314 | */ | 
| 172 | 315 | ||
| 173 | int __kprobes notifier_call_chain(struct notifier_block **n, unsigned long val, void *v) | 316 | int blocking_notifier_call_chain(struct blocking_notifier_head *nh, | 
| 317 | unsigned long val, void *v) | ||
| 174 | { | 318 | { | 
| 175 | int ret=NOTIFY_DONE; | 319 | int ret; | 
| 176 | struct notifier_block *nb = *n; | ||
| 177 | 320 | ||
| 178 | while(nb) | 321 | down_read(&nh->rwsem); | 
| 179 | { | 322 | ret = notifier_call_chain(&nh->head, val, v); | 
| 180 | ret=nb->notifier_call(nb,val,v); | 323 | up_read(&nh->rwsem); | 
| 181 | if(ret&NOTIFY_STOP_MASK) | ||
| 182 | { | ||
| 183 | return ret; | ||
| 184 | } | ||
| 185 | nb=nb->next; | ||
| 186 | } | ||
| 187 | return ret; | 324 | return ret; | 
| 188 | } | 325 | } | 
| 189 | 326 | ||
| 190 | EXPORT_SYMBOL(notifier_call_chain); | 327 | EXPORT_SYMBOL_GPL(blocking_notifier_call_chain); | 
| 328 | |||
| 329 | /* | ||
| 330 | * Raw notifier chain routines. There is no protection; | ||
| 331 | * the caller must provide it. Use at your own risk! | ||
| 332 | */ | ||
| 333 | |||
| 334 | /** | ||
| 335 | * raw_notifier_chain_register - Add notifier to a raw notifier chain | ||
| 336 | * @nh: Pointer to head of the raw notifier chain | ||
| 337 | * @n: New entry in notifier chain | ||
| 338 | * | ||
| 339 | * Adds a notifier to a raw notifier chain. | ||
| 340 | * All locking must be provided by the caller. | ||
| 341 | * | ||
| 342 | * Currently always returns zero. | ||
| 343 | */ | ||
| 344 | |||
| 345 | int raw_notifier_chain_register(struct raw_notifier_head *nh, | ||
| 346 | struct notifier_block *n) | ||
| 347 | { | ||
| 348 | return notifier_chain_register(&nh->head, n); | ||
| 349 | } | ||
| 350 | |||
| 351 | EXPORT_SYMBOL_GPL(raw_notifier_chain_register); | ||
| 352 | |||
| 353 | /** | ||
| 354 | * raw_notifier_chain_unregister - Remove notifier from a raw notifier chain | ||
| 355 | * @nh: Pointer to head of the raw notifier chain | ||
| 356 | * @n: Entry to remove from notifier chain | ||
| 357 | * | ||
| 358 | * Removes a notifier from a raw notifier chain. | ||
| 359 | * All locking must be provided by the caller. | ||
| 360 | * | ||
| 361 | * Returns zero on success or %-ENOENT on failure. | ||
| 362 | */ | ||
| 363 | int raw_notifier_chain_unregister(struct raw_notifier_head *nh, | ||
| 364 | struct notifier_block *n) | ||
| 365 | { | ||
| 366 | return notifier_chain_unregister(&nh->head, n); | ||
| 367 | } | ||
| 368 | |||
| 369 | EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister); | ||
| 370 | |||
| 371 | /** | ||
| 372 | * raw_notifier_call_chain - Call functions in a raw notifier chain | ||
| 373 | * @nh: Pointer to head of the raw notifier chain | ||
| 374 | * @val: Value passed unmodified to notifier function | ||
| 375 | * @v: Pointer passed unmodified to notifier function | ||
| 376 | * | ||
| 377 | * Calls each function in a notifier chain in turn. The functions | ||
| 378 | * run in an undefined context. | ||
| 379 | * All locking must be provided by the caller. | ||
| 380 | * | ||
| 381 | * If the return value of the notifier can be and'ed | ||
| 382 | * with %NOTIFY_STOP_MASK then raw_notifier_call_chain | ||
| 383 | * will return immediately, with the return value of | ||
| 384 | * the notifier function which halted execution. | ||
| 385 | * Otherwise the return value is the return value | ||
| 386 | * of the last notifier function called. | ||
| 387 | */ | ||
| 388 | |||
| 389 | int raw_notifier_call_chain(struct raw_notifier_head *nh, | ||
| 390 | unsigned long val, void *v) | ||
| 391 | { | ||
| 392 | return notifier_call_chain(&nh->head, val, v); | ||
| 393 | } | ||
| 394 | |||
| 395 | EXPORT_SYMBOL_GPL(raw_notifier_call_chain); | ||
| 191 | 396 | ||
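Illustration (not from this patch): a raw chain leaves all exclusion to the caller, so a hypothetical subsystem might wrap registration and traversal in its own mutex, as sketched here. All names are made up, and RAW_NOTIFIER_HEAD() is assumed from the companion notifier.h change.

#include <linux/mutex.h>
#include <linux/notifier.h>

/* Hypothetical subsystem lock and chain; names illustrative only. */
static DEFINE_MUTEX(example_subsys_lock);
static RAW_NOTIFIER_HEAD(example_raw_chain);

static int example_raw_register(struct notifier_block *nb)
{
        int ret;

        mutex_lock(&example_subsys_lock);
        ret = raw_notifier_chain_register(&example_raw_chain, nb);
        mutex_unlock(&example_subsys_lock);
        return ret;
}

static int example_raw_notify(unsigned long event, void *data)
{
        int ret;

        /* Caller-provided exclusion: the chain itself is unprotected. */
        mutex_lock(&example_subsys_lock);
        ret = raw_notifier_call_chain(&example_raw_chain, event, data);
        mutex_unlock(&example_subsys_lock);
        return ret;
}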
| 192 | /** | 397 | /** | 
| 193 | * register_reboot_notifier - Register function to be called at reboot time | 398 | * register_reboot_notifier - Register function to be called at reboot time | 
| @@ -196,13 +401,13 @@ EXPORT_SYMBOL(notifier_call_chain); | |||
| 196 | * Registers a function with the list of functions | 401 | * Registers a function with the list of functions | 
| 197 | * to be called at reboot time. | 402 | * to be called at reboot time. | 
| 198 | * | 403 | * | 
| 199 | * Currently always returns zero, as notifier_chain_register | 404 | * Currently always returns zero, as blocking_notifier_chain_register | 
| 200 | * always returns zero. | 405 | * always returns zero. | 
| 201 | */ | 406 | */ | 
| 202 | 407 | ||
| 203 | int register_reboot_notifier(struct notifier_block * nb) | 408 | int register_reboot_notifier(struct notifier_block * nb) | 
| 204 | { | 409 | { | 
| 205 | return notifier_chain_register(&reboot_notifier_list, nb); | 410 | return blocking_notifier_chain_register(&reboot_notifier_list, nb); | 
| 206 | } | 411 | } | 
| 207 | 412 | ||
| 208 | EXPORT_SYMBOL(register_reboot_notifier); | 413 | EXPORT_SYMBOL(register_reboot_notifier); | 
| @@ -219,23 +424,11 @@ EXPORT_SYMBOL(register_reboot_notifier); | |||
| 219 | 424 | ||
| 220 | int unregister_reboot_notifier(struct notifier_block * nb) | 425 | int unregister_reboot_notifier(struct notifier_block * nb) | 
| 221 | { | 426 | { | 
| 222 | return notifier_chain_unregister(&reboot_notifier_list, nb); | 427 | return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); | 
| 223 | } | 428 | } | 
| 224 | 429 | ||
| 225 | EXPORT_SYMBOL(unregister_reboot_notifier); | 430 | EXPORT_SYMBOL(unregister_reboot_notifier); | 
| 226 | 431 | ||
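Illustration (not part of this patch): since reboot_notifier_list is now a blocking chain, callbacks run in process context and may sleep, but registration sites are unchanged from the caller's point of view. A hypothetical driver, with illustrative names, might look like:

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/reboot.h>

/* Hypothetical callback; runs in process context and may sleep. */
static int example_reboot_event(struct notifier_block *nb,
                                unsigned long action, void *data)
{
        /* action is SYS_RESTART, SYS_HALT or SYS_POWER_OFF;
         * data is the restart command string, or NULL. */
        return NOTIFY_DONE;
}

static struct notifier_block example_reboot_nb = {
        .notifier_call = example_reboot_event,
};

static int __init example_init(void)
{
        /* Now backed by blocking_notifier_chain_register() above. */
        return register_reboot_notifier(&example_reboot_nb);
}

static void __exit example_exit(void)
{
        unregister_reboot_notifier(&example_reboot_nb);
}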
| 227 | #ifndef CONFIG_SECURITY | ||
| 228 | int capable(int cap) | ||
| 229 | { | ||
| 230 | if (cap_raised(current->cap_effective, cap)) { | ||
| 231 | current->flags |= PF_SUPERPRIV; | ||
| 232 | return 1; | ||
| 233 | } | ||
| 234 | return 0; | ||
| 235 | } | ||
| 236 | EXPORT_SYMBOL(capable); | ||
| 237 | #endif | ||
| 238 | |||
| 239 | static int set_one_prio(struct task_struct *p, int niceval, int error) | 432 | static int set_one_prio(struct task_struct *p, int niceval, int error) | 
| 240 | { | 433 | { | 
| 241 | int no_nice; | 434 | int no_nice; | 
| @@ -392,7 +585,7 @@ EXPORT_SYMBOL_GPL(emergency_restart); | |||
| 392 | 585 | ||
| 393 | void kernel_restart_prepare(char *cmd) | 586 | void kernel_restart_prepare(char *cmd) | 
| 394 | { | 587 | { | 
| 395 | notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); | 588 | blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); | 
| 396 | system_state = SYSTEM_RESTART; | 589 | system_state = SYSTEM_RESTART; | 
| 397 | device_shutdown(); | 590 | device_shutdown(); | 
| 398 | } | 591 | } | 
| @@ -442,7 +635,7 @@ EXPORT_SYMBOL_GPL(kernel_kexec); | |||
| 442 | 635 | ||
| 443 | void kernel_shutdown_prepare(enum system_states state) | 636 | void kernel_shutdown_prepare(enum system_states state) | 
| 444 | { | 637 | { | 
| 445 | notifier_call_chain(&reboot_notifier_list, | 638 | blocking_notifier_call_chain(&reboot_notifier_list, | 
| 446 | (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); | 639 | (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); | 
| 447 | system_state = state; | 640 | system_state = state; | 
| 448 | device_shutdown(); | 641 | device_shutdown(); | 
| @@ -1009,69 +1202,24 @@ asmlinkage long sys_times(struct tms __user * tbuf) | |||
| 1009 | */ | 1202 | */ | 
| 1010 | if (tbuf) { | 1203 | if (tbuf) { | 
| 1011 | struct tms tmp; | 1204 | struct tms tmp; | 
| 1205 | struct task_struct *tsk = current; | ||
| 1206 | struct task_struct *t; | ||
| 1012 | cputime_t utime, stime, cutime, cstime; | 1207 | cputime_t utime, stime, cutime, cstime; | 
| 1013 | 1208 | ||
| 1014 | #ifdef CONFIG_SMP | 1209 | spin_lock_irq(&tsk->sighand->siglock); | 
| 1015 | if (thread_group_empty(current)) { | 1210 | utime = tsk->signal->utime; | 
| 1016 | /* | 1211 | stime = tsk->signal->stime; | 
| 1017 | * Single thread case without the use of any locks. | 1212 | t = tsk; | 
| 1018 | * | 1213 | do { | 
| 1019 | * We may race with release_task if two threads are | 1214 | utime = cputime_add(utime, t->utime); | 
| 1020 | * executing. However, release task first adds up the | 1215 | stime = cputime_add(stime, t->stime); | 
| 1021 | * counters (__exit_signal) before removing the task | 1216 | t = next_thread(t); | 
| 1022 | * from the process tasklist (__unhash_process). | 1217 | } while (t != tsk); | 
| 1023 | * __exit_signal also acquires and releases the | ||
| 1024 | * siglock which results in the proper memory ordering | ||
| 1025 | * so that the list modifications are always visible | ||
| 1026 | * after the counters have been updated. | ||
| 1027 | * | ||
| 1028 | * If the counters have been updated by the second thread | ||
| 1029 | * but the thread has not yet been removed from the list | ||
| 1030 | * then the other branch will be executing which will | ||
| 1031 | * block on tasklist_lock until the exit handling of the | ||
| 1032 | * other task is finished. | ||
| 1033 | * | ||
| 1034 | * This also implies that the sighand->siglock cannot | ||
| 1035 | * be held by another processor. So we can also | ||
| 1036 | * skip acquiring that lock. | ||
| 1037 | */ | ||
| 1038 | utime = cputime_add(current->signal->utime, current->utime); | ||
| 1039 | stime = cputime_add(current->signal->utime, current->stime); | ||
| 1040 | cutime = current->signal->cutime; | ||
| 1041 | cstime = current->signal->cstime; | ||
| 1042 | } else | ||
| 1043 | #endif | ||
| 1044 | { | ||
| 1045 | 1218 | ||
| 1046 | /* Process with multiple threads */ | 1219 | cutime = tsk->signal->cutime; | 
| 1047 | struct task_struct *tsk = current; | 1220 | cstime = tsk->signal->cstime; | 
| 1048 | struct task_struct *t; | 1221 | spin_unlock_irq(&tsk->sighand->siglock); | 
| 1049 | |||
| 1050 | read_lock(&tasklist_lock); | ||
| 1051 | utime = tsk->signal->utime; | ||
| 1052 | stime = tsk->signal->stime; | ||
| 1053 | t = tsk; | ||
| 1054 | do { | ||
| 1055 | utime = cputime_add(utime, t->utime); | ||
| 1056 | stime = cputime_add(stime, t->stime); | ||
| 1057 | t = next_thread(t); | ||
| 1058 | } while (t != tsk); | ||
| 1059 | 1222 | ||
| 1060 | /* | ||
| 1061 | * While we have tasklist_lock read-locked, no dying thread | ||
| 1062 | * can be updating current->signal->[us]time. Instead, | ||
| 1063 | * we got their counts included in the live thread loop. | ||
| 1064 | * However, another thread can come in right now and | ||
| 1065 | * do a wait call that updates current->signal->c[us]time. | ||
| 1066 | * To make sure we always see that pair updated atomically, | ||
| 1067 | * we take the siglock around fetching them. | ||
| 1068 | */ | ||
| 1069 | spin_lock_irq(&tsk->sighand->siglock); | ||
| 1070 | cutime = tsk->signal->cutime; | ||
| 1071 | cstime = tsk->signal->cstime; | ||
| 1072 | spin_unlock_irq(&tsk->sighand->siglock); | ||
| 1073 | read_unlock(&tasklist_lock); | ||
| 1074 | } | ||
| 1075 | tmp.tms_utime = cputime_to_clock_t(utime); | 1223 | tmp.tms_utime = cputime_to_clock_t(utime); | 
| 1076 | tmp.tms_stime = cputime_to_clock_t(stime); | 1224 | tmp.tms_stime = cputime_to_clock_t(stime); | 
| 1077 | tmp.tms_cutime = cputime_to_clock_t(cutime); | 1225 | tmp.tms_cutime = cputime_to_clock_t(cutime); | 
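For reference, the per-thread accumulation in the hunk above is what fills struct tms for the ordinary times(2) interface. A minimal user-space caller (illustrative only, not part of the patch):

#include <stdio.h>
#include <sys/times.h>
#include <unistd.h>

int main(void)
{
        struct tms t;
        long hz = sysconf(_SC_CLK_TCK);         /* clock ticks per second */

        if (times(&t) == (clock_t)-1)
                return 1;

        /* utime/stime cover all threads of the caller; cutime/cstime
         * cover waited-for children. */
        printf("user %.2fs  sys %.2fs  child-user %.2fs  child-sys %.2fs\n",
               (double)t.tms_utime / hz, (double)t.tms_stime / hz,
               (double)t.tms_cutime / hz, (double)t.tms_cstime / hz);
        return 0;
}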
| @@ -1227,7 +1375,7 @@ asmlinkage long sys_setsid(void) | |||
| 1227 | struct pid *pid; | 1375 | struct pid *pid; | 
| 1228 | int err = -EPERM; | 1376 | int err = -EPERM; | 
| 1229 | 1377 | ||
| 1230 | down(&tty_sem); | 1378 | mutex_lock(&tty_mutex); | 
| 1231 | write_lock_irq(&tasklist_lock); | 1379 | write_lock_irq(&tasklist_lock); | 
| 1232 | 1380 | ||
| 1233 | pid = find_pid(PIDTYPE_PGID, group_leader->pid); | 1381 | pid = find_pid(PIDTYPE_PGID, group_leader->pid); | 
| @@ -1241,7 +1389,7 @@ asmlinkage long sys_setsid(void) | |||
| 1241 | err = process_group(group_leader); | 1389 | err = process_group(group_leader); | 
| 1242 | out: | 1390 | out: | 
| 1243 | write_unlock_irq(&tasklist_lock); | 1391 | write_unlock_irq(&tasklist_lock); | 
| 1244 | up(&tty_sem); | 1392 | mutex_unlock(&tty_mutex); | 
| 1245 | return err; | 1393 | return err; | 
| 1246 | } | 1394 | } | 
| 1247 | 1395 | ||
| @@ -1375,7 +1523,7 @@ static void groups_sort(struct group_info *group_info) | |||
| 1375 | /* a simple bsearch */ | 1523 | /* a simple bsearch */ | 
| 1376 | int groups_search(struct group_info *group_info, gid_t grp) | 1524 | int groups_search(struct group_info *group_info, gid_t grp) | 
| 1377 | { | 1525 | { | 
| 1378 | int left, right; | 1526 | unsigned int left, right; | 
| 1379 | 1527 | ||
| 1380 | if (!group_info) | 1528 | if (!group_info) | 
| 1381 | return 0; | 1529 | return 0; | 
| @@ -1383,7 +1531,7 @@ int groups_search(struct group_info *group_info, gid_t grp) | |||
| 1383 | left = 0; | 1531 | left = 0; | 
| 1384 | right = group_info->ngroups; | 1532 | right = group_info->ngroups; | 
| 1385 | while (left < right) { | 1533 | while (left < right) { | 
| 1386 | int mid = (left+right)/2; | 1534 | unsigned int mid = (left+right)/2; | 
| 1387 | int cmp = grp - GROUP_AT(group_info, mid); | 1535 | int cmp = grp - GROUP_AT(group_info, mid); | 
| 1388 | if (cmp > 0) | 1536 | if (cmp > 0) | 
| 1389 | left = mid + 1; | 1537 | left = mid + 1; | 
| @@ -1433,7 +1581,6 @@ asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist) | |||
| 1433 | return -EINVAL; | 1581 | return -EINVAL; | 
| 1434 | 1582 | ||
| 1435 | /* no need to grab task_lock here; it cannot change */ | 1583 | /* no need to grab task_lock here; it cannot change */ | 
| 1436 | get_group_info(current->group_info); | ||
| 1437 | i = current->group_info->ngroups; | 1584 | i = current->group_info->ngroups; | 
| 1438 | if (gidsetsize) { | 1585 | if (gidsetsize) { | 
| 1439 | if (i > gidsetsize) { | 1586 | if (i > gidsetsize) { | 
| @@ -1446,7 +1593,6 @@ asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist) | |||
| 1446 | } | 1593 | } | 
| 1447 | } | 1594 | } | 
| 1448 | out: | 1595 | out: | 
| 1449 | put_group_info(current->group_info); | ||
| 1450 | return i; | 1596 | return i; | 
| 1451 | } | 1597 | } | 
| 1452 | 1598 | ||
| @@ -1487,9 +1633,7 @@ int in_group_p(gid_t grp) | |||
| 1487 | { | 1633 | { | 
| 1488 | int retval = 1; | 1634 | int retval = 1; | 
| 1489 | if (grp != current->fsgid) { | 1635 | if (grp != current->fsgid) { | 
| 1490 | get_group_info(current->group_info); | ||
| 1491 | retval = groups_search(current->group_info, grp); | 1636 | retval = groups_search(current->group_info, grp); | 
| 1492 | put_group_info(current->group_info); | ||
| 1493 | } | 1637 | } | 
| 1494 | return retval; | 1638 | return retval; | 
| 1495 | } | 1639 | } | 
| @@ -1500,9 +1644,7 @@ int in_egroup_p(gid_t grp) | |||
| 1500 | { | 1644 | { | 
| 1501 | int retval = 1; | 1645 | int retval = 1; | 
| 1502 | if (grp != current->egid) { | 1646 | if (grp != current->egid) { | 
| 1503 | get_group_info(current->group_info); | ||
| 1504 | retval = groups_search(current->group_info, grp); | 1647 | retval = groups_search(current->group_info, grp); | 
| 1505 | put_group_info(current->group_info); | ||
| 1506 | } | 1648 | } | 
| 1507 | return retval; | 1649 | return retval; | 
| 1508 | } | 1650 | } | 
| @@ -1630,20 +1772,21 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r | |||
| 1630 | asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) | 1772 | asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) | 
| 1631 | { | 1773 | { | 
| 1632 | struct rlimit new_rlim, *old_rlim; | 1774 | struct rlimit new_rlim, *old_rlim; | 
| 1775 | unsigned long it_prof_secs; | ||
| 1633 | int retval; | 1776 | int retval; | 
| 1634 | 1777 | ||
| 1635 | if (resource >= RLIM_NLIMITS) | 1778 | if (resource >= RLIM_NLIMITS) | 
| 1636 | return -EINVAL; | 1779 | return -EINVAL; | 
| 1637 | if(copy_from_user(&new_rlim, rlim, sizeof(*rlim))) | 1780 | if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) | 
| 1638 | return -EFAULT; | 1781 | return -EFAULT; | 
| 1639 | if (new_rlim.rlim_cur > new_rlim.rlim_max) | 1782 | if (new_rlim.rlim_cur > new_rlim.rlim_max) | 
| 1640 | return -EINVAL; | 1783 | return -EINVAL; | 
| 1641 | old_rlim = current->signal->rlim + resource; | 1784 | old_rlim = current->signal->rlim + resource; | 
| 1642 | if ((new_rlim.rlim_max > old_rlim->rlim_max) && | 1785 | if ((new_rlim.rlim_max > old_rlim->rlim_max) && | 
| 1643 | !capable(CAP_SYS_RESOURCE)) | 1786 | !capable(CAP_SYS_RESOURCE)) | 
| 1644 | return -EPERM; | 1787 | return -EPERM; | 
| 1645 | if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN) | 1788 | if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN) | 
| 1646 | return -EPERM; | 1789 | return -EPERM; | 
| 1647 | 1790 | ||
| 1648 | retval = security_task_setrlimit(resource, &new_rlim); | 1791 | retval = security_task_setrlimit(resource, &new_rlim); | 
| 1649 | if (retval) | 1792 | if (retval) | 
| @@ -1653,19 +1796,40 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) | |||
| 1653 | *old_rlim = new_rlim; | 1796 | *old_rlim = new_rlim; | 
| 1654 | task_unlock(current->group_leader); | 1797 | task_unlock(current->group_leader); | 
| 1655 | 1798 | ||
| 1656 | if (resource == RLIMIT_CPU && new_rlim.rlim_cur != RLIM_INFINITY && | 1799 | if (resource != RLIMIT_CPU) | 
| 1657 | (cputime_eq(current->signal->it_prof_expires, cputime_zero) || | 1800 | goto out; | 
| 1658 | new_rlim.rlim_cur <= cputime_to_secs( | 1801 | |
| 1659 | current->signal->it_prof_expires))) { | 1802 | /* | 
| 1660 | cputime_t cputime = secs_to_cputime(new_rlim.rlim_cur); | 1803 | * RLIMIT_CPU handling. Note that the kernel fails to return an error | 
| 1804 | * code if it rejected the user's attempt to set RLIMIT_CPU. This is a | ||
| 1805 | * very long-standing error, and fixing it now risks breakage of | ||
| 1806 | * applications, so we live with it | ||
| 1807 | */ | ||
| 1808 | if (new_rlim.rlim_cur == RLIM_INFINITY) | ||
| 1809 | goto out; | ||
| 1810 | |||
| 1811 | it_prof_secs = cputime_to_secs(current->signal->it_prof_expires); | ||
| 1812 | if (it_prof_secs == 0 || new_rlim.rlim_cur <= it_prof_secs) { | ||
| 1813 | unsigned long rlim_cur = new_rlim.rlim_cur; | ||
| 1814 | cputime_t cputime; | ||
| 1815 | |||
| 1816 | if (rlim_cur == 0) { | ||
| 1817 | /* | ||
| 1818 | * The caller is asking for an immediate RLIMIT_CPU | ||
| 1819 | * expiry. But we use the zero value to mean "it was | ||
| 1820 | * never set". So let's cheat and make it one second | ||
| 1821 | * instead | ||
| 1822 | */ | ||
| 1823 | rlim_cur = 1; | ||
| 1824 | } | ||
| 1825 | cputime = secs_to_cputime(rlim_cur); | ||
| 1661 | read_lock(&tasklist_lock); | 1826 | read_lock(&tasklist_lock); | 
| 1662 | spin_lock_irq(&current->sighand->siglock); | 1827 | spin_lock_irq(&current->sighand->siglock); | 
| 1663 | set_process_cpu_timer(current, CPUCLOCK_PROF, | 1828 | set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); | 
| 1664 | &cputime, NULL); | ||
| 1665 | spin_unlock_irq(&current->sighand->siglock); | 1829 | spin_unlock_irq(&current->sighand->siglock); | 
| 1666 | read_unlock(&tasklist_lock); | 1830 | read_unlock(&tasklist_lock); | 
| 1667 | } | 1831 | } | 
| 1668 | 1832 | out: | |
| 1669 | return 0; | 1833 | return 0; | 
| 1670 | } | 1834 | } | 
| 1671 | 1835 | ||
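The RLIMIT_CPU special case documented in the hunk above — a zero soft limit is quietly treated as one second, and setrlimit() itself still succeeds — can be exercised from user space with a sketch like this (illustrative only, not part of the patch):

#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
        struct rlimit rl;

        if (getrlimit(RLIMIT_CPU, &rl) != 0)
                perror("getrlimit");

        /* A zero soft limit is treated by the kernel as one second of
         * CPU time, but the setrlimit() call still returns 0. */
        rl.rlim_cur = 0;
        if (setrlimit(RLIMIT_CPU, &rl) != 0)
                perror("setrlimit");

        for (;;)
                ;       /* burn CPU until SIGXCPU (and later SIGKILL) */
        return 0;
}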
| @@ -1677,9 +1841,6 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) | |||
| 1677 | * a lot simpler! (Which we're not doing right now because we're not | 1841 | * a lot simpler! (Which we're not doing right now because we're not | 
| 1678 | * measuring them yet). | 1842 | * measuring them yet). | 
| 1679 | * | 1843 | * | 
| 1680 | * This expects to be called with tasklist_lock read-locked or better, | ||
| 1681 | * and the siglock not locked. It may momentarily take the siglock. | ||
| 1682 | * | ||
| 1683 | * When sampling multiple threads for RUSAGE_SELF, under SMP we might have | 1844 | * When sampling multiple threads for RUSAGE_SELF, under SMP we might have | 
| 1684 | * races with threads incrementing their own counters. But since word | 1845 | * races with threads incrementing their own counters. But since word | 
| 1685 | * reads are atomic, we either get new values or old values and we don't | 1846 | * reads are atomic, we either get new values or old values and we don't | 
| @@ -1687,6 +1848,25 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) | |||
| 1687 | * the c* fields from p->signal from races with exit.c updating those | 1848 | * the c* fields from p->signal from races with exit.c updating those | 
| 1688 | * fields when reaping, so a sample either gets all the additions of a | 1849 | * fields when reaping, so a sample either gets all the additions of a | 
| 1689 | * given child after it's reaped, or none so this sample is before reaping. | 1850 | * given child after it's reaped, or none so this sample is before reaping. | 
| 1851 | * | ||
| 1852 | * tasklist_lock locking optimisation: | ||
| 1853 | * If we are current and single threaded, we do not need to take the tasklist | ||
| 1854 | * lock or the siglock. No one else can take our signal_struct away, | ||
| 1855 | * no one else can reap the children to update signal->c* counters, and | ||
| 1856 | * no one else can race with the signal-> fields. | ||
| 1857 | * If we do not take the tasklist_lock, the signal-> fields could be read | ||
| 1858 | * out of order while another thread was just exiting. So we place a | ||
| 1859 | * read memory barrier when we avoid the lock. On the writer side, | ||
| 1860 | * write memory barrier is implied in __exit_signal as __exit_signal releases | ||
| 1861 | * the siglock spinlock after updating the signal-> fields. | ||
| 1862 | * | ||
| 1863 | * We don't really need the siglock when we access the non c* fields | ||
| 1864 | * of the signal_struct (for RUSAGE_SELF) even in multithreaded | ||
| 1865 | * case, since we take the tasklist lock for read and the non c* signal-> | ||
| 1866 | * fields are updated only in __exit_signal, which is called with | ||
| 1867 | * tasklist_lock taken for write, hence these two threads cannot execute | ||
| 1868 | * concurrently. | ||
| 1869 | * | ||
| 1690 | */ | 1870 | */ | 
| 1691 | 1871 | ||
| 1692 | static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | 1872 | static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | 
| @@ -1694,13 +1874,23 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
| 1694 | struct task_struct *t; | 1874 | struct task_struct *t; | 
| 1695 | unsigned long flags; | 1875 | unsigned long flags; | 
| 1696 | cputime_t utime, stime; | 1876 | cputime_t utime, stime; | 
| 1877 | int need_lock = 0; | ||
| 1697 | 1878 | ||
| 1698 | memset((char *) r, 0, sizeof *r); | 1879 | memset((char *) r, 0, sizeof *r); | 
| 1880 | utime = stime = cputime_zero; | ||
| 1699 | 1881 | ||
| 1700 | if (unlikely(!p->signal)) | 1882 | if (p != current || !thread_group_empty(p)) | 
| 1701 | return; | 1883 | need_lock = 1; | 
| 1702 | 1884 | ||
| 1703 | utime = stime = cputime_zero; | 1885 | if (need_lock) { | 
| 1886 | read_lock(&tasklist_lock); | ||
| 1887 | if (unlikely(!p->signal)) { | ||
| 1888 | read_unlock(&tasklist_lock); | ||
| 1889 | return; | ||
| 1890 | } | ||
| 1891 | } else | ||
| 1892 | /* See locking comments above */ | ||
| 1893 | smp_rmb(); | ||
| 1704 | 1894 | ||
| 1705 | switch (who) { | 1895 | switch (who) { | 
| 1706 | case RUSAGE_BOTH: | 1896 | case RUSAGE_BOTH: | 
| @@ -1740,6 +1930,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
| 1740 | BUG(); | 1930 | BUG(); | 
| 1741 | } | 1931 | } | 
| 1742 | 1932 | ||
| 1933 | if (need_lock) | ||
| 1934 | read_unlock(&tasklist_lock); | ||
| 1743 | cputime_to_timeval(utime, &r->ru_utime); | 1935 | cputime_to_timeval(utime, &r->ru_utime); | 
| 1744 | cputime_to_timeval(stime, &r->ru_stime); | 1936 | cputime_to_timeval(stime, &r->ru_stime); | 
| 1745 | } | 1937 | } | 
| @@ -1747,9 +1939,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
| 1747 | int getrusage(struct task_struct *p, int who, struct rusage __user *ru) | 1939 | int getrusage(struct task_struct *p, int who, struct rusage __user *ru) | 
| 1748 | { | 1940 | { | 
| 1749 | struct rusage r; | 1941 | struct rusage r; | 
| 1750 | read_lock(&tasklist_lock); | ||
| 1751 | k_getrusage(p, who, &r); | 1942 | k_getrusage(p, who, &r); | 
| 1752 | read_unlock(&tasklist_lock); | ||
| 1753 | return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; | 1943 | return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; | 
| 1754 | } | 1944 | } | 
| 1755 | 1945 | ||
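The RUSAGE_SELF/RUSAGE_CHILDREN split that k_getrusage() implements surfaces through the ordinary getrusage(2) call. A minimal user-space caller (illustrative only, not part of the patch):

#include <stdio.h>
#include <sys/resource.h>

static void show(const char *tag, int who)
{
        struct rusage ru;

        if (getrusage(who, &ru) != 0) {
                perror("getrusage");
                return;
        }
        printf("%s: user %ld.%06lds  sys %ld.%06lds\n", tag,
               (long)ru.ru_utime.tv_sec, (long)ru.ru_utime.tv_usec,
               (long)ru.ru_stime.tv_sec, (long)ru.ru_stime.tv_usec);
}

int main(void)
{
        show("self", RUSAGE_SELF);              /* all threads of the caller */
        show("children", RUSAGE_CHILDREN);      /* reaped children only */
        return 0;
}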
