Diffstat (limited to 'kernel')
44 files changed, 2121 insertions, 1216 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index ff1c11dc12..58908f9d15 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -12,6 +12,9 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ | |||
12 | 12 | ||
13 | obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o | 13 | obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o |
14 | obj-$(CONFIG_FUTEX) += futex.o | 14 | obj-$(CONFIG_FUTEX) += futex.o |
15 | ifeq ($(CONFIG_COMPAT),y) | ||
16 | obj-$(CONFIG_FUTEX) += futex_compat.o | ||
17 | endif | ||
15 | obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o | 18 | obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o |
16 | obj-$(CONFIG_SMP) += cpu.o spinlock.o | 19 | obj-$(CONFIG_SMP) += cpu.o spinlock.o |
17 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o | 20 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o |
diff --git a/kernel/acct.c b/kernel/acct.c
index 065d8b4e51..b327f4d201 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -449,8 +449,8 @@ static void do_acct_process(long exitcode, struct file *file) | |||
449 | /* calculate run_time in nsec*/ | 449 | /* calculate run_time in nsec*/ |
450 | do_posix_clock_monotonic_gettime(&uptime); | 450 | do_posix_clock_monotonic_gettime(&uptime); |
451 | run_time = (u64)uptime.tv_sec*NSEC_PER_SEC + uptime.tv_nsec; | 451 | run_time = (u64)uptime.tv_sec*NSEC_PER_SEC + uptime.tv_nsec; |
452 | run_time -= (u64)current->start_time.tv_sec*NSEC_PER_SEC | 452 | run_time -= (u64)current->group_leader->start_time.tv_sec * NSEC_PER_SEC |
453 | + current->start_time.tv_nsec; | 453 | + current->group_leader->start_time.tv_nsec; |
454 | /* convert nsec -> AHZ */ | 454 | /* convert nsec -> AHZ */ |
455 | elapsed = nsec_to_AHZ(run_time); | 455 | elapsed = nsec_to_AHZ(run_time); |
456 | #if ACCT_VERSION==3 | 456 | #if ACCT_VERSION==3 |
@@ -469,10 +469,10 @@ static void do_acct_process(long exitcode, struct file *file) | |||
469 | #endif | 469 | #endif |
470 | do_div(elapsed, AHZ); | 470 | do_div(elapsed, AHZ); |
471 | ac.ac_btime = xtime.tv_sec - elapsed; | 471 | ac.ac_btime = xtime.tv_sec - elapsed; |
472 | jiffies = cputime_to_jiffies(cputime_add(current->group_leader->utime, | 472 | jiffies = cputime_to_jiffies(cputime_add(current->utime, |
473 | current->signal->utime)); | 473 | current->signal->utime)); |
474 | ac.ac_utime = encode_comp_t(jiffies_to_AHZ(jiffies)); | 474 | ac.ac_utime = encode_comp_t(jiffies_to_AHZ(jiffies)); |
475 | jiffies = cputime_to_jiffies(cputime_add(current->group_leader->stime, | 475 | jiffies = cputime_to_jiffies(cputime_add(current->stime, |
476 | current->signal->stime)); | 476 | current->signal->stime)); |
477 | ac.ac_stime = encode_comp_t(jiffies_to_AHZ(jiffies)); | 477 | ac.ac_stime = encode_comp_t(jiffies_to_AHZ(jiffies)); |
478 | /* we really need to bite the bullet and change layout */ | 478 | /* we really need to bite the bullet and change layout */ |
@@ -522,9 +522,9 @@ static void do_acct_process(long exitcode, struct file *file) | |||
522 | ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */ | 522 | ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */ |
523 | ac.ac_rw = encode_comp_t(ac.ac_io / 1024); | 523 | ac.ac_rw = encode_comp_t(ac.ac_io / 1024); |
524 | ac.ac_minflt = encode_comp_t(current->signal->min_flt + | 524 | ac.ac_minflt = encode_comp_t(current->signal->min_flt + |
525 | current->group_leader->min_flt); | 525 | current->min_flt); |
526 | ac.ac_majflt = encode_comp_t(current->signal->maj_flt + | 526 | ac.ac_majflt = encode_comp_t(current->signal->maj_flt + |
527 | current->group_leader->maj_flt); | 527 | current->maj_flt); |
528 | ac.ac_swaps = encode_comp_t(0); | 528 | ac.ac_swaps = encode_comp_t(0); |
529 | ac.ac_exitcode = exitcode; | 529 | ac.ac_exitcode = exitcode; |
530 | 530 | ||
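
The do_acct_process() hunks above only change which task's counters feed the accounting record; the surrounding time arithmetic is untouched. For orientation, here is a small standalone sketch of that arithmetic (run time in nanoseconds, converted to AHZ ticks, then used to derive the record's begin time). It is illustration only: the sample values are made up, and AHZ=100 is an assumption, not something stated in the patch.

        #include <stdint.h>
        #include <stdio.h>

        #define NSEC_PER_SEC 1000000000ULL
        #define AHZ 100ULL                      /* assumed accounting tick rate */

        int main(void)
        {
                uint64_t uptime_ns = 5000ULL * NSEC_PER_SEC;   /* monotonic "now" */
                uint64_t start_ns  = 1200ULL * NSEC_PER_SEC;   /* group leader start_time */
                uint64_t wall_now  = 1700000000ULL;            /* xtime.tv_sec stand-in */

                uint64_t run_ns  = uptime_ns - start_ns;
                uint64_t elapsed = run_ns * AHZ / NSEC_PER_SEC; /* nsec_to_AHZ(run_time) */
                uint64_t btime   = wall_now - elapsed / AHZ;    /* ac_btime after do_div */

                printf("elapsed=%llu ticks btime=%llu\n",
                       (unsigned long long)elapsed, (unsigned long long)btime);
                return 0;
        }
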
diff --git a/kernel/audit.c b/kernel/audit.c
index 04fe2e301b..df57b493e1 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -55,6 +55,9 @@ | |||
55 | #include <net/netlink.h> | 55 | #include <net/netlink.h> |
56 | #include <linux/skbuff.h> | 56 | #include <linux/skbuff.h> |
57 | #include <linux/netlink.h> | 57 | #include <linux/netlink.h> |
58 | #include <linux/selinux.h> | ||
59 | |||
60 | #include "audit.h" | ||
58 | 61 | ||
59 | /* No auditing will take place until audit_initialized != 0. | 62 | /* No auditing will take place until audit_initialized != 0. |
60 | * (Initialization happens after skb_init is called.) */ | 63 | * (Initialization happens after skb_init is called.) */ |
@@ -227,49 +230,103 @@ void audit_log_lost(const char *message) | |||
227 | } | 230 | } |
228 | } | 231 | } |
229 | 232 | ||
230 | static int audit_set_rate_limit(int limit, uid_t loginuid) | 233 | static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid) |
231 | { | 234 | { |
232 | int old = audit_rate_limit; | 235 | int old = audit_rate_limit; |
233 | audit_rate_limit = limit; | 236 | |
234 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | 237 | if (sid) { |
238 | char *ctx = NULL; | ||
239 | u32 len; | ||
240 | int rc; | ||
241 | if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) | ||
242 | return rc; | ||
243 | else | ||
244 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | ||
245 | "audit_rate_limit=%d old=%d by auid=%u subj=%s", | ||
246 | limit, old, loginuid, ctx); | ||
247 | kfree(ctx); | ||
248 | } else | ||
249 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | ||
235 | "audit_rate_limit=%d old=%d by auid=%u", | 250 | "audit_rate_limit=%d old=%d by auid=%u", |
236 | audit_rate_limit, old, loginuid); | 251 | limit, old, loginuid); |
252 | audit_rate_limit = limit; | ||
237 | return old; | 253 | return old; |
238 | } | 254 | } |
239 | 255 | ||
240 | static int audit_set_backlog_limit(int limit, uid_t loginuid) | 256 | static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid) |
241 | { | 257 | { |
242 | int old = audit_backlog_limit; | 258 | int old = audit_backlog_limit; |
243 | audit_backlog_limit = limit; | 259 | |
244 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | 260 | if (sid) { |
261 | char *ctx = NULL; | ||
262 | u32 len; | ||
263 | int rc; | ||
264 | if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) | ||
265 | return rc; | ||
266 | else | ||
267 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | ||
268 | "audit_backlog_limit=%d old=%d by auid=%u subj=%s", | ||
269 | limit, old, loginuid, ctx); | ||
270 | kfree(ctx); | ||
271 | } else | ||
272 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | ||
245 | "audit_backlog_limit=%d old=%d by auid=%u", | 273 | "audit_backlog_limit=%d old=%d by auid=%u", |
246 | audit_backlog_limit, old, loginuid); | 274 | limit, old, loginuid); |
275 | audit_backlog_limit = limit; | ||
247 | return old; | 276 | return old; |
248 | } | 277 | } |
249 | 278 | ||
250 | static int audit_set_enabled(int state, uid_t loginuid) | 279 | static int audit_set_enabled(int state, uid_t loginuid, u32 sid) |
251 | { | 280 | { |
252 | int old = audit_enabled; | 281 | int old = audit_enabled; |
282 | |||
253 | if (state != 0 && state != 1) | 283 | if (state != 0 && state != 1) |
254 | return -EINVAL; | 284 | return -EINVAL; |
255 | audit_enabled = state; | 285 | |
256 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | 286 | if (sid) { |
287 | char *ctx = NULL; | ||
288 | u32 len; | ||
289 | int rc; | ||
290 | if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) | ||
291 | return rc; | ||
292 | else | ||
293 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | ||
294 | "audit_enabled=%d old=%d by auid=%u subj=%s", | ||
295 | state, old, loginuid, ctx); | ||
296 | kfree(ctx); | ||
297 | } else | ||
298 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | ||
257 | "audit_enabled=%d old=%d by auid=%u", | 299 | "audit_enabled=%d old=%d by auid=%u", |
258 | audit_enabled, old, loginuid); | 300 | state, old, loginuid); |
301 | audit_enabled = state; | ||
259 | return old; | 302 | return old; |
260 | } | 303 | } |
261 | 304 | ||
262 | static int audit_set_failure(int state, uid_t loginuid) | 305 | static int audit_set_failure(int state, uid_t loginuid, u32 sid) |
263 | { | 306 | { |
264 | int old = audit_failure; | 307 | int old = audit_failure; |
308 | |||
265 | if (state != AUDIT_FAIL_SILENT | 309 | if (state != AUDIT_FAIL_SILENT |
266 | && state != AUDIT_FAIL_PRINTK | 310 | && state != AUDIT_FAIL_PRINTK |
267 | && state != AUDIT_FAIL_PANIC) | 311 | && state != AUDIT_FAIL_PANIC) |
268 | return -EINVAL; | 312 | return -EINVAL; |
269 | audit_failure = state; | 313 | |
270 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | 314 | if (sid) { |
315 | char *ctx = NULL; | ||
316 | u32 len; | ||
317 | int rc; | ||
318 | if ((rc = selinux_ctxid_to_string(sid, &ctx, &len))) | ||
319 | return rc; | ||
320 | else | ||
321 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | ||
322 | "audit_failure=%d old=%d by auid=%u subj=%s", | ||
323 | state, old, loginuid, ctx); | ||
324 | kfree(ctx); | ||
325 | } else | ||
326 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | ||
271 | "audit_failure=%d old=%d by auid=%u", | 327 | "audit_failure=%d old=%d by auid=%u", |
272 | audit_failure, old, loginuid); | 328 | state, old, loginuid); |
329 | audit_failure = state; | ||
273 | return old; | 330 | return old; |
274 | } | 331 | } |
275 | 332 | ||
@@ -387,7 +444,7 @@ static int audit_netlink_ok(kernel_cap_t eff_cap, u16 msg_type) | |||
387 | 444 | ||
388 | static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | 445 | static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) |
389 | { | 446 | { |
390 | u32 uid, pid, seq; | 447 | u32 uid, pid, seq, sid; |
391 | void *data; | 448 | void *data; |
392 | struct audit_status *status_get, status_set; | 449 | struct audit_status *status_get, status_set; |
393 | int err; | 450 | int err; |
@@ -413,6 +470,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
413 | pid = NETLINK_CREDS(skb)->pid; | 470 | pid = NETLINK_CREDS(skb)->pid; |
414 | uid = NETLINK_CREDS(skb)->uid; | 471 | uid = NETLINK_CREDS(skb)->uid; |
415 | loginuid = NETLINK_CB(skb).loginuid; | 472 | loginuid = NETLINK_CB(skb).loginuid; |
473 | sid = NETLINK_CB(skb).sid; | ||
416 | seq = nlh->nlmsg_seq; | 474 | seq = nlh->nlmsg_seq; |
417 | data = NLMSG_DATA(nlh); | 475 | data = NLMSG_DATA(nlh); |
418 | 476 | ||
@@ -433,25 +491,43 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
433 | return -EINVAL; | 491 | return -EINVAL; |
434 | status_get = (struct audit_status *)data; | 492 | status_get = (struct audit_status *)data; |
435 | if (status_get->mask & AUDIT_STATUS_ENABLED) { | 493 | if (status_get->mask & AUDIT_STATUS_ENABLED) { |
436 | err = audit_set_enabled(status_get->enabled, loginuid); | 494 | err = audit_set_enabled(status_get->enabled, |
495 | loginuid, sid); | ||
437 | if (err < 0) return err; | 496 | if (err < 0) return err; |
438 | } | 497 | } |
439 | if (status_get->mask & AUDIT_STATUS_FAILURE) { | 498 | if (status_get->mask & AUDIT_STATUS_FAILURE) { |
440 | err = audit_set_failure(status_get->failure, loginuid); | 499 | err = audit_set_failure(status_get->failure, |
500 | loginuid, sid); | ||
441 | if (err < 0) return err; | 501 | if (err < 0) return err; |
442 | } | 502 | } |
443 | if (status_get->mask & AUDIT_STATUS_PID) { | 503 | if (status_get->mask & AUDIT_STATUS_PID) { |
444 | int old = audit_pid; | 504 | int old = audit_pid; |
505 | if (sid) { | ||
506 | char *ctx = NULL; | ||
507 | u32 len; | ||
508 | int rc; | ||
509 | if ((rc = selinux_ctxid_to_string( | ||
510 | sid, &ctx, &len))) | ||
511 | return rc; | ||
512 | else | ||
513 | audit_log(NULL, GFP_KERNEL, | ||
514 | AUDIT_CONFIG_CHANGE, | ||
515 | "audit_pid=%d old=%d by auid=%u subj=%s", | ||
516 | status_get->pid, old, | ||
517 | loginuid, ctx); | ||
518 | kfree(ctx); | ||
519 | } else | ||
520 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | ||
521 | "audit_pid=%d old=%d by auid=%u", | ||
522 | status_get->pid, old, loginuid); | ||
445 | audit_pid = status_get->pid; | 523 | audit_pid = status_get->pid; |
446 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | ||
447 | "audit_pid=%d old=%d by auid=%u", | ||
448 | audit_pid, old, loginuid); | ||
449 | } | 524 | } |
450 | if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) | 525 | if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) |
451 | audit_set_rate_limit(status_get->rate_limit, loginuid); | 526 | audit_set_rate_limit(status_get->rate_limit, |
527 | loginuid, sid); | ||
452 | if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT) | 528 | if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT) |
453 | audit_set_backlog_limit(status_get->backlog_limit, | 529 | audit_set_backlog_limit(status_get->backlog_limit, |
454 | loginuid); | 530 | loginuid, sid); |
455 | break; | 531 | break; |
456 | case AUDIT_USER: | 532 | case AUDIT_USER: |
457 | case AUDIT_FIRST_USER_MSG...AUDIT_LAST_USER_MSG: | 533 | case AUDIT_FIRST_USER_MSG...AUDIT_LAST_USER_MSG: |
@@ -465,8 +541,23 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
465 | ab = audit_log_start(NULL, GFP_KERNEL, msg_type); | 541 | ab = audit_log_start(NULL, GFP_KERNEL, msg_type); |
466 | if (ab) { | 542 | if (ab) { |
467 | audit_log_format(ab, | 543 | audit_log_format(ab, |
468 | "user pid=%d uid=%u auid=%u msg='%.1024s'", | 544 | "user pid=%d uid=%u auid=%u", |
469 | pid, uid, loginuid, (char *)data); | 545 | pid, uid, loginuid); |
546 | if (sid) { | ||
547 | char *ctx = NULL; | ||
548 | u32 len; | ||
549 | if (selinux_ctxid_to_string( | ||
550 | sid, &ctx, &len)) { | ||
551 | audit_log_format(ab, | ||
552 | " ssid=%u", sid); | ||
553 | /* Maybe call audit_panic? */ | ||
554 | } else | ||
555 | audit_log_format(ab, | ||
556 | " subj=%s", ctx); | ||
557 | kfree(ctx); | ||
558 | } | ||
559 | audit_log_format(ab, " msg='%.1024s'", | ||
560 | (char *)data); | ||
470 | audit_set_pid(ab, pid); | 561 | audit_set_pid(ab, pid); |
471 | audit_log_end(ab); | 562 | audit_log_end(ab); |
472 | } | 563 | } |
@@ -480,7 +571,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
480 | case AUDIT_LIST: | 571 | case AUDIT_LIST: |
481 | err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid, | 572 | err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid, |
482 | uid, seq, data, nlmsg_len(nlh), | 573 | uid, seq, data, nlmsg_len(nlh), |
483 | loginuid); | 574 | loginuid, sid); |
484 | break; | 575 | break; |
485 | case AUDIT_ADD_RULE: | 576 | case AUDIT_ADD_RULE: |
486 | case AUDIT_DEL_RULE: | 577 | case AUDIT_DEL_RULE: |
@@ -490,7 +581,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
490 | case AUDIT_LIST_RULES: | 581 | case AUDIT_LIST_RULES: |
491 | err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid, | 582 | err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid, |
492 | uid, seq, data, nlmsg_len(nlh), | 583 | uid, seq, data, nlmsg_len(nlh), |
493 | loginuid); | 584 | loginuid, sid); |
494 | break; | 585 | break; |
495 | case AUDIT_SIGNAL_INFO: | 586 | case AUDIT_SIGNAL_INFO: |
496 | sig_data.uid = audit_sig_uid; | 587 | sig_data.uid = audit_sig_uid; |
@@ -564,6 +655,11 @@ static int __init audit_init(void) | |||
564 | skb_queue_head_init(&audit_skb_queue); | 655 | skb_queue_head_init(&audit_skb_queue); |
565 | audit_initialized = 1; | 656 | audit_initialized = 1; |
566 | audit_enabled = audit_default; | 657 | audit_enabled = audit_default; |
658 | |||
659 | /* Register the callback with selinux. This callback will be invoked | ||
660 | * when a new policy is loaded. */ | ||
661 | selinux_audit_set_callback(&selinux_audit_rule_update); | ||
662 | |||
567 | audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized"); | 663 | audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized"); |
568 | return 0; | 664 | return 0; |
569 | } | 665 | } |
@@ -578,7 +674,7 @@ static int __init audit_enable(char *str) | |||
578 | audit_initialized ? "" : " (after initialization)"); | 674 | audit_initialized ? "" : " (after initialization)"); |
579 | if (audit_initialized) | 675 | if (audit_initialized) |
580 | audit_enabled = audit_default; | 676 | audit_enabled = audit_default; |
581 | return 0; | 677 | return 1; |
582 | } | 678 | } |
583 | 679 | ||
584 | __setup("audit=", audit_enable); | 680 | __setup("audit=", audit_enable); |
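
Each of the audit_set_*() hunks above repeats the same sequence: if the sender carries an SELinux SID, translate it with selinux_ctxid_to_string(), log the configuration change with a subj= field, and free the string; otherwise emit the old message format. A minimal userspace-style sketch of that shape follows, purely for illustration; ctxid_to_string() and log_config_change() are hypothetical stand-ins, not kernel interfaces, and error handling is simplified relative to the patch.

        #include <stdio.h>
        #include <stdlib.h>
        #include <string.h>

        /* hypothetical stand-in for selinux_ctxid_to_string() */
        static int ctxid_to_string(unsigned int sid, char **ctx, unsigned int *len)
        {
                char buf[32];

                *len = (unsigned int)snprintf(buf, sizeof(buf), "ctx-%u", sid);
                *ctx = strdup(buf);
                return *ctx ? 0 : -1;
        }

        static void log_config_change(const char *name, int value, int old,
                                      unsigned int loginuid, unsigned int sid)
        {
                char *ctx = NULL;
                unsigned int len;

                if (sid && !ctxid_to_string(sid, &ctx, &len)) {
                        printf("%s=%d old=%d by auid=%u subj=%s\n",
                               name, value, old, loginuid, ctx);
                        free(ctx);
                        return;
                }
                printf("%s=%d old=%d by auid=%u\n", name, value, old, loginuid);
        }

        int main(void)
        {
                log_config_change("audit_enabled", 1, 0, 500, 42);  /* with subject */
                log_config_change("audit_enabled", 1, 0, 500, 0);   /* without */
                return 0;
        }
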
diff --git a/kernel/audit.h b/kernel/audit.h
index bc5392076e..6f733920fd 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -54,9 +54,11 @@ enum audit_state { | |||
54 | 54 | ||
55 | /* Rule lists */ | 55 | /* Rule lists */ |
56 | struct audit_field { | 56 | struct audit_field { |
57 | u32 type; | 57 | u32 type; |
58 | u32 val; | 58 | u32 val; |
59 | u32 op; | 59 | u32 op; |
60 | char *se_str; | ||
61 | struct selinux_audit_rule *se_rule; | ||
60 | }; | 62 | }; |
61 | 63 | ||
62 | struct audit_krule { | 64 | struct audit_krule { |
@@ -86,3 +88,5 @@ extern void audit_send_reply(int pid, int seq, int type, | |||
86 | extern void audit_log_lost(const char *message); | 88 | extern void audit_log_lost(const char *message); |
87 | extern void audit_panic(const char *message); | 89 | extern void audit_panic(const char *message); |
88 | extern struct mutex audit_netlink_mutex; | 90 | extern struct mutex audit_netlink_mutex; |
91 | |||
92 | extern int selinux_audit_rule_update(void); | ||
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index d3a8539f3a..7c134906d6 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/audit.h> | 23 | #include <linux/audit.h> |
24 | #include <linux/kthread.h> | 24 | #include <linux/kthread.h> |
25 | #include <linux/netlink.h> | 25 | #include <linux/netlink.h> |
26 | #include <linux/selinux.h> | ||
26 | #include "audit.h" | 27 | #include "audit.h" |
27 | 28 | ||
28 | /* There are three lists of rules -- one to search at task creation | 29 | /* There are three lists of rules -- one to search at task creation |
@@ -42,6 +43,13 @@ struct list_head audit_filter_list[AUDIT_NR_FILTERS] = { | |||
42 | 43 | ||
43 | static inline void audit_free_rule(struct audit_entry *e) | 44 | static inline void audit_free_rule(struct audit_entry *e) |
44 | { | 45 | { |
46 | int i; | ||
47 | if (e->rule.fields) | ||
48 | for (i = 0; i < e->rule.field_count; i++) { | ||
49 | struct audit_field *f = &e->rule.fields[i]; | ||
50 | kfree(f->se_str); | ||
51 | selinux_audit_rule_free(f->se_rule); | ||
52 | } | ||
45 | kfree(e->rule.fields); | 53 | kfree(e->rule.fields); |
46 | kfree(e); | 54 | kfree(e); |
47 | } | 55 | } |
@@ -52,9 +60,29 @@ static inline void audit_free_rule_rcu(struct rcu_head *head) | |||
52 | audit_free_rule(e); | 60 | audit_free_rule(e); |
53 | } | 61 | } |
54 | 62 | ||
63 | /* Initialize an audit filterlist entry. */ | ||
64 | static inline struct audit_entry *audit_init_entry(u32 field_count) | ||
65 | { | ||
66 | struct audit_entry *entry; | ||
67 | struct audit_field *fields; | ||
68 | |||
69 | entry = kzalloc(sizeof(*entry), GFP_KERNEL); | ||
70 | if (unlikely(!entry)) | ||
71 | return NULL; | ||
72 | |||
73 | fields = kzalloc(sizeof(*fields) * field_count, GFP_KERNEL); | ||
74 | if (unlikely(!fields)) { | ||
75 | kfree(entry); | ||
76 | return NULL; | ||
77 | } | ||
78 | entry->rule.fields = fields; | ||
79 | |||
80 | return entry; | ||
81 | } | ||
82 | |||
55 | /* Unpack a filter field's string representation from user-space | 83 | /* Unpack a filter field's string representation from user-space |
56 | * buffer. */ | 84 | * buffer. */ |
57 | static __attribute__((unused)) char *audit_unpack_string(void **bufp, size_t *remain, size_t len) | 85 | static char *audit_unpack_string(void **bufp, size_t *remain, size_t len) |
58 | { | 86 | { |
59 | char *str; | 87 | char *str; |
60 | 88 | ||
@@ -84,7 +112,6 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule) | |||
84 | { | 112 | { |
85 | unsigned listnr; | 113 | unsigned listnr; |
86 | struct audit_entry *entry; | 114 | struct audit_entry *entry; |
87 | struct audit_field *fields; | ||
88 | int i, err; | 115 | int i, err; |
89 | 116 | ||
90 | err = -EINVAL; | 117 | err = -EINVAL; |
@@ -108,23 +135,14 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule) | |||
108 | goto exit_err; | 135 | goto exit_err; |
109 | 136 | ||
110 | err = -ENOMEM; | 137 | err = -ENOMEM; |
111 | entry = kmalloc(sizeof(*entry), GFP_KERNEL); | 138 | entry = audit_init_entry(rule->field_count); |
112 | if (unlikely(!entry)) | 139 | if (!entry) |
113 | goto exit_err; | ||
114 | fields = kmalloc(sizeof(*fields) * rule->field_count, GFP_KERNEL); | ||
115 | if (unlikely(!fields)) { | ||
116 | kfree(entry); | ||
117 | goto exit_err; | 140 | goto exit_err; |
118 | } | ||
119 | |||
120 | memset(&entry->rule, 0, sizeof(struct audit_krule)); | ||
121 | memset(fields, 0, sizeof(struct audit_field)); | ||
122 | 141 | ||
123 | entry->rule.flags = rule->flags & AUDIT_FILTER_PREPEND; | 142 | entry->rule.flags = rule->flags & AUDIT_FILTER_PREPEND; |
124 | entry->rule.listnr = listnr; | 143 | entry->rule.listnr = listnr; |
125 | entry->rule.action = rule->action; | 144 | entry->rule.action = rule->action; |
126 | entry->rule.field_count = rule->field_count; | 145 | entry->rule.field_count = rule->field_count; |
127 | entry->rule.fields = fields; | ||
128 | 146 | ||
129 | for (i = 0; i < AUDIT_BITMASK_SIZE; i++) | 147 | for (i = 0; i < AUDIT_BITMASK_SIZE; i++) |
130 | entry->rule.mask[i] = rule->mask[i]; | 148 | entry->rule.mask[i] = rule->mask[i]; |
@@ -150,15 +168,20 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule) | |||
150 | for (i = 0; i < rule->field_count; i++) { | 168 | for (i = 0; i < rule->field_count; i++) { |
151 | struct audit_field *f = &entry->rule.fields[i]; | 169 | struct audit_field *f = &entry->rule.fields[i]; |
152 | 170 | ||
153 | if (rule->fields[i] & AUDIT_UNUSED_BITS) { | ||
154 | err = -EINVAL; | ||
155 | goto exit_free; | ||
156 | } | ||
157 | |||
158 | f->op = rule->fields[i] & (AUDIT_NEGATE|AUDIT_OPERATORS); | 171 | f->op = rule->fields[i] & (AUDIT_NEGATE|AUDIT_OPERATORS); |
159 | f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS); | 172 | f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS); |
160 | f->val = rule->values[i]; | 173 | f->val = rule->values[i]; |
161 | 174 | ||
175 | if (f->type & AUDIT_UNUSED_BITS || | ||
176 | f->type == AUDIT_SE_USER || | ||
177 | f->type == AUDIT_SE_ROLE || | ||
178 | f->type == AUDIT_SE_TYPE || | ||
179 | f->type == AUDIT_SE_SEN || | ||
180 | f->type == AUDIT_SE_CLR) { | ||
181 | err = -EINVAL; | ||
182 | goto exit_free; | ||
183 | } | ||
184 | |||
162 | entry->rule.vers_ops = (f->op & AUDIT_OPERATORS) ? 2 : 1; | 185 | entry->rule.vers_ops = (f->op & AUDIT_OPERATORS) ? 2 : 1; |
163 | 186 | ||
164 | /* Support for legacy operators where | 187 | /* Support for legacy operators where |
@@ -188,8 +211,9 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, | |||
188 | int err = 0; | 211 | int err = 0; |
189 | struct audit_entry *entry; | 212 | struct audit_entry *entry; |
190 | void *bufp; | 213 | void *bufp; |
191 | /* size_t remain = datasz - sizeof(struct audit_rule_data); */ | 214 | size_t remain = datasz - sizeof(struct audit_rule_data); |
192 | int i; | 215 | int i; |
216 | char *str; | ||
193 | 217 | ||
194 | entry = audit_to_entry_common((struct audit_rule *)data); | 218 | entry = audit_to_entry_common((struct audit_rule *)data); |
195 | if (IS_ERR(entry)) | 219 | if (IS_ERR(entry)) |
@@ -207,10 +231,35 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, | |||
207 | 231 | ||
208 | f->op = data->fieldflags[i] & AUDIT_OPERATORS; | 232 | f->op = data->fieldflags[i] & AUDIT_OPERATORS; |
209 | f->type = data->fields[i]; | 233 | f->type = data->fields[i]; |
234 | f->val = data->values[i]; | ||
235 | f->se_str = NULL; | ||
236 | f->se_rule = NULL; | ||
210 | switch(f->type) { | 237 | switch(f->type) { |
211 | /* call type-specific conversion routines here */ | 238 | case AUDIT_SE_USER: |
212 | default: | 239 | case AUDIT_SE_ROLE: |
213 | f->val = data->values[i]; | 240 | case AUDIT_SE_TYPE: |
241 | case AUDIT_SE_SEN: | ||
242 | case AUDIT_SE_CLR: | ||
243 | str = audit_unpack_string(&bufp, &remain, f->val); | ||
244 | if (IS_ERR(str)) | ||
245 | goto exit_free; | ||
246 | entry->rule.buflen += f->val; | ||
247 | |||
248 | err = selinux_audit_rule_init(f->type, f->op, str, | ||
249 | &f->se_rule); | ||
250 | /* Keep currently invalid fields around in case they | ||
251 | * become valid after a policy reload. */ | ||
252 | if (err == -EINVAL) { | ||
253 | printk(KERN_WARNING "audit rule for selinux " | ||
254 | "\'%s\' is invalid\n", str); | ||
255 | err = 0; | ||
256 | } | ||
257 | if (err) { | ||
258 | kfree(str); | ||
259 | goto exit_free; | ||
260 | } else | ||
261 | f->se_str = str; | ||
262 | break; | ||
214 | } | 263 | } |
215 | } | 264 | } |
216 | 265 | ||
@@ -286,7 +335,14 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule) | |||
286 | data->fields[i] = f->type; | 335 | data->fields[i] = f->type; |
287 | data->fieldflags[i] = f->op; | 336 | data->fieldflags[i] = f->op; |
288 | switch(f->type) { | 337 | switch(f->type) { |
289 | /* call type-specific conversion routines here */ | 338 | case AUDIT_SE_USER: |
339 | case AUDIT_SE_ROLE: | ||
340 | case AUDIT_SE_TYPE: | ||
341 | case AUDIT_SE_SEN: | ||
342 | case AUDIT_SE_CLR: | ||
343 | data->buflen += data->values[i] = | ||
344 | audit_pack_string(&bufp, f->se_str); | ||
345 | break; | ||
290 | default: | 346 | default: |
291 | data->values[i] = f->val; | 347 | data->values[i] = f->val; |
292 | } | 348 | } |
@@ -314,7 +370,14 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b) | |||
314 | return 1; | 370 | return 1; |
315 | 371 | ||
316 | switch(a->fields[i].type) { | 372 | switch(a->fields[i].type) { |
317 | /* call type-specific comparison routines here */ | 373 | case AUDIT_SE_USER: |
374 | case AUDIT_SE_ROLE: | ||
375 | case AUDIT_SE_TYPE: | ||
376 | case AUDIT_SE_SEN: | ||
377 | case AUDIT_SE_CLR: | ||
378 | if (strcmp(a->fields[i].se_str, b->fields[i].se_str)) | ||
379 | return 1; | ||
380 | break; | ||
318 | default: | 381 | default: |
319 | if (a->fields[i].val != b->fields[i].val) | 382 | if (a->fields[i].val != b->fields[i].val) |
320 | return 1; | 383 | return 1; |
@@ -328,6 +391,81 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b) | |||
328 | return 0; | 391 | return 0; |
329 | } | 392 | } |
330 | 393 | ||
394 | /* Duplicate selinux field information. The se_rule is opaque, so must be | ||
395 | * re-initialized. */ | ||
396 | static inline int audit_dupe_selinux_field(struct audit_field *df, | ||
397 | struct audit_field *sf) | ||
398 | { | ||
399 | int ret = 0; | ||
400 | char *se_str; | ||
401 | |||
402 | /* our own copy of se_str */ | ||
403 | se_str = kstrdup(sf->se_str, GFP_KERNEL); | ||
404 | if (unlikely(IS_ERR(se_str))) | ||
405 | return -ENOMEM; | ||
406 | df->se_str = se_str; | ||
407 | |||
408 | /* our own (refreshed) copy of se_rule */ | ||
409 | ret = selinux_audit_rule_init(df->type, df->op, df->se_str, | ||
410 | &df->se_rule); | ||
411 | /* Keep currently invalid fields around in case they | ||
412 | * become valid after a policy reload. */ | ||
413 | if (ret == -EINVAL) { | ||
414 | printk(KERN_WARNING "audit rule for selinux \'%s\' is " | ||
415 | "invalid\n", df->se_str); | ||
416 | ret = 0; | ||
417 | } | ||
418 | |||
419 | return ret; | ||
420 | } | ||
421 | |||
422 | /* Duplicate an audit rule. This will be a deep copy with the exception | ||
423 | * of the watch - that pointer is carried over. The selinux specific fields | ||
424 | * will be updated in the copy. The point is to be able to replace the old | ||
425 | * rule with the new rule in the filterlist, then free the old rule. */ | ||
426 | static struct audit_entry *audit_dupe_rule(struct audit_krule *old) | ||
427 | { | ||
428 | u32 fcount = old->field_count; | ||
429 | struct audit_entry *entry; | ||
430 | struct audit_krule *new; | ||
431 | int i, err = 0; | ||
432 | |||
433 | entry = audit_init_entry(fcount); | ||
434 | if (unlikely(!entry)) | ||
435 | return ERR_PTR(-ENOMEM); | ||
436 | |||
437 | new = &entry->rule; | ||
438 | new->vers_ops = old->vers_ops; | ||
439 | new->flags = old->flags; | ||
440 | new->listnr = old->listnr; | ||
441 | new->action = old->action; | ||
442 | for (i = 0; i < AUDIT_BITMASK_SIZE; i++) | ||
443 | new->mask[i] = old->mask[i]; | ||
444 | new->buflen = old->buflen; | ||
445 | new->field_count = old->field_count; | ||
446 | memcpy(new->fields, old->fields, sizeof(struct audit_field) * fcount); | ||
447 | |||
448 | /* deep copy this information, updating the se_rule fields, because | ||
449 | * the originals will all be freed when the old rule is freed. */ | ||
450 | for (i = 0; i < fcount; i++) { | ||
451 | switch (new->fields[i].type) { | ||
452 | case AUDIT_SE_USER: | ||
453 | case AUDIT_SE_ROLE: | ||
454 | case AUDIT_SE_TYPE: | ||
455 | case AUDIT_SE_SEN: | ||
456 | case AUDIT_SE_CLR: | ||
457 | err = audit_dupe_selinux_field(&new->fields[i], | ||
458 | &old->fields[i]); | ||
459 | } | ||
460 | if (err) { | ||
461 | audit_free_rule(entry); | ||
462 | return ERR_PTR(err); | ||
463 | } | ||
464 | } | ||
465 | |||
466 | return entry; | ||
467 | } | ||
468 | |||
331 | /* Add rule to given filterlist if not a duplicate. Protected by | 469 | /* Add rule to given filterlist if not a duplicate. Protected by |
332 | * audit_netlink_mutex. */ | 470 | * audit_netlink_mutex. */ |
333 | static inline int audit_add_rule(struct audit_entry *entry, | 471 | static inline int audit_add_rule(struct audit_entry *entry, |
@@ -448,9 +586,10 @@ static int audit_list_rules(void *_dest) | |||
448 | * @data: payload data | 586 | * @data: payload data |
449 | * @datasz: size of payload data | 587 | * @datasz: size of payload data |
450 | * @loginuid: loginuid of sender | 588 | * @loginuid: loginuid of sender |
589 | * @sid: SE Linux Security ID of sender | ||
451 | */ | 590 | */ |
452 | int audit_receive_filter(int type, int pid, int uid, int seq, void *data, | 591 | int audit_receive_filter(int type, int pid, int uid, int seq, void *data, |
453 | size_t datasz, uid_t loginuid) | 592 | size_t datasz, uid_t loginuid, u32 sid) |
454 | { | 593 | { |
455 | struct task_struct *tsk; | 594 | struct task_struct *tsk; |
456 | int *dest; | 595 | int *dest; |
@@ -493,9 +632,23 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, | |||
493 | 632 | ||
494 | err = audit_add_rule(entry, | 633 | err = audit_add_rule(entry, |
495 | &audit_filter_list[entry->rule.listnr]); | 634 | &audit_filter_list[entry->rule.listnr]); |
496 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | 635 | if (sid) { |
497 | "auid=%u add rule to list=%d res=%d\n", | 636 | char *ctx = NULL; |
498 | loginuid, entry->rule.listnr, !err); | 637 | u32 len; |
638 | if (selinux_ctxid_to_string(sid, &ctx, &len)) { | ||
639 | /* Maybe call audit_panic? */ | ||
640 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | ||
641 | "auid=%u ssid=%u add rule to list=%d res=%d", | ||
642 | loginuid, sid, entry->rule.listnr, !err); | ||
643 | } else | ||
644 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | ||
645 | "auid=%u subj=%s add rule to list=%d res=%d", | ||
646 | loginuid, ctx, entry->rule.listnr, !err); | ||
647 | kfree(ctx); | ||
648 | } else | ||
649 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | ||
650 | "auid=%u add rule to list=%d res=%d", | ||
651 | loginuid, entry->rule.listnr, !err); | ||
499 | 652 | ||
500 | if (err) | 653 | if (err) |
501 | audit_free_rule(entry); | 654 | audit_free_rule(entry); |
@@ -511,9 +664,24 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, | |||
511 | 664 | ||
512 | err = audit_del_rule(entry, | 665 | err = audit_del_rule(entry, |
513 | &audit_filter_list[entry->rule.listnr]); | 666 | &audit_filter_list[entry->rule.listnr]); |
514 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | 667 | |
515 | "auid=%u remove rule from list=%d res=%d\n", | 668 | if (sid) { |
516 | loginuid, entry->rule.listnr, !err); | 669 | char *ctx = NULL; |
670 | u32 len; | ||
671 | if (selinux_ctxid_to_string(sid, &ctx, &len)) { | ||
672 | /* Maybe call audit_panic? */ | ||
673 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | ||
674 | "auid=%u ssid=%u remove rule from list=%d res=%d", | ||
675 | loginuid, sid, entry->rule.listnr, !err); | ||
676 | } else | ||
677 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | ||
678 | "auid=%u subj=%s remove rule from list=%d res=%d", | ||
679 | loginuid, ctx, entry->rule.listnr, !err); | ||
680 | kfree(ctx); | ||
681 | } else | ||
682 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | ||
683 | "auid=%u remove rule from list=%d res=%d", | ||
684 | loginuid, entry->rule.listnr, !err); | ||
517 | 685 | ||
518 | audit_free_rule(entry); | 686 | audit_free_rule(entry); |
519 | break; | 687 | break; |
@@ -628,3 +796,62 @@ unlock_and_return: | |||
628 | rcu_read_unlock(); | 796 | rcu_read_unlock(); |
629 | return result; | 797 | return result; |
630 | } | 798 | } |
799 | |||
800 | /* Check to see if the rule contains any selinux fields. Returns 1 if there | ||
801 | are selinux fields specified in the rule, 0 otherwise. */ | ||
802 | static inline int audit_rule_has_selinux(struct audit_krule *rule) | ||
803 | { | ||
804 | int i; | ||
805 | |||
806 | for (i = 0; i < rule->field_count; i++) { | ||
807 | struct audit_field *f = &rule->fields[i]; | ||
808 | switch (f->type) { | ||
809 | case AUDIT_SE_USER: | ||
810 | case AUDIT_SE_ROLE: | ||
811 | case AUDIT_SE_TYPE: | ||
812 | case AUDIT_SE_SEN: | ||
813 | case AUDIT_SE_CLR: | ||
814 | return 1; | ||
815 | } | ||
816 | } | ||
817 | |||
818 | return 0; | ||
819 | } | ||
820 | |||
821 | /* This function will re-initialize the se_rule field of all applicable rules. | ||
822 | * It will traverse the filter lists serarching for rules that contain selinux | ||
823 | * specific filter fields. When such a rule is found, it is copied, the | ||
824 | * selinux field is re-initialized, and the old rule is replaced with the | ||
825 | * updated rule. */ | ||
826 | int selinux_audit_rule_update(void) | ||
827 | { | ||
828 | struct audit_entry *entry, *n, *nentry; | ||
829 | int i, err = 0; | ||
830 | |||
831 | /* audit_netlink_mutex synchronizes the writers */ | ||
832 | mutex_lock(&audit_netlink_mutex); | ||
833 | |||
834 | for (i = 0; i < AUDIT_NR_FILTERS; i++) { | ||
835 | list_for_each_entry_safe(entry, n, &audit_filter_list[i], list) { | ||
836 | if (!audit_rule_has_selinux(&entry->rule)) | ||
837 | continue; | ||
838 | |||
839 | nentry = audit_dupe_rule(&entry->rule); | ||
840 | if (unlikely(IS_ERR(nentry))) { | ||
841 | /* save the first error encountered for the | ||
842 | * return value */ | ||
843 | if (!err) | ||
844 | err = PTR_ERR(nentry); | ||
845 | audit_panic("error updating selinux filters"); | ||
846 | list_del_rcu(&entry->list); | ||
847 | } else { | ||
848 | list_replace_rcu(&entry->list, &nentry->list); | ||
849 | } | ||
850 | call_rcu(&entry->rcu, audit_free_rule_rcu); | ||
851 | } | ||
852 | } | ||
853 | |||
854 | mutex_unlock(&audit_netlink_mutex); | ||
855 | |||
856 | return err; | ||
857 | } | ||
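
audit_data_to_entry() above now relies on audit_unpack_string() (previously marked unused) to pull each SELinux field's string out of the variable-length buffer that follows struct audit_rule_data, advancing a cursor and shrinking the remaining byte count as it goes. Below is a rough userspace analogue of that contract, with errno standing in for ERR_PTR(); the function name, payload, and field lengths are made up for the example.

        #include <errno.h>
        #include <stdio.h>
        #include <stdlib.h>
        #include <string.h>

        /* copy `len` bytes from the cursor as a NUL-terminated string and advance */
        static char *unpack_string(void **bufp, size_t *remain, size_t len)
        {
                char *str;

                if (!*bufp || len == 0 || len > *remain) {
                        errno = EINVAL;
                        return NULL;
                }
                str = malloc(len + 1);
                if (!str)
                        return NULL;                    /* errno left as ENOMEM */
                memcpy(str, *bufp, len);
                str[len] = '\0';

                *bufp = (char *)*bufp + len;            /* step past this field */
                *remain -= len;
                return str;
        }

        int main(void)
        {
                char payload[] = "staff_urole_r";       /* two packed fields */
                void *cursor = payload;
                size_t remain = sizeof(payload) - 1;

                char *user = unpack_string(&cursor, &remain, 7);  /* "staff_u" */
                char *role = unpack_string(&cursor, &remain, 6);  /* "role_r" */
                printf("user=%s role=%s remain=%zu\n", user, role, remain);
                free(user);
                free(role);
                return 0;
        }
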
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 7f160df21a..1c03a4ed1b 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -58,6 +58,7 @@ | |||
58 | #include <linux/security.h> | 58 | #include <linux/security.h> |
59 | #include <linux/list.h> | 59 | #include <linux/list.h> |
60 | #include <linux/tty.h> | 60 | #include <linux/tty.h> |
61 | #include <linux/selinux.h> | ||
61 | 62 | ||
62 | #include "audit.h" | 63 | #include "audit.h" |
63 | 64 | ||
@@ -89,7 +90,7 @@ struct audit_names { | |||
89 | uid_t uid; | 90 | uid_t uid; |
90 | gid_t gid; | 91 | gid_t gid; |
91 | dev_t rdev; | 92 | dev_t rdev; |
92 | char *ctx; | 93 | u32 osid; |
93 | }; | 94 | }; |
94 | 95 | ||
95 | struct audit_aux_data { | 96 | struct audit_aux_data { |
@@ -106,7 +107,7 @@ struct audit_aux_data_ipcctl { | |||
106 | uid_t uid; | 107 | uid_t uid; |
107 | gid_t gid; | 108 | gid_t gid; |
108 | mode_t mode; | 109 | mode_t mode; |
109 | char *ctx; | 110 | u32 osid; |
110 | }; | 111 | }; |
111 | 112 | ||
112 | struct audit_aux_data_socketcall { | 113 | struct audit_aux_data_socketcall { |
@@ -167,7 +168,8 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
167 | struct audit_context *ctx, | 168 | struct audit_context *ctx, |
168 | enum audit_state *state) | 169 | enum audit_state *state) |
169 | { | 170 | { |
170 | int i, j; | 171 | int i, j, need_sid = 1; |
172 | u32 sid; | ||
171 | 173 | ||
172 | for (i = 0; i < rule->field_count; i++) { | 174 | for (i = 0; i < rule->field_count; i++) { |
173 | struct audit_field *f = &rule->fields[i]; | 175 | struct audit_field *f = &rule->fields[i]; |
@@ -257,6 +259,27 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
257 | if (ctx) | 259 | if (ctx) |
258 | result = audit_comparator(ctx->loginuid, f->op, f->val); | 260 | result = audit_comparator(ctx->loginuid, f->op, f->val); |
259 | break; | 261 | break; |
262 | case AUDIT_SE_USER: | ||
263 | case AUDIT_SE_ROLE: | ||
264 | case AUDIT_SE_TYPE: | ||
265 | case AUDIT_SE_SEN: | ||
266 | case AUDIT_SE_CLR: | ||
267 | /* NOTE: this may return negative values indicating | ||
268 | a temporary error. We simply treat this as a | ||
269 | match for now to avoid losing information that | ||
270 | may be wanted. An error message will also be | ||
271 | logged upon error */ | ||
272 | if (f->se_rule) { | ||
273 | if (need_sid) { | ||
274 | selinux_task_ctxid(tsk, &sid); | ||
275 | need_sid = 0; | ||
276 | } | ||
277 | result = selinux_audit_rule_match(sid, f->type, | ||
278 | f->op, | ||
279 | f->se_rule, | ||
280 | ctx); | ||
281 | } | ||
282 | break; | ||
260 | case AUDIT_ARG0: | 283 | case AUDIT_ARG0: |
261 | case AUDIT_ARG1: | 284 | case AUDIT_ARG1: |
262 | case AUDIT_ARG2: | 285 | case AUDIT_ARG2: |
@@ -329,7 +352,6 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, | |||
329 | return AUDIT_BUILD_CONTEXT; | 352 | return AUDIT_BUILD_CONTEXT; |
330 | } | 353 | } |
331 | 354 | ||
332 | /* This should be called with task_lock() held. */ | ||
333 | static inline struct audit_context *audit_get_context(struct task_struct *tsk, | 355 | static inline struct audit_context *audit_get_context(struct task_struct *tsk, |
334 | int return_valid, | 356 | int return_valid, |
335 | int return_code) | 357 | int return_code) |
@@ -391,9 +413,6 @@ static inline void audit_free_names(struct audit_context *context) | |||
391 | #endif | 413 | #endif |
392 | 414 | ||
393 | for (i = 0; i < context->name_count; i++) { | 415 | for (i = 0; i < context->name_count; i++) { |
394 | char *p = context->names[i].ctx; | ||
395 | context->names[i].ctx = NULL; | ||
396 | kfree(p); | ||
397 | if (context->names[i].name) | 416 | if (context->names[i].name) |
398 | __putname(context->names[i].name); | 417 | __putname(context->names[i].name); |
399 | } | 418 | } |
@@ -416,11 +435,6 @@ static inline void audit_free_aux(struct audit_context *context) | |||
416 | dput(axi->dentry); | 435 | dput(axi->dentry); |
417 | mntput(axi->mnt); | 436 | mntput(axi->mnt); |
418 | } | 437 | } |
419 | if ( aux->type == AUDIT_IPC ) { | ||
420 | struct audit_aux_data_ipcctl *axi = (void *)aux; | ||
421 | if (axi->ctx) | ||
422 | kfree(axi->ctx); | ||
423 | } | ||
424 | 438 | ||
425 | context->aux = aux->next; | 439 | context->aux = aux->next; |
426 | kfree(aux); | 440 | kfree(aux); |
@@ -506,7 +520,7 @@ static inline void audit_free_context(struct audit_context *context) | |||
506 | printk(KERN_ERR "audit: freed %d contexts\n", count); | 520 | printk(KERN_ERR "audit: freed %d contexts\n", count); |
507 | } | 521 | } |
508 | 522 | ||
509 | static void audit_log_task_context(struct audit_buffer *ab, gfp_t gfp_mask) | 523 | static void audit_log_task_context(struct audit_buffer *ab) |
510 | { | 524 | { |
511 | char *ctx = NULL; | 525 | char *ctx = NULL; |
512 | ssize_t len = 0; | 526 | ssize_t len = 0; |
@@ -518,7 +532,7 @@ static void audit_log_task_context(struct audit_buffer *ab, gfp_t gfp_mask) | |||
518 | return; | 532 | return; |
519 | } | 533 | } |
520 | 534 | ||
521 | ctx = kmalloc(len, gfp_mask); | 535 | ctx = kmalloc(len, GFP_KERNEL); |
522 | if (!ctx) | 536 | if (!ctx) |
523 | goto error_path; | 537 | goto error_path; |
524 | 538 | ||
@@ -536,47 +550,46 @@ error_path: | |||
536 | return; | 550 | return; |
537 | } | 551 | } |
538 | 552 | ||
539 | static void audit_log_task_info(struct audit_buffer *ab, gfp_t gfp_mask) | 553 | static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) |
540 | { | 554 | { |
541 | char name[sizeof(current->comm)]; | 555 | char name[sizeof(tsk->comm)]; |
542 | struct mm_struct *mm = current->mm; | 556 | struct mm_struct *mm = tsk->mm; |
543 | struct vm_area_struct *vma; | 557 | struct vm_area_struct *vma; |
544 | 558 | ||
545 | get_task_comm(name, current); | 559 | /* tsk == current */ |
560 | |||
561 | get_task_comm(name, tsk); | ||
546 | audit_log_format(ab, " comm="); | 562 | audit_log_format(ab, " comm="); |
547 | audit_log_untrustedstring(ab, name); | 563 | audit_log_untrustedstring(ab, name); |
548 | 564 | ||
549 | if (!mm) | 565 | if (mm) { |
550 | return; | 566 | down_read(&mm->mmap_sem); |
551 | 567 | vma = mm->mmap; | |
552 | /* | 568 | while (vma) { |
553 | * this is brittle; all callers that pass GFP_ATOMIC will have | 569 | if ((vma->vm_flags & VM_EXECUTABLE) && |
554 | * NULL current->mm and we won't get here. | 570 | vma->vm_file) { |
555 | */ | 571 | audit_log_d_path(ab, "exe=", |
556 | down_read(&mm->mmap_sem); | 572 | vma->vm_file->f_dentry, |
557 | vma = mm->mmap; | 573 | vma->vm_file->f_vfsmnt); |
558 | while (vma) { | 574 | break; |
559 | if ((vma->vm_flags & VM_EXECUTABLE) && | 575 | } |
560 | vma->vm_file) { | 576 | vma = vma->vm_next; |
561 | audit_log_d_path(ab, "exe=", | ||
562 | vma->vm_file->f_dentry, | ||
563 | vma->vm_file->f_vfsmnt); | ||
564 | break; | ||
565 | } | 577 | } |
566 | vma = vma->vm_next; | 578 | up_read(&mm->mmap_sem); |
567 | } | 579 | } |
568 | up_read(&mm->mmap_sem); | 580 | audit_log_task_context(ab); |
569 | audit_log_task_context(ab, gfp_mask); | ||
570 | } | 581 | } |
571 | 582 | ||
572 | static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) | 583 | static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) |
573 | { | 584 | { |
574 | int i; | 585 | int i, call_panic = 0; |
575 | struct audit_buffer *ab; | 586 | struct audit_buffer *ab; |
576 | struct audit_aux_data *aux; | 587 | struct audit_aux_data *aux; |
577 | const char *tty; | 588 | const char *tty; |
578 | 589 | ||
579 | ab = audit_log_start(context, gfp_mask, AUDIT_SYSCALL); | 590 | /* tsk == current */ |
591 | |||
592 | ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL); | ||
580 | if (!ab) | 593 | if (!ab) |
581 | return; /* audit_panic has been called */ | 594 | return; /* audit_panic has been called */ |
582 | audit_log_format(ab, "arch=%x syscall=%d", | 595 | audit_log_format(ab, "arch=%x syscall=%d", |
@@ -587,8 +600,8 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) | |||
587 | audit_log_format(ab, " success=%s exit=%ld", | 600 | audit_log_format(ab, " success=%s exit=%ld", |
588 | (context->return_valid==AUDITSC_SUCCESS)?"yes":"no", | 601 | (context->return_valid==AUDITSC_SUCCESS)?"yes":"no", |
589 | context->return_code); | 602 | context->return_code); |
590 | if (current->signal->tty && current->signal->tty->name) | 603 | if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name) |
591 | tty = current->signal->tty->name; | 604 | tty = tsk->signal->tty->name; |
592 | else | 605 | else |
593 | tty = "(none)"; | 606 | tty = "(none)"; |
594 | audit_log_format(ab, | 607 | audit_log_format(ab, |
@@ -607,12 +620,12 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) | |||
607 | context->gid, | 620 | context->gid, |
608 | context->euid, context->suid, context->fsuid, | 621 | context->euid, context->suid, context->fsuid, |
609 | context->egid, context->sgid, context->fsgid, tty); | 622 | context->egid, context->sgid, context->fsgid, tty); |
610 | audit_log_task_info(ab, gfp_mask); | 623 | audit_log_task_info(ab, tsk); |
611 | audit_log_end(ab); | 624 | audit_log_end(ab); |
612 | 625 | ||
613 | for (aux = context->aux; aux; aux = aux->next) { | 626 | for (aux = context->aux; aux; aux = aux->next) { |
614 | 627 | ||
615 | ab = audit_log_start(context, gfp_mask, aux->type); | 628 | ab = audit_log_start(context, GFP_KERNEL, aux->type); |
616 | if (!ab) | 629 | if (!ab) |
617 | continue; /* audit_panic has been called */ | 630 | continue; /* audit_panic has been called */ |
618 | 631 | ||
@@ -620,8 +633,39 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) | |||
620 | case AUDIT_IPC: { | 633 | case AUDIT_IPC: { |
621 | struct audit_aux_data_ipcctl *axi = (void *)aux; | 634 | struct audit_aux_data_ipcctl *axi = (void *)aux; |
622 | audit_log_format(ab, | 635 | audit_log_format(ab, |
623 | " qbytes=%lx iuid=%u igid=%u mode=%x obj=%s", | 636 | " qbytes=%lx iuid=%u igid=%u mode=%x", |
624 | axi->qbytes, axi->uid, axi->gid, axi->mode, axi->ctx); | 637 | axi->qbytes, axi->uid, axi->gid, axi->mode); |
638 | if (axi->osid != 0) { | ||
639 | char *ctx = NULL; | ||
640 | u32 len; | ||
641 | if (selinux_ctxid_to_string( | ||
642 | axi->osid, &ctx, &len)) { | ||
643 | audit_log_format(ab, " osid=%u", | ||
644 | axi->osid); | ||
645 | call_panic = 1; | ||
646 | } else | ||
647 | audit_log_format(ab, " obj=%s", ctx); | ||
648 | kfree(ctx); | ||
649 | } | ||
650 | break; } | ||
651 | |||
652 | case AUDIT_IPC_SET_PERM: { | ||
653 | struct audit_aux_data_ipcctl *axi = (void *)aux; | ||
654 | audit_log_format(ab, | ||
655 | " new qbytes=%lx new iuid=%u new igid=%u new mode=%x", | ||
656 | axi->qbytes, axi->uid, axi->gid, axi->mode); | ||
657 | if (axi->osid != 0) { | ||
658 | char *ctx = NULL; | ||
659 | u32 len; | ||
660 | if (selinux_ctxid_to_string( | ||
661 | axi->osid, &ctx, &len)) { | ||
662 | audit_log_format(ab, " osid=%u", | ||
663 | axi->osid); | ||
664 | call_panic = 1; | ||
665 | } else | ||
666 | audit_log_format(ab, " obj=%s", ctx); | ||
667 | kfree(ctx); | ||
668 | } | ||
625 | break; } | 669 | break; } |
626 | 670 | ||
627 | case AUDIT_SOCKETCALL: { | 671 | case AUDIT_SOCKETCALL: { |
@@ -649,7 +693,7 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) | |||
649 | } | 693 | } |
650 | 694 | ||
651 | if (context->pwd && context->pwdmnt) { | 695 | if (context->pwd && context->pwdmnt) { |
652 | ab = audit_log_start(context, gfp_mask, AUDIT_CWD); | 696 | ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD); |
653 | if (ab) { | 697 | if (ab) { |
654 | audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt); | 698 | audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt); |
655 | audit_log_end(ab); | 699 | audit_log_end(ab); |
@@ -659,7 +703,7 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) | |||
659 | unsigned long ino = context->names[i].ino; | 703 | unsigned long ino = context->names[i].ino; |
660 | unsigned long pino = context->names[i].pino; | 704 | unsigned long pino = context->names[i].pino; |
661 | 705 | ||
662 | ab = audit_log_start(context, gfp_mask, AUDIT_PATH); | 706 | ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH); |
663 | if (!ab) | 707 | if (!ab) |
664 | continue; /* audit_panic has been called */ | 708 | continue; /* audit_panic has been called */ |
665 | 709 | ||
@@ -685,32 +729,35 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask) | |||
685 | context->names[i].gid, | 729 | context->names[i].gid, |
686 | MAJOR(context->names[i].rdev), | 730 | MAJOR(context->names[i].rdev), |
687 | MINOR(context->names[i].rdev)); | 731 | MINOR(context->names[i].rdev)); |
688 | if (context->names[i].ctx) { | 732 | if (context->names[i].osid != 0) { |
689 | audit_log_format(ab, " obj=%s", | 733 | char *ctx = NULL; |
690 | context->names[i].ctx); | 734 | u32 len; |
735 | if (selinux_ctxid_to_string( | ||
736 | context->names[i].osid, &ctx, &len)) { | ||
737 | audit_log_format(ab, " osid=%u", | ||
738 | context->names[i].osid); | ||
739 | call_panic = 2; | ||
740 | } else | ||
741 | audit_log_format(ab, " obj=%s", ctx); | ||
742 | kfree(ctx); | ||
691 | } | 743 | } |
692 | 744 | ||
693 | audit_log_end(ab); | 745 | audit_log_end(ab); |
694 | } | 746 | } |
747 | if (call_panic) | ||
748 | audit_panic("error converting sid to string"); | ||
695 | } | 749 | } |
696 | 750 | ||
697 | /** | 751 | /** |
698 | * audit_free - free a per-task audit context | 752 | * audit_free - free a per-task audit context |
699 | * @tsk: task whose audit context block to free | 753 | * @tsk: task whose audit context block to free |
700 | * | 754 | * |
701 | * Called from copy_process and __put_task_struct. | 755 | * Called from copy_process and do_exit |
702 | */ | 756 | */ |
703 | void audit_free(struct task_struct *tsk) | 757 | void audit_free(struct task_struct *tsk) |
704 | { | 758 | { |
705 | struct audit_context *context; | 759 | struct audit_context *context; |
706 | 760 | ||
707 | /* | ||
708 | * No need to lock the task - when we execute audit_free() | ||
709 | * then the task has no external references anymore, and | ||
710 | * we are tearing it down. (The locking also confuses | ||
711 | * DEBUG_LOCKDEP - this freeing may occur in softirq | ||
712 | * contexts as well, via RCU.) | ||
713 | */ | ||
714 | context = audit_get_context(tsk, 0, 0); | 761 | context = audit_get_context(tsk, 0, 0); |
715 | if (likely(!context)) | 762 | if (likely(!context)) |
716 | return; | 763 | return; |
@@ -719,8 +766,9 @@ void audit_free(struct task_struct *tsk) | |||
719 | * function (e.g., exit_group), then free context block. | 766 | * function (e.g., exit_group), then free context block. |
720 | * We use GFP_ATOMIC here because we might be doing this | 767 | * We use GFP_ATOMIC here because we might be doing this |
721 | * in the context of the idle thread */ | 768 | * in the context of the idle thread */ |
769 | /* that can happen only if we are called from do_exit() */ | ||
722 | if (context->in_syscall && context->auditable) | 770 | if (context->in_syscall && context->auditable) |
723 | audit_log_exit(context, GFP_ATOMIC); | 771 | audit_log_exit(context, tsk); |
724 | 772 | ||
725 | audit_free_context(context); | 773 | audit_free_context(context); |
726 | } | 774 | } |
@@ -743,10 +791,11 @@ void audit_free(struct task_struct *tsk) | |||
743 | * will only be written if another part of the kernel requests that it | 791 | * will only be written if another part of the kernel requests that it |
744 | * be written). | 792 | * be written). |
745 | */ | 793 | */ |
746 | void audit_syscall_entry(struct task_struct *tsk, int arch, int major, | 794 | void audit_syscall_entry(int arch, int major, |
747 | unsigned long a1, unsigned long a2, | 795 | unsigned long a1, unsigned long a2, |
748 | unsigned long a3, unsigned long a4) | 796 | unsigned long a3, unsigned long a4) |
749 | { | 797 | { |
798 | struct task_struct *tsk = current; | ||
750 | struct audit_context *context = tsk->audit_context; | 799 | struct audit_context *context = tsk->audit_context; |
751 | enum audit_state state; | 800 | enum audit_state state; |
752 | 801 | ||
@@ -824,22 +873,18 @@ void audit_syscall_entry(struct task_struct *tsk, int arch, int major, | |||
824 | * message), then write out the syscall information. In call cases, | 873 | * message), then write out the syscall information. In call cases, |
825 | * free the names stored from getname(). | 874 | * free the names stored from getname(). |
826 | */ | 875 | */ |
827 | void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code) | 876 | void audit_syscall_exit(int valid, long return_code) |
828 | { | 877 | { |
878 | struct task_struct *tsk = current; | ||
829 | struct audit_context *context; | 879 | struct audit_context *context; |
830 | 880 | ||
831 | get_task_struct(tsk); | ||
832 | task_lock(tsk); | ||
833 | context = audit_get_context(tsk, valid, return_code); | 881 | context = audit_get_context(tsk, valid, return_code); |
834 | task_unlock(tsk); | ||
835 | 882 | ||
836 | /* Not having a context here is ok, since the parent may have | ||
837 | * called __put_task_struct. */ | ||
838 | if (likely(!context)) | 883 | if (likely(!context)) |
839 | goto out; | 884 | return; |
840 | 885 | ||
841 | if (context->in_syscall && context->auditable) | 886 | if (context->in_syscall && context->auditable) |
842 | audit_log_exit(context, GFP_KERNEL); | 887 | audit_log_exit(context, tsk); |
843 | 888 | ||
844 | context->in_syscall = 0; | 889 | context->in_syscall = 0; |
845 | context->auditable = 0; | 890 | context->auditable = 0; |
@@ -854,8 +899,6 @@ void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code) | |||
854 | audit_free_aux(context); | 899 | audit_free_aux(context); |
855 | tsk->audit_context = context; | 900 | tsk->audit_context = context; |
856 | } | 901 | } |
857 | out: | ||
858 | put_task_struct(tsk); | ||
859 | } | 902 | } |
860 | 903 | ||
861 | /** | 904 | /** |
@@ -936,40 +979,11 @@ void audit_putname(const char *name) | |||
936 | #endif | 979 | #endif |
937 | } | 980 | } |
938 | 981 | ||
939 | void audit_inode_context(int idx, const struct inode *inode) | 982 | static void audit_inode_context(int idx, const struct inode *inode) |
940 | { | 983 | { |
941 | struct audit_context *context = current->audit_context; | 984 | struct audit_context *context = current->audit_context; |
942 | const char *suffix = security_inode_xattr_getsuffix(); | ||
943 | char *ctx = NULL; | ||
944 | int len = 0; | ||
945 | |||
946 | if (!suffix) | ||
947 | goto ret; | ||
948 | |||
949 | len = security_inode_getsecurity(inode, suffix, NULL, 0, 0); | ||
950 | if (len == -EOPNOTSUPP) | ||
951 | goto ret; | ||
952 | if (len < 0) | ||
953 | goto error_path; | ||
954 | |||
955 | ctx = kmalloc(len, GFP_KERNEL); | ||
956 | if (!ctx) | ||
957 | goto error_path; | ||
958 | 985 | ||
959 | len = security_inode_getsecurity(inode, suffix, ctx, len, 0); | 986 | selinux_get_inode_sid(inode, &context->names[idx].osid); |
960 | if (len < 0) | ||
961 | goto error_path; | ||
962 | |||
963 | kfree(context->names[idx].ctx); | ||
964 | context->names[idx].ctx = ctx; | ||
965 | goto ret; | ||
966 | |||
967 | error_path: | ||
968 | if (ctx) | ||
969 | kfree(ctx); | ||
970 | audit_panic("error in audit_inode_context"); | ||
971 | ret: | ||
972 | return; | ||
973 | } | 987 | } |
974 | 988 | ||
975 | 989 | ||
@@ -1155,40 +1169,37 @@ uid_t audit_get_loginuid(struct audit_context *ctx) | |||
1155 | return ctx ? ctx->loginuid : -1; | 1169 | return ctx ? ctx->loginuid : -1; |
1156 | } | 1170 | } |
1157 | 1171 | ||
1158 | static char *audit_ipc_context(struct kern_ipc_perm *ipcp) | 1172 | /** |
1173 | * audit_ipc_obj - record audit data for ipc object | ||
1174 | * @ipcp: ipc permissions | ||
1175 | * | ||
1176 | * Returns 0 for success or NULL context or < 0 on error. | ||
1177 | */ | ||
1178 | int audit_ipc_obj(struct kern_ipc_perm *ipcp) | ||
1159 | { | 1179 | { |
1180 | struct audit_aux_data_ipcctl *ax; | ||
1160 | struct audit_context *context = current->audit_context; | 1181 | struct audit_context *context = current->audit_context; |
1161 | char *ctx = NULL; | ||
1162 | int len = 0; | ||
1163 | 1182 | ||
1164 | if (likely(!context)) | 1183 | if (likely(!context)) |
1165 | return NULL; | 1184 | return 0; |
1166 | |||
1167 | len = security_ipc_getsecurity(ipcp, NULL, 0); | ||
1168 | if (len == -EOPNOTSUPP) | ||
1169 | goto ret; | ||
1170 | if (len < 0) | ||
1171 | goto error_path; | ||
1172 | |||
1173 | ctx = kmalloc(len, GFP_ATOMIC); | ||
1174 | if (!ctx) | ||
1175 | goto error_path; | ||
1176 | 1185 | ||
1177 | len = security_ipc_getsecurity(ipcp, ctx, len); | 1186 | ax = kmalloc(sizeof(*ax), GFP_ATOMIC); |
1178 | if (len < 0) | 1187 | if (!ax) |
1179 | goto error_path; | 1188 | return -ENOMEM; |
1180 | 1189 | ||
1181 | return ctx; | 1190 | ax->uid = ipcp->uid; |
1191 | ax->gid = ipcp->gid; | ||
1192 | ax->mode = ipcp->mode; | ||
1193 | selinux_get_ipc_sid(ipcp, &ax->osid); | ||
1182 | 1194 | ||
1183 | error_path: | 1195 | ax->d.type = AUDIT_IPC; |
1184 | kfree(ctx); | 1196 | ax->d.next = context->aux; |
1185 | audit_panic("error in audit_ipc_context"); | 1197 | context->aux = (void *)ax; |
1186 | ret: | 1198 | return 0; |
1187 | return NULL; | ||
1188 | } | 1199 | } |
1189 | 1200 | ||
1190 | /** | 1201 | /** |
1191 | * audit_ipc_perms - record audit data for ipc | 1202 | * audit_ipc_set_perm - record audit data for new ipc permissions |
1192 | * @qbytes: msgq bytes | 1203 | * @qbytes: msgq bytes |
1193 | * @uid: msgq user id | 1204 | * @uid: msgq user id |
1194 | * @gid: msgq group id | 1205 | * @gid: msgq group id |
@@ -1196,7 +1207,7 @@ ret: | |||
1196 | * | 1207 | * |
1197 | * Returns 0 for success or NULL context or < 0 on error. | 1208 | * Returns 0 for success or NULL context or < 0 on error. |
1198 | */ | 1209 | */ |
1199 | int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp) | 1210 | int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp) |
1200 | { | 1211 | { |
1201 | struct audit_aux_data_ipcctl *ax; | 1212 | struct audit_aux_data_ipcctl *ax; |
1202 | struct audit_context *context = current->audit_context; | 1213 | struct audit_context *context = current->audit_context; |
@@ -1212,9 +1223,9 @@ int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, str | |||
1212 | ax->uid = uid; | 1223 | ax->uid = uid; |
1213 | ax->gid = gid; | 1224 | ax->gid = gid; |
1214 | ax->mode = mode; | 1225 | ax->mode = mode; |
1215 | ax->ctx = audit_ipc_context(ipcp); | 1226 | selinux_get_ipc_sid(ipcp, &ax->osid); |
1216 | 1227 | ||
1217 | ax->d.type = AUDIT_IPC; | 1228 | ax->d.type = AUDIT_IPC_SET_PERM; |
1218 | ax->d.next = context->aux; | 1229 | ax->d.next = context->aux; |
1219 | context->aux = (void *)ax; | 1230 | context->aux = (void *)ax; |
1220 | return 0; | 1231 | return 0; |
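The new audit_ipc_obj() hook is meant to run before the IPC permission check, so the object's uid/gid/mode and SELinux SID are captured even when the check later fails. A caller inside ipc/ would follow roughly this shape (the wrapper name is hypothetical; ipcperms() is the existing DAC check):

	static int example_ipcperms_audited(struct kern_ipc_perm *ipcp, short flag)
	{
		int err;

		/* Record uid/gid/mode and the object's SELinux SID first. */
		err = audit_ipc_obj(ipcp);
		if (err)
			return err;

		/* Then perform the usual discretionary permission check. */
		return ipcperms(ipcp, flag);
	}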
diff --git a/kernel/compat.c b/kernel/compat.c index b9bdd1271f..c1601a84f8 100644 --- a/kernel/compat.c +++ b/kernel/compat.c | |||
@@ -17,7 +17,6 @@ | |||
17 | #include <linux/time.h> | 17 | #include <linux/time.h> |
18 | #include <linux/signal.h> | 18 | #include <linux/signal.h> |
19 | #include <linux/sched.h> /* for MAX_SCHEDULE_TIMEOUT */ | 19 | #include <linux/sched.h> /* for MAX_SCHEDULE_TIMEOUT */ |
20 | #include <linux/futex.h> /* for FUTEX_WAIT */ | ||
21 | #include <linux/syscalls.h> | 20 | #include <linux/syscalls.h> |
22 | #include <linux/unistd.h> | 21 | #include <linux/unistd.h> |
23 | #include <linux/security.h> | 22 | #include <linux/security.h> |
@@ -239,28 +238,6 @@ asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set, | |||
239 | return ret; | 238 | return ret; |
240 | } | 239 | } |
241 | 240 | ||
242 | #ifdef CONFIG_FUTEX | ||
243 | asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, int val, | ||
244 | struct compat_timespec __user *utime, u32 __user *uaddr2, | ||
245 | int val3) | ||
246 | { | ||
247 | struct timespec t; | ||
248 | unsigned long timeout = MAX_SCHEDULE_TIMEOUT; | ||
249 | int val2 = 0; | ||
250 | |||
251 | if ((op == FUTEX_WAIT) && utime) { | ||
252 | if (get_compat_timespec(&t, utime)) | ||
253 | return -EFAULT; | ||
254 | timeout = timespec_to_jiffies(&t) + 1; | ||
255 | } | ||
256 | if (op >= FUTEX_REQUEUE) | ||
257 | val2 = (int) (unsigned long) utime; | ||
258 | |||
259 | return do_futex((unsigned long)uaddr, op, val, timeout, | ||
260 | (unsigned long)uaddr2, val2, val3); | ||
261 | } | ||
262 | #endif | ||
263 | |||
264 | asmlinkage long compat_sys_setrlimit(unsigned int resource, | 241 | asmlinkage long compat_sys_setrlimit(unsigned int resource, |
265 | struct compat_rlimit __user *rlim) | 242 | struct compat_rlimit __user *rlim) |
266 | { | 243 | { |
diff --git a/kernel/cpu.c b/kernel/cpu.c index 8be22bd809..fe2b8d0bfe 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -18,7 +18,7 @@ | |||
18 | /* This protects CPUs going up and down... */ | 18 | /* This protects CPUs going up and down... */ |
19 | static DECLARE_MUTEX(cpucontrol); | 19 | static DECLARE_MUTEX(cpucontrol); |
20 | 20 | ||
21 | static struct notifier_block *cpu_chain; | 21 | static BLOCKING_NOTIFIER_HEAD(cpu_chain); |
22 | 22 | ||
23 | #ifdef CONFIG_HOTPLUG_CPU | 23 | #ifdef CONFIG_HOTPLUG_CPU |
24 | static struct task_struct *lock_cpu_hotplug_owner; | 24 | static struct task_struct *lock_cpu_hotplug_owner; |
@@ -71,21 +71,13 @@ EXPORT_SYMBOL_GPL(lock_cpu_hotplug_interruptible); | |||
71 | /* Need to know about CPUs going up/down? */ | 71 | /* Need to know about CPUs going up/down? */ |
72 | int register_cpu_notifier(struct notifier_block *nb) | 72 | int register_cpu_notifier(struct notifier_block *nb) |
73 | { | 73 | { |
74 | int ret; | 74 | return blocking_notifier_chain_register(&cpu_chain, nb); |
75 | |||
76 | if ((ret = lock_cpu_hotplug_interruptible()) != 0) | ||
77 | return ret; | ||
78 | ret = notifier_chain_register(&cpu_chain, nb); | ||
79 | unlock_cpu_hotplug(); | ||
80 | return ret; | ||
81 | } | 75 | } |
82 | EXPORT_SYMBOL(register_cpu_notifier); | 76 | EXPORT_SYMBOL(register_cpu_notifier); |
83 | 77 | ||
84 | void unregister_cpu_notifier(struct notifier_block *nb) | 78 | void unregister_cpu_notifier(struct notifier_block *nb) |
85 | { | 79 | { |
86 | lock_cpu_hotplug(); | 80 | blocking_notifier_chain_unregister(&cpu_chain, nb); |
87 | notifier_chain_unregister(&cpu_chain, nb); | ||
88 | unlock_cpu_hotplug(); | ||
89 | } | 81 | } |
90 | EXPORT_SYMBOL(unregister_cpu_notifier); | 82 | EXPORT_SYMBOL(unregister_cpu_notifier); |
91 | 83 | ||
@@ -141,7 +133,7 @@ int cpu_down(unsigned int cpu) | |||
141 | goto out; | 133 | goto out; |
142 | } | 134 | } |
143 | 135 | ||
144 | err = notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, | 136 | err = blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, |
145 | (void *)(long)cpu); | 137 | (void *)(long)cpu); |
146 | if (err == NOTIFY_BAD) { | 138 | if (err == NOTIFY_BAD) { |
147 | printk("%s: attempt to take down CPU %u failed\n", | 139 | printk("%s: attempt to take down CPU %u failed\n", |
@@ -159,7 +151,7 @@ int cpu_down(unsigned int cpu) | |||
159 | p = __stop_machine_run(take_cpu_down, NULL, cpu); | 151 | p = __stop_machine_run(take_cpu_down, NULL, cpu); |
160 | if (IS_ERR(p)) { | 152 | if (IS_ERR(p)) { |
161 | /* CPU didn't die: tell everyone. Can't complain. */ | 153 | /* CPU didn't die: tell everyone. Can't complain. */ |
162 | if (notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED, | 154 | if (blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED, |
163 | (void *)(long)cpu) == NOTIFY_BAD) | 155 | (void *)(long)cpu) == NOTIFY_BAD) |
164 | BUG(); | 156 | BUG(); |
165 | 157 | ||
@@ -182,8 +174,8 @@ int cpu_down(unsigned int cpu) | |||
182 | put_cpu(); | 174 | put_cpu(); |
183 | 175 | ||
184 | /* CPU is completely dead: tell everyone. Too late to complain. */ | 176 | /* CPU is completely dead: tell everyone. Too late to complain. */ |
185 | if (notifier_call_chain(&cpu_chain, CPU_DEAD, (void *)(long)cpu) | 177 | if (blocking_notifier_call_chain(&cpu_chain, CPU_DEAD, |
186 | == NOTIFY_BAD) | 178 | (void *)(long)cpu) == NOTIFY_BAD) |
187 | BUG(); | 179 | BUG(); |
188 | 180 | ||
189 | check_for_tasks(cpu); | 181 | check_for_tasks(cpu); |
@@ -211,7 +203,7 @@ int __devinit cpu_up(unsigned int cpu) | |||
211 | goto out; | 203 | goto out; |
212 | } | 204 | } |
213 | 205 | ||
214 | ret = notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu); | 206 | ret = blocking_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu); |
215 | if (ret == NOTIFY_BAD) { | 207 | if (ret == NOTIFY_BAD) { |
216 | printk("%s: attempt to bring up CPU %u failed\n", | 208 | printk("%s: attempt to bring up CPU %u failed\n", |
217 | __FUNCTION__, cpu); | 209 | __FUNCTION__, cpu); |
@@ -226,11 +218,12 @@ int __devinit cpu_up(unsigned int cpu) | |||
226 | BUG_ON(!cpu_online(cpu)); | 218 | BUG_ON(!cpu_online(cpu)); |
227 | 219 | ||
228 | /* Now call notifier in preparation. */ | 220 | /* Now call notifier in preparation. */ |
229 | notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu); | 221 | blocking_notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu); |
230 | 222 | ||
231 | out_notify: | 223 | out_notify: |
232 | if (ret != 0) | 224 | if (ret != 0) |
233 | notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu); | 225 | blocking_notifier_call_chain(&cpu_chain, |
226 | CPU_UP_CANCELED, hcpu); | ||
234 | out: | 227 | out: |
235 | unlock_cpu_hotplug(); | 228 | unlock_cpu_hotplug(); |
236 | return ret; | 229 | return ret; |
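The switch to a BLOCKING_NOTIFIER_HEAD changes only the chain internals; users keep calling register_cpu_notifier()/unregister_cpu_notifier(), and their callbacks may now sleep since the chain runs outside any spinlock. A minimal sketch of such a user (the callback and its per-CPU bookkeeping are hypothetical):

	#include <linux/cpu.h>
	#include <linux/init.h>
	#include <linux/notifier.h>

	static int example_cpu_callback(struct notifier_block *nb,
					unsigned long action, void *hcpu)
	{
		unsigned int cpu = (unsigned long)hcpu;

		switch (action) {
		case CPU_UP_PREPARE:
			/* Allocate per-CPU state for 'cpu'; sleeping is fine here. */
			printk(KERN_DEBUG "example: preparing cpu %u\n", cpu);
			break;
		case CPU_UP_CANCELED:
		case CPU_DEAD:
			/* Tear the per-CPU state for 'cpu' back down. */
			printk(KERN_DEBUG "example: releasing cpu %u\n", cpu);
			break;
		}
		return NOTIFY_OK;
	}

	static struct notifier_block example_cpu_nb = {
		.notifier_call	= example_cpu_callback,
	};

	static int __init example_cpu_hotplug_init(void)
	{
		return register_cpu_notifier(&example_cpu_nb);
	}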
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 18aea1bd12..ab81fdd457 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -616,12 +616,10 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) | |||
616 | * current->cpuset if a task has its memory placement changed. | 616 | * current->cpuset if a task has its memory placement changed. |
617 | * Do not call this routine if in_interrupt(). | 617 | * Do not call this routine if in_interrupt(). |
618 | * | 618 | * |
619 | * Call without callback_mutex or task_lock() held. May be called | 619 | * Call without callback_mutex or task_lock() held. May be |
620 | * with or without manage_mutex held. Doesn't need task_lock to guard | 620 | * called with or without manage_mutex held. Thanks in part to |
621 | * against another task changing a non-NULL cpuset pointer to NULL, | 621 | * 'the_top_cpuset_hack', the tasks cpuset pointer will never |
622 | * as that is only done by a task on itself, and if the current task | 622 | * be NULL. This routine also might acquire callback_mutex and |
623 | * is here, it is not simultaneously in the exit code NULL'ing its | ||
624 | * cpuset pointer. This routine also might acquire callback_mutex and | ||
625 | * current->mm->mmap_sem during call. | 623 | * current->mm->mmap_sem during call. |
626 | * | 624 | * |
627 | * Reading current->cpuset->mems_generation doesn't need task_lock | 625 | * Reading current->cpuset->mems_generation doesn't need task_lock |
@@ -836,6 +834,55 @@ static int update_cpumask(struct cpuset *cs, char *buf) | |||
836 | } | 834 | } |
837 | 835 | ||
838 | /* | 836 | /* |
837 | * cpuset_migrate_mm | ||
838 | * | ||
839 | * Migrate memory region from one set of nodes to another. | ||
840 | * | ||
841 | * Temporarily set tasks mems_allowed to target nodes of migration, | ||
842 | * so that the migration code can allocate pages on these nodes. | ||
843 | * | ||
844 | * Call holding manage_mutex, so our current->cpuset won't change | ||
845 | * during this call, as manage_mutex holds off any attach_task() | ||
846 | * calls. Therefore we don't need to take task_lock around the | ||
847 | * call to guarantee_online_mems(), as we know no one is changing | ||
848 | * our tasks cpuset. | ||
849 | * | ||
850 | * Hold callback_mutex around the two modifications of our tasks | ||
851 | * mems_allowed to synchronize with cpuset_mems_allowed(). | ||
852 | * | ||
853 | * While the mm_struct we are migrating is typically from some | ||
854 | * other task, the task_struct mems_allowed that we are hacking | ||
855 | * is for our current task, which must allocate new pages for that | ||
856 | * migrating memory region. | ||
857 | * | ||
858 | * We call cpuset_update_task_memory_state() before hacking | ||
859 | * our tasks mems_allowed, so that we are assured of being in | ||
860 | * sync with our tasks cpuset, and in particular, callbacks to | ||
861 | * cpuset_update_task_memory_state() from nested page allocations | ||
862 | * won't see any mismatch of our cpuset and task mems_generation | ||
863 | * values, so won't overwrite our hacked tasks mems_allowed | ||
864 | * nodemask. | ||
865 | */ | ||
866 | |||
867 | static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, | ||
868 | const nodemask_t *to) | ||
869 | { | ||
870 | struct task_struct *tsk = current; | ||
871 | |||
872 | cpuset_update_task_memory_state(); | ||
873 | |||
874 | mutex_lock(&callback_mutex); | ||
875 | tsk->mems_allowed = *to; | ||
876 | mutex_unlock(&callback_mutex); | ||
877 | |||
878 | do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); | ||
879 | |||
880 | mutex_lock(&callback_mutex); | ||
881 | guarantee_online_mems(tsk->cpuset, &tsk->mems_allowed); | ||
882 | mutex_unlock(&callback_mutex); | ||
883 | } | ||
884 | |||
885 | /* | ||
839 | * Handle user request to change the 'mems' memory placement | 886 | * Handle user request to change the 'mems' memory placement |
840 | * of a cpuset. Needs to validate the request, update the | 887 | * of a cpuset. Needs to validate the request, update the |
841 | * cpusets mems_allowed and mems_generation, and for each | 888 | * cpusets mems_allowed and mems_generation, and for each |
@@ -947,10 +994,8 @@ static int update_nodemask(struct cpuset *cs, char *buf) | |||
947 | struct mm_struct *mm = mmarray[i]; | 994 | struct mm_struct *mm = mmarray[i]; |
948 | 995 | ||
949 | mpol_rebind_mm(mm, &cs->mems_allowed); | 996 | mpol_rebind_mm(mm, &cs->mems_allowed); |
950 | if (migrate) { | 997 | if (migrate) |
951 | do_migrate_pages(mm, &oldmem, &cs->mems_allowed, | 998 | cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed); |
952 | MPOL_MF_MOVE_ALL); | ||
953 | } | ||
954 | mmput(mm); | 999 | mmput(mm); |
955 | } | 1000 | } |
956 | 1001 | ||
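Both call sites of the new helper follow the same pattern: rebind the mempolicy first, then migrate pages if the cpuset asks for it, all under manage_mutex as the comment above requires. Condensed into one illustrative function (the name is hypothetical; it would sit in kernel/cpuset.c next to update_nodemask()):

	static void example_rebind_and_migrate(struct cpuset *cs,
					       struct mm_struct *mm,
					       const nodemask_t *oldmem)
	{
		/*
		 * Caller holds manage_mutex, which keeps current->cpuset
		 * stable across the temporary mems_allowed rewrite done
		 * inside cpuset_migrate_mm().
		 */
		mpol_rebind_mm(mm, &cs->mems_allowed);
		if (is_memory_migrate(cs))
			cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
	}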
@@ -1185,11 +1230,11 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) | |||
1185 | mm = get_task_mm(tsk); | 1230 | mm = get_task_mm(tsk); |
1186 | if (mm) { | 1231 | if (mm) { |
1187 | mpol_rebind_mm(mm, &to); | 1232 | mpol_rebind_mm(mm, &to); |
1233 | if (is_memory_migrate(cs)) | ||
1234 | cpuset_migrate_mm(mm, &from, &to); | ||
1188 | mmput(mm); | 1235 | mmput(mm); |
1189 | } | 1236 | } |
1190 | 1237 | ||
1191 | if (is_memory_migrate(cs)) | ||
1192 | do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL); | ||
1193 | put_task_struct(tsk); | 1238 | put_task_struct(tsk); |
1194 | synchronize_rcu(); | 1239 | synchronize_rcu(); |
1195 | if (atomic_dec_and_test(&oldcs->count)) | 1240 | if (atomic_dec_and_test(&oldcs->count)) |
@@ -2186,19 +2231,25 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) | |||
2186 | * So only GFP_KERNEL allocations, if all nodes in the cpuset are | 2231 | * So only GFP_KERNEL allocations, if all nodes in the cpuset are |
2187 | * short of memory, might require taking the callback_mutex mutex. | 2232 | * short of memory, might require taking the callback_mutex mutex. |
2188 | * | 2233 | * |
2189 | * The first loop over the zonelist in mm/page_alloc.c:__alloc_pages() | 2234 | * The first call here from mm/page_alloc:get_page_from_freelist() |
2190 | * calls here with __GFP_HARDWALL always set in gfp_mask, enforcing | 2235 | * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so |
2191 | * hardwall cpusets - no allocation on a node outside the cpuset is | 2236 | * no allocation on a node outside the cpuset is allowed (unless in |
2192 | * allowed (unless in interrupt, of course). | 2237 | * interrupt, of course). |
2193 | * | 2238 | * |
2194 | * The second loop doesn't even call here for GFP_ATOMIC requests | 2239 | * The second pass through get_page_from_freelist() doesn't even call |
2195 | * (if the __alloc_pages() local variable 'wait' is set). That check | 2240 | * here for GFP_ATOMIC calls. For those calls, the __alloc_pages() |
2196 | * and the checks below have the combined affect in the second loop of | 2241 | * variable 'wait' is not set, and the bit ALLOC_CPUSET is not set |
2197 | * the __alloc_pages() routine that: | 2242 | * in alloc_flags. That logic and the checks below have the combined |
2243 | * effect that: | ||
2198 | * in_interrupt - any node ok (current task context irrelevant) | 2244 | * in_interrupt - any node ok (current task context irrelevant) |
2199 | * GFP_ATOMIC - any node ok | 2245 | * GFP_ATOMIC - any node ok |
2200 | * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok | 2246 | * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok |
2201 | * GFP_USER - only nodes in current tasks mems allowed ok. | 2247 | * GFP_USER - only nodes in current tasks mems allowed ok. |
2248 | * | ||
2249 | * Rule: | ||
2250 | * Don't call cpuset_zone_allowed() if you can't sleep, unless you | ||
2251 | * pass in the __GFP_HARDWALL flag set in gfp_flag, which disables | ||
2252 | * the code that might scan up ancestor cpusets and sleep. | ||
2202 | **/ | 2253 | **/ |
2203 | 2254 | ||
2204 | int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) | 2255 | int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) |
@@ -2210,6 +2261,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) | |||
2210 | if (in_interrupt()) | 2261 | if (in_interrupt()) |
2211 | return 1; | 2262 | return 1; |
2212 | node = z->zone_pgdat->node_id; | 2263 | node = z->zone_pgdat->node_id; |
2264 | might_sleep_if(!(gfp_mask & __GFP_HARDWALL)); | ||
2213 | if (node_isset(node, current->mems_allowed)) | 2265 | if (node_isset(node, current->mems_allowed)) |
2214 | return 1; | 2266 | return 1; |
2215 | if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */ | 2267 | if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */ |
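The new might_sleep_if() makes the rule at the end of the comment enforceable: only callers that pass __GFP_HARDWALL may be atomic, because only they are guaranteed never to scan ancestor cpusets (which can take callback_mutex and sleep). A small sketch of the two caller flavours, using the cpuset_zone_allowed() inline that fronts __cpuset_zone_allowed() (the wrapper itself is illustrative):

	static inline int example_zone_allowed(struct zone *z, gfp_t gfp_mask,
					       int may_sleep)
	{
		if (!may_sleep)
			/* Atomic path: stop at the hardwall, never sleep. */
			return cpuset_zone_allowed(z, gfp_mask | __GFP_HARDWALL);

		/* GFP_KERNEL-style path: may scan ancestors and sleep. */
		return cpuset_zone_allowed(z, gfp_mask);
	}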
diff --git a/kernel/exit.c b/kernel/exit.c index 8037405e13..e95b932822 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -29,8 +29,13 @@ | |||
29 | #include <linux/cpuset.h> | 29 | #include <linux/cpuset.h> |
30 | #include <linux/syscalls.h> | 30 | #include <linux/syscalls.h> |
31 | #include <linux/signal.h> | 31 | #include <linux/signal.h> |
32 | #include <linux/posix-timers.h> | ||
32 | #include <linux/cn_proc.h> | 33 | #include <linux/cn_proc.h> |
33 | #include <linux/mutex.h> | 34 | #include <linux/mutex.h> |
35 | #include <linux/futex.h> | ||
36 | #include <linux/compat.h> | ||
37 | #include <linux/pipe_fs_i.h> | ||
38 | #include <linux/audit.h> /* for audit_free() */ | ||
34 | 39 | ||
35 | #include <asm/uaccess.h> | 40 | #include <asm/uaccess.h> |
36 | #include <asm/unistd.h> | 41 | #include <asm/unistd.h> |
@@ -48,15 +53,85 @@ static void __unhash_process(struct task_struct *p) | |||
48 | { | 53 | { |
49 | nr_threads--; | 54 | nr_threads--; |
50 | detach_pid(p, PIDTYPE_PID); | 55 | detach_pid(p, PIDTYPE_PID); |
51 | detach_pid(p, PIDTYPE_TGID); | ||
52 | if (thread_group_leader(p)) { | 56 | if (thread_group_leader(p)) { |
53 | detach_pid(p, PIDTYPE_PGID); | 57 | detach_pid(p, PIDTYPE_PGID); |
54 | detach_pid(p, PIDTYPE_SID); | 58 | detach_pid(p, PIDTYPE_SID); |
55 | if (p->pid) | 59 | |
56 | __get_cpu_var(process_counts)--; | 60 | list_del_rcu(&p->tasks); |
61 | __get_cpu_var(process_counts)--; | ||
57 | } | 62 | } |
63 | list_del_rcu(&p->thread_group); | ||
64 | remove_parent(p); | ||
65 | } | ||
66 | |||
67 | /* | ||
68 | * This function expects the tasklist_lock write-locked. | ||
69 | */ | ||
70 | static void __exit_signal(struct task_struct *tsk) | ||
71 | { | ||
72 | struct signal_struct *sig = tsk->signal; | ||
73 | struct sighand_struct *sighand; | ||
74 | |||
75 | BUG_ON(!sig); | ||
76 | BUG_ON(!atomic_read(&sig->count)); | ||
77 | |||
78 | rcu_read_lock(); | ||
79 | sighand = rcu_dereference(tsk->sighand); | ||
80 | spin_lock(&sighand->siglock); | ||
58 | 81 | ||
59 | REMOVE_LINKS(p); | 82 | posix_cpu_timers_exit(tsk); |
83 | if (atomic_dec_and_test(&sig->count)) | ||
84 | posix_cpu_timers_exit_group(tsk); | ||
85 | else { | ||
86 | /* | ||
87 | * If there is any task waiting for the group exit | ||
88 | * then notify it: | ||
89 | */ | ||
90 | if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) { | ||
91 | wake_up_process(sig->group_exit_task); | ||
92 | sig->group_exit_task = NULL; | ||
93 | } | ||
94 | if (tsk == sig->curr_target) | ||
95 | sig->curr_target = next_thread(tsk); | ||
96 | /* | ||
97 | * Accumulate here the counters for all threads but the | ||
98 | * group leader as they die, so they can be added into | ||
99 | * the process-wide totals when those are taken. | ||
100 | * The group leader stays around as a zombie as long | ||
101 | * as there are other threads. When it gets reaped, | ||
102 | * the exit.c code will add its counts into these totals. | ||
103 | * We won't ever get here for the group leader, since it | ||
104 | * will have been the last reference on the signal_struct. | ||
105 | */ | ||
106 | sig->utime = cputime_add(sig->utime, tsk->utime); | ||
107 | sig->stime = cputime_add(sig->stime, tsk->stime); | ||
108 | sig->min_flt += tsk->min_flt; | ||
109 | sig->maj_flt += tsk->maj_flt; | ||
110 | sig->nvcsw += tsk->nvcsw; | ||
111 | sig->nivcsw += tsk->nivcsw; | ||
112 | sig->sched_time += tsk->sched_time; | ||
113 | sig = NULL; /* Marker for below. */ | ||
114 | } | ||
115 | |||
116 | __unhash_process(tsk); | ||
117 | |||
118 | tsk->signal = NULL; | ||
119 | tsk->sighand = NULL; | ||
120 | spin_unlock(&sighand->siglock); | ||
121 | rcu_read_unlock(); | ||
122 | |||
123 | __cleanup_sighand(sighand); | ||
124 | clear_tsk_thread_flag(tsk,TIF_SIGPENDING); | ||
125 | flush_sigqueue(&tsk->pending); | ||
126 | if (sig) { | ||
127 | flush_sigqueue(&sig->shared_pending); | ||
128 | __cleanup_signal(sig); | ||
129 | } | ||
130 | } | ||
131 | |||
132 | static void delayed_put_task_struct(struct rcu_head *rhp) | ||
133 | { | ||
134 | put_task_struct(container_of(rhp, struct task_struct, rcu)); | ||
60 | } | 135 | } |
61 | 136 | ||
62 | void release_task(struct task_struct * p) | 137 | void release_task(struct task_struct * p) |
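release_task() below now hands the final put_task_struct() to call_rcu(), so a task found under rcu_read_lock() stays valid until the read-side section ends. That is what makes lockless lookups of this shape safe (an assumed reader, relying on the RCU-based pid hash from the same series rather than tasklist_lock):

	static struct task_struct *example_find_get_task(pid_t nr)
	{
		struct task_struct *p;

		rcu_read_lock();
		p = find_task_by_pid(nr);
		if (p)
			get_task_struct(p);	/* memory cannot be freed before the grace period */
		rcu_read_unlock();

		return p;	/* caller eventually drops it with put_task_struct() */
	}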
@@ -65,21 +140,14 @@ void release_task(struct task_struct * p) | |||
65 | task_t *leader; | 140 | task_t *leader; |
66 | struct dentry *proc_dentry; | 141 | struct dentry *proc_dentry; |
67 | 142 | ||
68 | repeat: | 143 | repeat: |
69 | atomic_dec(&p->user->processes); | 144 | atomic_dec(&p->user->processes); |
70 | spin_lock(&p->proc_lock); | 145 | spin_lock(&p->proc_lock); |
71 | proc_dentry = proc_pid_unhash(p); | 146 | proc_dentry = proc_pid_unhash(p); |
72 | write_lock_irq(&tasklist_lock); | 147 | write_lock_irq(&tasklist_lock); |
73 | if (unlikely(p->ptrace)) | 148 | ptrace_unlink(p); |
74 | __ptrace_unlink(p); | ||
75 | BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); | 149 | BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); |
76 | __exit_signal(p); | 150 | __exit_signal(p); |
77 | /* | ||
78 | * Note that the fastpath in sys_times depends on __exit_signal having | ||
79 | * updated the counters before a task is removed from the tasklist of | ||
80 | * the process by __unhash_process. | ||
81 | */ | ||
82 | __unhash_process(p); | ||
83 | 151 | ||
84 | /* | 152 | /* |
85 | * If we are the last non-leader member of the thread | 153 | * If we are the last non-leader member of the thread |
@@ -107,28 +175,13 @@ repeat: | |||
107 | spin_unlock(&p->proc_lock); | 175 | spin_unlock(&p->proc_lock); |
108 | proc_pid_flush(proc_dentry); | 176 | proc_pid_flush(proc_dentry); |
109 | release_thread(p); | 177 | release_thread(p); |
110 | put_task_struct(p); | 178 | call_rcu(&p->rcu, delayed_put_task_struct); |
111 | 179 | ||
112 | p = leader; | 180 | p = leader; |
113 | if (unlikely(zap_leader)) | 181 | if (unlikely(zap_leader)) |
114 | goto repeat; | 182 | goto repeat; |
115 | } | 183 | } |
116 | 184 | ||
117 | /* we are using it only for SMP init */ | ||
118 | |||
119 | void unhash_process(struct task_struct *p) | ||
120 | { | ||
121 | struct dentry *proc_dentry; | ||
122 | |||
123 | spin_lock(&p->proc_lock); | ||
124 | proc_dentry = proc_pid_unhash(p); | ||
125 | write_lock_irq(&tasklist_lock); | ||
126 | __unhash_process(p); | ||
127 | write_unlock_irq(&tasklist_lock); | ||
128 | spin_unlock(&p->proc_lock); | ||
129 | proc_pid_flush(proc_dentry); | ||
130 | } | ||
131 | |||
132 | /* | 185 | /* |
133 | * This checks not only the pgrp, but falls back on the pid if no | 186 | * This checks not only the pgrp, but falls back on the pid if no |
134 | * satisfactory pgrp is found. I dunno - gdb doesn't work correctly | 187 | * satisfactory pgrp is found. I dunno - gdb doesn't work correctly |
@@ -236,10 +289,10 @@ static void reparent_to_init(void) | |||
236 | 289 | ||
237 | ptrace_unlink(current); | 290 | ptrace_unlink(current); |
238 | /* Reparent to init */ | 291 | /* Reparent to init */ |
239 | REMOVE_LINKS(current); | 292 | remove_parent(current); |
240 | current->parent = child_reaper; | 293 | current->parent = child_reaper; |
241 | current->real_parent = child_reaper; | 294 | current->real_parent = child_reaper; |
242 | SET_LINKS(current); | 295 | add_parent(current); |
243 | 296 | ||
244 | /* Set the exit signal to SIGCHLD so we signal init on exit */ | 297 | /* Set the exit signal to SIGCHLD so we signal init on exit */ |
245 | current->exit_signal = SIGCHLD; | 298 | current->exit_signal = SIGCHLD; |
@@ -536,13 +589,13 @@ static void exit_mm(struct task_struct * tsk) | |||
536 | mmput(mm); | 589 | mmput(mm); |
537 | } | 590 | } |
538 | 591 | ||
539 | static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper) | 592 | static inline void choose_new_parent(task_t *p, task_t *reaper) |
540 | { | 593 | { |
541 | /* | 594 | /* |
542 | * Make sure we're not reparenting to ourselves and that | 595 | * Make sure we're not reparenting to ourselves and that |
543 | * the parent is not a zombie. | 596 | * the parent is not a zombie. |
544 | */ | 597 | */ |
545 | BUG_ON(p == reaper || reaper->exit_state >= EXIT_ZOMBIE); | 598 | BUG_ON(p == reaper || reaper->exit_state); |
546 | p->real_parent = reaper; | 599 | p->real_parent = reaper; |
547 | } | 600 | } |
548 | 601 | ||
@@ -567,9 +620,9 @@ static void reparent_thread(task_t *p, task_t *father, int traced) | |||
567 | * anyway, so let go of it. | 620 | * anyway, so let go of it. |
568 | */ | 621 | */ |
569 | p->ptrace = 0; | 622 | p->ptrace = 0; |
570 | list_del_init(&p->sibling); | 623 | remove_parent(p); |
571 | p->parent = p->real_parent; | 624 | p->parent = p->real_parent; |
572 | list_add_tail(&p->sibling, &p->parent->children); | 625 | add_parent(p); |
573 | 626 | ||
574 | /* If we'd notified the old parent about this child's death, | 627 | /* If we'd notified the old parent about this child's death, |
575 | * also notify the new parent. | 628 | * also notify the new parent. |
@@ -643,7 +696,7 @@ static void forget_original_parent(struct task_struct * father, | |||
643 | 696 | ||
644 | if (father == p->real_parent) { | 697 | if (father == p->real_parent) { |
645 | /* reparent with a reaper, real father it's us */ | 698 | /* reparent with a reaper, real father it's us */ |
646 | choose_new_parent(p, reaper, child_reaper); | 699 | choose_new_parent(p, reaper); |
647 | reparent_thread(p, father, 0); | 700 | reparent_thread(p, father, 0); |
648 | } else { | 701 | } else { |
649 | /* reparent ptraced task to its real parent */ | 702 | /* reparent ptraced task to its real parent */ |
@@ -664,7 +717,7 @@ static void forget_original_parent(struct task_struct * father, | |||
664 | } | 717 | } |
665 | list_for_each_safe(_p, _n, &father->ptrace_children) { | 718 | list_for_each_safe(_p, _n, &father->ptrace_children) { |
666 | p = list_entry(_p,struct task_struct,ptrace_list); | 719 | p = list_entry(_p,struct task_struct,ptrace_list); |
667 | choose_new_parent(p, reaper, child_reaper); | 720 | choose_new_parent(p, reaper); |
668 | reparent_thread(p, father, 1); | 721 | reparent_thread(p, father, 1); |
669 | } | 722 | } |
670 | } | 723 | } |
@@ -805,7 +858,7 @@ fastcall NORET_TYPE void do_exit(long code) | |||
805 | panic("Aiee, killing interrupt handler!"); | 858 | panic("Aiee, killing interrupt handler!"); |
806 | if (unlikely(!tsk->pid)) | 859 | if (unlikely(!tsk->pid)) |
807 | panic("Attempted to kill the idle task!"); | 860 | panic("Attempted to kill the idle task!"); |
808 | if (unlikely(tsk->pid == 1)) | 861 | if (unlikely(tsk == child_reaper)) |
809 | panic("Attempted to kill init!"); | 862 | panic("Attempted to kill init!"); |
810 | 863 | ||
811 | if (unlikely(current->ptrace & PT_TRACE_EXIT)) { | 864 | if (unlikely(current->ptrace & PT_TRACE_EXIT)) { |
@@ -852,6 +905,14 @@ fastcall NORET_TYPE void do_exit(long code) | |||
852 | exit_itimers(tsk->signal); | 905 | exit_itimers(tsk->signal); |
853 | acct_process(code); | 906 | acct_process(code); |
854 | } | 907 | } |
908 | if (unlikely(tsk->robust_list)) | ||
909 | exit_robust_list(tsk); | ||
910 | #ifdef CONFIG_COMPAT | ||
911 | if (unlikely(tsk->compat_robust_list)) | ||
912 | compat_exit_robust_list(tsk); | ||
913 | #endif | ||
914 | if (unlikely(tsk->audit_context)) | ||
915 | audit_free(tsk); | ||
855 | exit_mm(tsk); | 916 | exit_mm(tsk); |
856 | 917 | ||
857 | exit_sem(tsk); | 918 | exit_sem(tsk); |
@@ -884,6 +945,9 @@ fastcall NORET_TYPE void do_exit(long code) | |||
884 | if (tsk->io_context) | 945 | if (tsk->io_context) |
885 | exit_io_context(); | 946 | exit_io_context(); |
886 | 947 | ||
948 | if (tsk->splice_pipe) | ||
949 | __free_pipe_info(tsk->splice_pipe); | ||
950 | |||
887 | /* PF_DEAD causes final put_task_struct after we schedule. */ | 951 | /* PF_DEAD causes final put_task_struct after we schedule. */ |
888 | preempt_disable(); | 952 | preempt_disable(); |
889 | BUG_ON(tsk->flags & PF_DEAD); | 953 | BUG_ON(tsk->flags & PF_DEAD); |
@@ -912,13 +976,6 @@ asmlinkage long sys_exit(int error_code) | |||
912 | do_exit((error_code&0xff)<<8); | 976 | do_exit((error_code&0xff)<<8); |
913 | } | 977 | } |
914 | 978 | ||
915 | task_t fastcall *next_thread(const task_t *p) | ||
916 | { | ||
917 | return pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID); | ||
918 | } | ||
919 | |||
920 | EXPORT_SYMBOL(next_thread); | ||
921 | |||
922 | /* | 979 | /* |
923 | * Take down every thread in the group. This is called by fatal signals | 980 | * Take down every thread in the group. This is called by fatal signals |
924 | * as well as by sys_exit_group (below). | 981 | * as well as by sys_exit_group (below). |
@@ -933,7 +990,6 @@ do_group_exit(int exit_code) | |||
933 | else if (!thread_group_empty(current)) { | 990 | else if (!thread_group_empty(current)) { |
934 | struct signal_struct *const sig = current->signal; | 991 | struct signal_struct *const sig = current->signal; |
935 | struct sighand_struct *const sighand = current->sighand; | 992 | struct sighand_struct *const sighand = current->sighand; |
936 | read_lock(&tasklist_lock); | ||
937 | spin_lock_irq(&sighand->siglock); | 993 | spin_lock_irq(&sighand->siglock); |
938 | if (sig->flags & SIGNAL_GROUP_EXIT) | 994 | if (sig->flags & SIGNAL_GROUP_EXIT) |
939 | /* Another thread got here before we took the lock. */ | 995 | /* Another thread got here before we took the lock. */ |
@@ -943,7 +999,6 @@ do_group_exit(int exit_code) | |||
943 | zap_other_threads(current); | 999 | zap_other_threads(current); |
944 | } | 1000 | } |
945 | spin_unlock_irq(&sighand->siglock); | 1001 | spin_unlock_irq(&sighand->siglock); |
946 | read_unlock(&tasklist_lock); | ||
947 | } | 1002 | } |
948 | 1003 | ||
949 | do_exit(exit_code); | 1004 | do_exit(exit_code); |
@@ -1273,7 +1328,7 @@ bail_ref: | |||
1273 | 1328 | ||
1274 | /* move to end of parent's list to avoid starvation */ | 1329 | /* move to end of parent's list to avoid starvation */ |
1275 | remove_parent(p); | 1330 | remove_parent(p); |
1276 | add_parent(p, p->parent); | 1331 | add_parent(p); |
1277 | 1332 | ||
1278 | write_unlock_irq(&tasklist_lock); | 1333 | write_unlock_irq(&tasklist_lock); |
1279 | 1334 | ||
diff --git a/kernel/extable.c b/kernel/extable.c index 7501b531ce..7fe2628553 100644 --- a/kernel/extable.c +++ b/kernel/extable.c | |||
@@ -40,7 +40,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr) | |||
40 | return e; | 40 | return e; |
41 | } | 41 | } |
42 | 42 | ||
43 | static int core_kernel_text(unsigned long addr) | 43 | int core_kernel_text(unsigned long addr) |
44 | { | 44 | { |
45 | if (addr >= (unsigned long)_stext && | 45 | if (addr >= (unsigned long)_stext && |
46 | addr <= (unsigned long)_etext) | 46 | addr <= (unsigned long)_etext) |
diff --git a/kernel/fork.c b/kernel/fork.c index e0a2b449de..ac8100e308 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -84,7 +84,7 @@ static kmem_cache_t *task_struct_cachep; | |||
84 | #endif | 84 | #endif |
85 | 85 | ||
86 | /* SLAB cache for signal_struct structures (tsk->signal) */ | 86 | /* SLAB cache for signal_struct structures (tsk->signal) */ |
87 | kmem_cache_t *signal_cachep; | 87 | static kmem_cache_t *signal_cachep; |
88 | 88 | ||
89 | /* SLAB cache for sighand_struct structures (tsk->sighand) */ | 89 | /* SLAB cache for sighand_struct structures (tsk->sighand) */ |
90 | kmem_cache_t *sighand_cachep; | 90 | kmem_cache_t *sighand_cachep; |
@@ -108,16 +108,12 @@ void free_task(struct task_struct *tsk) | |||
108 | } | 108 | } |
109 | EXPORT_SYMBOL(free_task); | 109 | EXPORT_SYMBOL(free_task); |
110 | 110 | ||
111 | void __put_task_struct_cb(struct rcu_head *rhp) | 111 | void __put_task_struct(struct task_struct *tsk) |
112 | { | 112 | { |
113 | struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); | ||
114 | |||
115 | WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE))); | 113 | WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE))); |
116 | WARN_ON(atomic_read(&tsk->usage)); | 114 | WARN_ON(atomic_read(&tsk->usage)); |
117 | WARN_ON(tsk == current); | 115 | WARN_ON(tsk == current); |
118 | 116 | ||
119 | if (unlikely(tsk->audit_context)) | ||
120 | audit_free(tsk); | ||
121 | security_task_free(tsk); | 117 | security_task_free(tsk); |
122 | free_uid(tsk->user); | 118 | free_uid(tsk->user); |
123 | put_group_info(tsk->group_info); | 119 | put_group_info(tsk->group_info); |
@@ -182,6 +178,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) | |||
182 | atomic_set(&tsk->usage,2); | 178 | atomic_set(&tsk->usage,2); |
183 | atomic_set(&tsk->fs_excl, 0); | 179 | atomic_set(&tsk->fs_excl, 0); |
184 | tsk->btrace_seq = 0; | 180 | tsk->btrace_seq = 0; |
181 | tsk->splice_pipe = NULL; | ||
185 | return tsk; | 182 | return tsk; |
186 | } | 183 | } |
187 | 184 | ||
@@ -721,7 +718,7 @@ out_release: | |||
721 | free_fdset (new_fdt->open_fds, new_fdt->max_fdset); | 718 | free_fdset (new_fdt->open_fds, new_fdt->max_fdset); |
722 | free_fd_array(new_fdt->fd, new_fdt->max_fds); | 719 | free_fd_array(new_fdt->fd, new_fdt->max_fds); |
723 | kmem_cache_free(files_cachep, newf); | 720 | kmem_cache_free(files_cachep, newf); |
724 | goto out; | 721 | return NULL; |
725 | } | 722 | } |
726 | 723 | ||
727 | static int copy_files(unsigned long clone_flags, struct task_struct * tsk) | 724 | static int copy_files(unsigned long clone_flags, struct task_struct * tsk) |
@@ -786,14 +783,6 @@ int unshare_files(void) | |||
786 | 783 | ||
787 | EXPORT_SYMBOL(unshare_files); | 784 | EXPORT_SYMBOL(unshare_files); |
788 | 785 | ||
789 | void sighand_free_cb(struct rcu_head *rhp) | ||
790 | { | ||
791 | struct sighand_struct *sp; | ||
792 | |||
793 | sp = container_of(rhp, struct sighand_struct, rcu); | ||
794 | kmem_cache_free(sighand_cachep, sp); | ||
795 | } | ||
796 | |||
797 | static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk) | 786 | static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk) |
798 | { | 787 | { |
799 | struct sighand_struct *sig; | 788 | struct sighand_struct *sig; |
@@ -806,12 +795,17 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t | |||
806 | rcu_assign_pointer(tsk->sighand, sig); | 795 | rcu_assign_pointer(tsk->sighand, sig); |
807 | if (!sig) | 796 | if (!sig) |
808 | return -ENOMEM; | 797 | return -ENOMEM; |
809 | spin_lock_init(&sig->siglock); | ||
810 | atomic_set(&sig->count, 1); | 798 | atomic_set(&sig->count, 1); |
811 | memcpy(sig->action, current->sighand->action, sizeof(sig->action)); | 799 | memcpy(sig->action, current->sighand->action, sizeof(sig->action)); |
812 | return 0; | 800 | return 0; |
813 | } | 801 | } |
814 | 802 | ||
803 | void __cleanup_sighand(struct sighand_struct *sighand) | ||
804 | { | ||
805 | if (atomic_dec_and_test(&sighand->count)) | ||
806 | kmem_cache_free(sighand_cachep, sighand); | ||
807 | } | ||
808 | |||
815 | static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk) | 809 | static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk) |
816 | { | 810 | { |
817 | struct signal_struct *sig; | 811 | struct signal_struct *sig; |
@@ -881,6 +875,22 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts | |||
881 | return 0; | 875 | return 0; |
882 | } | 876 | } |
883 | 877 | ||
878 | void __cleanup_signal(struct signal_struct *sig) | ||
879 | { | ||
880 | exit_thread_group_keys(sig); | ||
881 | kmem_cache_free(signal_cachep, sig); | ||
882 | } | ||
883 | |||
884 | static inline void cleanup_signal(struct task_struct *tsk) | ||
885 | { | ||
886 | struct signal_struct *sig = tsk->signal; | ||
887 | |||
888 | atomic_dec(&sig->live); | ||
889 | |||
890 | if (atomic_dec_and_test(&sig->count)) | ||
891 | __cleanup_signal(sig); | ||
892 | } | ||
893 | |||
884 | static inline void copy_flags(unsigned long clone_flags, struct task_struct *p) | 894 | static inline void copy_flags(unsigned long clone_flags, struct task_struct *p) |
885 | { | 895 | { |
886 | unsigned long new_flags = p->flags; | 896 | unsigned long new_flags = p->flags; |
@@ -1061,7 +1071,10 @@ static task_t *copy_process(unsigned long clone_flags, | |||
1061 | * Clear TID on mm_release()? | 1071 | * Clear TID on mm_release()? |
1062 | */ | 1072 | */ |
1063 | p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; | 1073 | p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; |
1064 | 1074 | p->robust_list = NULL; | |
1075 | #ifdef CONFIG_COMPAT | ||
1076 | p->compat_robust_list = NULL; | ||
1077 | #endif | ||
1065 | /* | 1078 | /* |
1066 | * sigaltstack should be cleared when sharing the same VM | 1079 | * sigaltstack should be cleared when sharing the same VM |
1067 | */ | 1080 | */ |
@@ -1092,6 +1105,7 @@ static task_t *copy_process(unsigned long clone_flags, | |||
1092 | * We dont wake it up yet. | 1105 | * We dont wake it up yet. |
1093 | */ | 1106 | */ |
1094 | p->group_leader = p; | 1107 | p->group_leader = p; |
1108 | INIT_LIST_HEAD(&p->thread_group); | ||
1095 | INIT_LIST_HEAD(&p->ptrace_children); | 1109 | INIT_LIST_HEAD(&p->ptrace_children); |
1096 | INIT_LIST_HEAD(&p->ptrace_list); | 1110 | INIT_LIST_HEAD(&p->ptrace_list); |
1097 | 1111 | ||
@@ -1115,16 +1129,6 @@ static task_t *copy_process(unsigned long clone_flags, | |||
1115 | !cpu_online(task_cpu(p)))) | 1129 | !cpu_online(task_cpu(p)))) |
1116 | set_task_cpu(p, smp_processor_id()); | 1130 | set_task_cpu(p, smp_processor_id()); |
1117 | 1131 | ||
1118 | /* | ||
1119 | * Check for pending SIGKILL! The new thread should not be allowed | ||
1120 | * to slip out of an OOM kill. (or normal SIGKILL.) | ||
1121 | */ | ||
1122 | if (sigismember(¤t->pending.signal, SIGKILL)) { | ||
1123 | write_unlock_irq(&tasklist_lock); | ||
1124 | retval = -EINTR; | ||
1125 | goto bad_fork_cleanup_namespace; | ||
1126 | } | ||
1127 | |||
1128 | /* CLONE_PARENT re-uses the old parent */ | 1132 | /* CLONE_PARENT re-uses the old parent */ |
1129 | if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) | 1133 | if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) |
1130 | p->real_parent = current->real_parent; | 1134 | p->real_parent = current->real_parent; |
@@ -1133,6 +1137,23 @@ static task_t *copy_process(unsigned long clone_flags, | |||
1133 | p->parent = p->real_parent; | 1137 | p->parent = p->real_parent; |
1134 | 1138 | ||
1135 | spin_lock(¤t->sighand->siglock); | 1139 | spin_lock(¤t->sighand->siglock); |
1140 | |||
1141 | /* | ||
1142 | * Process group and session signals need to be delivered to just the | ||
1143 | * parent before the fork or both the parent and the child after the | ||
1144 | * fork. Restart if a signal comes in before we add the new process to | ||
1145 | * it's process group. | ||
1146 | * A fatal signal pending means that current will exit, so the new | ||
1147 | * thread can't slip out of an OOM kill (or normal SIGKILL). | ||
1148 | */ | ||
1149 | recalc_sigpending(); | ||
1150 | if (signal_pending(current)) { | ||
1151 | spin_unlock(¤t->sighand->siglock); | ||
1152 | write_unlock_irq(&tasklist_lock); | ||
1153 | retval = -ERESTARTNOINTR; | ||
1154 | goto bad_fork_cleanup_namespace; | ||
1155 | } | ||
1156 | |||
1136 | if (clone_flags & CLONE_THREAD) { | 1157 | if (clone_flags & CLONE_THREAD) { |
1137 | /* | 1158 | /* |
1138 | * Important: if an exit-all has been started then | 1159 | * Important: if an exit-all has been started then |
@@ -1145,17 +1166,9 @@ static task_t *copy_process(unsigned long clone_flags, | |||
1145 | retval = -EAGAIN; | 1166 | retval = -EAGAIN; |
1146 | goto bad_fork_cleanup_namespace; | 1167 | goto bad_fork_cleanup_namespace; |
1147 | } | 1168 | } |
1148 | p->group_leader = current->group_leader; | ||
1149 | 1169 | ||
1150 | if (current->signal->group_stop_count > 0) { | 1170 | p->group_leader = current->group_leader; |
1151 | /* | 1171 | list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); |
1152 | * There is an all-stop in progress for the group. | ||
1153 | * We ourselves will stop as soon as we check signals. | ||
1154 | * Make the new thread part of that group stop too. | ||
1155 | */ | ||
1156 | current->signal->group_stop_count++; | ||
1157 | set_tsk_thread_flag(p, TIF_SIGPENDING); | ||
1158 | } | ||
1159 | 1172 | ||
1160 | if (!cputime_eq(current->signal->it_virt_expires, | 1173 | if (!cputime_eq(current->signal->it_virt_expires, |
1161 | cputime_zero) || | 1174 | cputime_zero) || |
@@ -1178,23 +1191,25 @@ static task_t *copy_process(unsigned long clone_flags, | |||
1178 | */ | 1191 | */ |
1179 | p->ioprio = current->ioprio; | 1192 | p->ioprio = current->ioprio; |
1180 | 1193 | ||
1181 | SET_LINKS(p); | 1194 | if (likely(p->pid)) { |
1182 | if (unlikely(p->ptrace & PT_PTRACED)) | 1195 | add_parent(p); |
1183 | __ptrace_link(p, current->parent); | 1196 | if (unlikely(p->ptrace & PT_PTRACED)) |
1184 | 1197 | __ptrace_link(p, current->parent); | |
1185 | if (thread_group_leader(p)) { | 1198 | |
1186 | p->signal->tty = current->signal->tty; | 1199 | if (thread_group_leader(p)) { |
1187 | p->signal->pgrp = process_group(current); | 1200 | p->signal->tty = current->signal->tty; |
1188 | p->signal->session = current->signal->session; | 1201 | p->signal->pgrp = process_group(current); |
1189 | attach_pid(p, PIDTYPE_PGID, process_group(p)); | 1202 | p->signal->session = current->signal->session; |
1190 | attach_pid(p, PIDTYPE_SID, p->signal->session); | 1203 | attach_pid(p, PIDTYPE_PGID, process_group(p)); |
1191 | if (p->pid) | 1204 | attach_pid(p, PIDTYPE_SID, p->signal->session); |
1205 | |||
1206 | list_add_tail_rcu(&p->tasks, &init_task.tasks); | ||
1192 | __get_cpu_var(process_counts)++; | 1207 | __get_cpu_var(process_counts)++; |
1208 | } | ||
1209 | attach_pid(p, PIDTYPE_PID, p->pid); | ||
1210 | nr_threads++; | ||
1193 | } | 1211 | } |
1194 | attach_pid(p, PIDTYPE_TGID, p->tgid); | ||
1195 | attach_pid(p, PIDTYPE_PID, p->pid); | ||
1196 | 1212 | ||
1197 | nr_threads++; | ||
1198 | total_forks++; | 1213 | total_forks++; |
1199 | spin_unlock(¤t->sighand->siglock); | 1214 | spin_unlock(¤t->sighand->siglock); |
1200 | write_unlock_irq(&tasklist_lock); | 1215 | write_unlock_irq(&tasklist_lock); |
@@ -1209,9 +1224,9 @@ bad_fork_cleanup_mm: | |||
1209 | if (p->mm) | 1224 | if (p->mm) |
1210 | mmput(p->mm); | 1225 | mmput(p->mm); |
1211 | bad_fork_cleanup_signal: | 1226 | bad_fork_cleanup_signal: |
1212 | exit_signal(p); | 1227 | cleanup_signal(p); |
1213 | bad_fork_cleanup_sighand: | 1228 | bad_fork_cleanup_sighand: |
1214 | exit_sighand(p); | 1229 | __cleanup_sighand(p->sighand); |
1215 | bad_fork_cleanup_fs: | 1230 | bad_fork_cleanup_fs: |
1216 | exit_fs(p); /* blocking */ | 1231 | exit_fs(p); /* blocking */ |
1217 | bad_fork_cleanup_files: | 1232 | bad_fork_cleanup_files: |
@@ -1258,7 +1273,7 @@ task_t * __devinit fork_idle(int cpu) | |||
1258 | if (!task) | 1273 | if (!task) |
1259 | return ERR_PTR(-ENOMEM); | 1274 | return ERR_PTR(-ENOMEM); |
1260 | init_idle(task, cpu); | 1275 | init_idle(task, cpu); |
1261 | unhash_process(task); | 1276 | |
1262 | return task; | 1277 | return task; |
1263 | } | 1278 | } |
1264 | 1279 | ||
@@ -1293,17 +1308,19 @@ long do_fork(unsigned long clone_flags, | |||
1293 | { | 1308 | { |
1294 | struct task_struct *p; | 1309 | struct task_struct *p; |
1295 | int trace = 0; | 1310 | int trace = 0; |
1296 | long pid = alloc_pidmap(); | 1311 | struct pid *pid = alloc_pid(); |
1312 | long nr; | ||
1297 | 1313 | ||
1298 | if (pid < 0) | 1314 | if (!pid) |
1299 | return -EAGAIN; | 1315 | return -EAGAIN; |
1316 | nr = pid->nr; | ||
1300 | if (unlikely(current->ptrace)) { | 1317 | if (unlikely(current->ptrace)) { |
1301 | trace = fork_traceflag (clone_flags); | 1318 | trace = fork_traceflag (clone_flags); |
1302 | if (trace) | 1319 | if (trace) |
1303 | clone_flags |= CLONE_PTRACE; | 1320 | clone_flags |= CLONE_PTRACE; |
1304 | } | 1321 | } |
1305 | 1322 | ||
1306 | p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid); | 1323 | p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, nr); |
1307 | /* | 1324 | /* |
1308 | * Do this prior waking up the new thread - the thread pointer | 1325 | * Do this prior waking up the new thread - the thread pointer |
1309 | * might get invalid after that point, if the thread exits quickly. | 1326 | * might get invalid after that point, if the thread exits quickly. |
@@ -1330,7 +1347,7 @@ long do_fork(unsigned long clone_flags, | |||
1330 | p->state = TASK_STOPPED; | 1347 | p->state = TASK_STOPPED; |
1331 | 1348 | ||
1332 | if (unlikely (trace)) { | 1349 | if (unlikely (trace)) { |
1333 | current->ptrace_message = pid; | 1350 | current->ptrace_message = nr; |
1334 | ptrace_notify ((trace << 8) | SIGTRAP); | 1351 | ptrace_notify ((trace << 8) | SIGTRAP); |
1335 | } | 1352 | } |
1336 | 1353 | ||
@@ -1340,21 +1357,31 @@ long do_fork(unsigned long clone_flags, | |||
1340 | ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); | 1357 | ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); |
1341 | } | 1358 | } |
1342 | } else { | 1359 | } else { |
1343 | free_pidmap(pid); | 1360 | free_pid(pid); |
1344 | pid = PTR_ERR(p); | 1361 | nr = PTR_ERR(p); |
1345 | } | 1362 | } |
1346 | return pid; | 1363 | return nr; |
1347 | } | 1364 | } |
1348 | 1365 | ||
1349 | #ifndef ARCH_MIN_MMSTRUCT_ALIGN | 1366 | #ifndef ARCH_MIN_MMSTRUCT_ALIGN |
1350 | #define ARCH_MIN_MMSTRUCT_ALIGN 0 | 1367 | #define ARCH_MIN_MMSTRUCT_ALIGN 0 |
1351 | #endif | 1368 | #endif |
1352 | 1369 | ||
1370 | static void sighand_ctor(void *data, kmem_cache_t *cachep, unsigned long flags) | ||
1371 | { | ||
1372 | struct sighand_struct *sighand = data; | ||
1373 | |||
1374 | if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == | ||
1375 | SLAB_CTOR_CONSTRUCTOR) | ||
1376 | spin_lock_init(&sighand->siglock); | ||
1377 | } | ||
1378 | |||
1353 | void __init proc_caches_init(void) | 1379 | void __init proc_caches_init(void) |
1354 | { | 1380 | { |
1355 | sighand_cachep = kmem_cache_create("sighand_cache", | 1381 | sighand_cachep = kmem_cache_create("sighand_cache", |
1356 | sizeof(struct sighand_struct), 0, | 1382 | sizeof(struct sighand_struct), 0, |
1357 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); | 1383 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU, |
1384 | sighand_ctor, NULL); | ||
1358 | signal_cachep = kmem_cache_create("signal_cache", | 1385 | signal_cachep = kmem_cache_create("signal_cache", |
1359 | sizeof(struct signal_struct), 0, | 1386 | sizeof(struct signal_struct), 0, |
1360 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); | 1387 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); |
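Because sighand_cachep is now SLAB_DESTROY_BY_RCU with a constructor, siglock is initialized once per slab object rather than per allocation, and the memory is only guaranteed to remain some sighand_struct for the duration of an RCU read-side section; it may be recycled for a different task. Readers therefore have to lock and then re-check that the task still points at the structure they locked. A sketch of that revalidation pattern (the helper name is illustrative; __exit_signal() in kernel/exit.c is the writer it races against):

	static struct sighand_struct *example_lock_task_sighand(struct task_struct *tsk,
								 unsigned long *flags)
	{
		struct sighand_struct *sighand;

		for (;;) {
			rcu_read_lock();
			sighand = rcu_dereference(tsk->sighand);
			if (unlikely(sighand == NULL)) {
				rcu_read_unlock();
				break;
			}
			spin_lock_irqsave(&sighand->siglock, *flags);
			if (likely(sighand == tsk->sighand)) {
				/* Still ours: return with siglock held. */
				rcu_read_unlock();
				break;
			}
			/* The slab object was recycled under us; try again. */
			spin_unlock_irqrestore(&sighand->siglock, *flags);
			rcu_read_unlock();
		}
		return sighand;
	}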
diff --git a/kernel/futex.c b/kernel/futex.c index 5efa2f9780..5699c51205 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -8,6 +8,10 @@ | |||
8 | * Removed page pinning, fix privately mapped COW pages and other cleanups | 8 | * Removed page pinning, fix privately mapped COW pages and other cleanups |
9 | * (C) Copyright 2003, 2004 Jamie Lokier | 9 | * (C) Copyright 2003, 2004 Jamie Lokier |
10 | * | 10 | * |
11 | * Robust futex support started by Ingo Molnar | ||
12 | * (C) Copyright 2006 Red Hat Inc, All Rights Reserved | ||
13 | * Thanks to Thomas Gleixner for suggestions, analysis and fixes. | ||
14 | * | ||
11 | * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly | 15 | * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly |
12 | * enough at me, Linus for the original (flawed) idea, Matthew | 16 | * enough at me, Linus for the original (flawed) idea, Matthew |
13 | * Kirkwood for proof-of-concept implementation. | 17 | * Kirkwood for proof-of-concept implementation. |
@@ -829,6 +833,172 @@ error: | |||
829 | goto out; | 833 | goto out; |
830 | } | 834 | } |
831 | 835 | ||
836 | /* | ||
837 | * Support for robust futexes: the kernel cleans up held futexes at | ||
838 | * thread exit time. | ||
839 | * | ||
840 | * Implementation: user-space maintains a per-thread list of locks it | ||
841 | * is holding. Upon do_exit(), the kernel carefully walks this list, | ||
842 | * and marks all locks that are owned by this thread with the | ||
843 | * FUTEX_OWNER_DEAD bit, and wakes up a waiter (if any). The list is | ||
844 | * always manipulated with the lock held, so the list is private and | ||
845 | * per-thread. Userspace also maintains a per-thread 'list_op_pending' | ||
846 | * field, to allow the kernel to clean up if the thread dies after | ||
847 | * acquiring the lock, but just before it could have added itself to | ||
848 | * the list. There can only be one such pending lock. | ||
849 | */ | ||
850 | |||
851 | /** | ||
852 | * sys_set_robust_list - set the robust-futex list head of a task | ||
853 | * @head: pointer to the list-head | ||
854 | * @len: length of the list-head, as userspace expects | ||
855 | */ | ||
856 | asmlinkage long | ||
857 | sys_set_robust_list(struct robust_list_head __user *head, | ||
858 | size_t len) | ||
859 | { | ||
860 | /* | ||
861 | * The kernel knows only one size for now: | ||
862 | */ | ||
863 | if (unlikely(len != sizeof(*head))) | ||
864 | return -EINVAL; | ||
865 | |||
866 | current->robust_list = head; | ||
867 | |||
868 | return 0; | ||
869 | } | ||
870 | |||
871 | /** | ||
872 | * sys_get_robust_list - get the robust-futex list head of a task | ||
873 | * @pid: pid of the process [zero for current task] | ||
874 | * @head_ptr: pointer to a list-head pointer, the kernel fills it in | ||
875 | * @len_ptr: pointer to a length field, the kernel fills in the header size | ||
876 | */ | ||
877 | asmlinkage long | ||
878 | sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr, | ||
879 | size_t __user *len_ptr) | ||
880 | { | ||
881 | struct robust_list_head *head; | ||
882 | unsigned long ret; | ||
883 | |||
884 | if (!pid) | ||
885 | head = current->robust_list; | ||
886 | else { | ||
887 | struct task_struct *p; | ||
888 | |||
889 | ret = -ESRCH; | ||
890 | read_lock(&tasklist_lock); | ||
891 | p = find_task_by_pid(pid); | ||
892 | if (!p) | ||
893 | goto err_unlock; | ||
894 | ret = -EPERM; | ||
895 | if ((current->euid != p->euid) && (current->euid != p->uid) && | ||
896 | !capable(CAP_SYS_PTRACE)) | ||
897 | goto err_unlock; | ||
898 | head = p->robust_list; | ||
899 | read_unlock(&tasklist_lock); | ||
900 | } | ||
901 | |||
902 | if (put_user(sizeof(*head), len_ptr)) | ||
903 | return -EFAULT; | ||
904 | return put_user(head, head_ptr); | ||
905 | |||
906 | err_unlock: | ||
907 | read_unlock(&tasklist_lock); | ||
908 | |||
909 | return ret; | ||
910 | } | ||
911 | |||
912 | /* | ||
913 | * Process a futex-list entry, check whether it's owned by the | ||
914 | * dying task, and do notification if so: | ||
915 | */ | ||
916 | int handle_futex_death(u32 __user *uaddr, struct task_struct *curr) | ||
917 | { | ||
918 | u32 uval; | ||
919 | |||
920 | retry: | ||
921 | if (get_user(uval, uaddr)) | ||
922 | return -1; | ||
923 | |||
924 | if ((uval & FUTEX_TID_MASK) == curr->pid) { | ||
925 | /* | ||
926 | * Ok, this dying thread is truly holding a futex | ||
927 | * of interest. Set the OWNER_DIED bit atomically | ||
928 | * via cmpxchg, and if the value had FUTEX_WAITERS | ||
929 | * set, wake up a waiter (if any). (We have to do a | ||
930 | * futex_wake() even if OWNER_DIED is already set - | ||
931 | * to handle the rare but possible case of recursive | ||
932 | * thread-death.) The rest of the cleanup is done in | ||
933 | * userspace. | ||
934 | */ | ||
935 | if (futex_atomic_cmpxchg_inatomic(uaddr, uval, | ||
936 | uval | FUTEX_OWNER_DIED) != uval) | ||
937 | goto retry; | ||
938 | |||
939 | if (uval & FUTEX_WAITERS) | ||
940 | futex_wake((unsigned long)uaddr, 1); | ||
941 | } | ||
942 | return 0; | ||
943 | } | ||
944 | |||
945 | /* | ||
946 | * Walk curr->robust_list (very carefully, it's a userspace list!) | ||
947 | * and mark any locks found there dead, and notify any waiters. | ||
948 | * | ||
949 | * We silently return on any sign of list-walking problem. | ||
950 | */ | ||
951 | void exit_robust_list(struct task_struct *curr) | ||
952 | { | ||
953 | struct robust_list_head __user *head = curr->robust_list; | ||
954 | struct robust_list __user *entry, *pending; | ||
955 | unsigned int limit = ROBUST_LIST_LIMIT; | ||
956 | unsigned long futex_offset; | ||
957 | |||
958 | /* | ||
959 | * Fetch the list head (which was registered earlier, via | ||
960 | * sys_set_robust_list()): | ||
961 | */ | ||
962 | if (get_user(entry, &head->list.next)) | ||
963 | return; | ||
964 | /* | ||
965 | * Fetch the relative futex offset: | ||
966 | */ | ||
967 | if (get_user(futex_offset, &head->futex_offset)) | ||
968 | return; | ||
969 | /* | ||
970 | * Fetch any possibly pending lock-add first, and handle it | ||
971 | * if it exists: | ||
972 | */ | ||
973 | if (get_user(pending, &head->list_op_pending)) | ||
974 | return; | ||
975 | if (pending) | ||
976 | handle_futex_death((void *)pending + futex_offset, curr); | ||
977 | |||
978 | while (entry != &head->list) { | ||
979 | /* | ||
980 | * A pending lock might already be on the list, so | ||
981 | * dont process it twice: | ||
982 | */ | ||
983 | if (entry != pending) | ||
984 | if (handle_futex_death((void *)entry + futex_offset, | ||
985 | curr)) | ||
986 | return; | ||
987 | /* | ||
988 | * Fetch the next entry in the list: | ||
989 | */ | ||
990 | if (get_user(entry, &entry->next)) | ||
991 | return; | ||
992 | /* | ||
993 | * Avoid excessively long or circular lists: | ||
994 | */ | ||
995 | if (!--limit) | ||
996 | break; | ||
997 | |||
998 | cond_resched(); | ||
999 | } | ||
1000 | } | ||
1001 | |||
832 | long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout, | 1002 | long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout, |
833 | unsigned long uaddr2, int val2, int val3) | 1003 | unsigned long uaddr2, int val2, int val3) |
834 | { | 1004 | { |
@@ -869,9 +1039,11 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, int val, | |||
869 | unsigned long timeout = MAX_SCHEDULE_TIMEOUT; | 1039 | unsigned long timeout = MAX_SCHEDULE_TIMEOUT; |
870 | int val2 = 0; | 1040 | int val2 = 0; |
871 | 1041 | ||
872 | if ((op == FUTEX_WAIT) && utime) { | 1042 | if (utime && (op == FUTEX_WAIT)) { |
873 | if (copy_from_user(&t, utime, sizeof(t)) != 0) | 1043 | if (copy_from_user(&t, utime, sizeof(t)) != 0) |
874 | return -EFAULT; | 1044 | return -EFAULT; |
1045 | if (!timespec_valid(&t)) | ||
1046 | return -EINVAL; | ||
875 | timeout = timespec_to_jiffies(&t) + 1; | 1047 | timeout = timespec_to_jiffies(&t) + 1; |
876 | } | 1048 | } |
877 | /* | 1049 | /* |
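From userspace, the contract described in the comment block above is: each thread registers one robust_list_head, and futex_offset tells the kernel where the lock word lives relative to each list entry. A minimal, single-threaded registration sketch (assuming SYS_set_robust_list is exposed by <sys/syscall.h> on a kernel carrying this patch):

	#include <stddef.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/futex.h>

	struct example_mutex {
		struct robust_list	list;	/* links held locks together */
		int			futex;	/* the lock word itself */
	};

	static struct robust_list_head robust_head = {
		.list		 = { &robust_head.list },	/* empty circular list */
		.futex_offset	 = offsetof(struct example_mutex, futex)
				   - offsetof(struct example_mutex, list),
		.list_op_pending = NULL,
	};

	static int example_register_robust_list(void)
	{
		return syscall(SYS_set_robust_list, &robust_head,
			       sizeof(robust_head));
	}

After registration, user-space lock/unlock code links and unlinks example_mutex entries itself, setting list_op_pending around the window where a lock is held but not yet on the list; the kernel only walks the list at exit time, as exit_robust_list() above shows.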
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c new file mode 100644 index 0000000000..1ab6a0ea3d --- /dev/null +++ b/kernel/futex_compat.c | |||
@@ -0,0 +1,144 @@ | |||
1 | /* | ||
2 | * linux/kernel/futex_compat.c | ||
3 | * | ||
4 | * Futex compatibility routines. | ||
5 | * | ||
6 | * Copyright 2006, Red Hat, Inc., Ingo Molnar | ||
7 | */ | ||
8 | |||
9 | #include <linux/linkage.h> | ||
10 | #include <linux/compat.h> | ||
11 | #include <linux/futex.h> | ||
12 | |||
13 | #include <asm/uaccess.h> | ||
14 | |||
15 | /* | ||
16 | * Walk curr->robust_list (very carefully, it's a userspace list!) | ||
17 | * and mark any locks found there dead, and notify any waiters. | ||
18 | * | ||
19 | * We silently return on any sign of list-walking problem. | ||
20 | */ | ||
21 | void compat_exit_robust_list(struct task_struct *curr) | ||
22 | { | ||
23 | struct compat_robust_list_head __user *head = curr->compat_robust_list; | ||
24 | struct robust_list __user *entry, *pending; | ||
25 | compat_uptr_t uentry, upending; | ||
26 | unsigned int limit = ROBUST_LIST_LIMIT; | ||
27 | compat_long_t futex_offset; | ||
28 | |||
29 | /* | ||
30 | * Fetch the list head (which was registered earlier, via | ||
31 | * sys_set_robust_list()): | ||
32 | */ | ||
33 | if (get_user(uentry, &head->list.next)) | ||
34 | return; | ||
35 | entry = compat_ptr(uentry); | ||
36 | /* | ||
37 | * Fetch the relative futex offset: | ||
38 | */ | ||
39 | if (get_user(futex_offset, &head->futex_offset)) | ||
40 | return; | ||
41 | /* | ||
42 | * Fetch any possibly pending lock-add first, and handle it | ||
43 | * if it exists: | ||
44 | */ | ||
45 | if (get_user(upending, &head->list_op_pending)) | ||
46 | return; | ||
47 | pending = compat_ptr(upending); | ||
48 | if (upending) | ||
49 | handle_futex_death((void *)pending + futex_offset, curr); | ||
50 | |||
51 | while (compat_ptr(uentry) != &head->list) { | ||
52 | /* | ||
53 | * A pending lock might already be on the list, so | ||
54 | * don't process it twice: | ||
55 | */ | ||
56 | if (entry != pending) | ||
57 | if (handle_futex_death((void *)entry + futex_offset, | ||
58 | curr)) | ||
59 | return; | ||
60 | |||
61 | /* | ||
62 | * Fetch the next entry in the list: | ||
63 | */ | ||
64 | if (get_user(uentry, (compat_uptr_t *)&entry->next)) | ||
65 | return; | ||
66 | entry = compat_ptr(uentry); | ||
67 | /* | ||
68 | * Avoid excessively long or circular lists: | ||
69 | */ | ||
70 | if (!--limit) | ||
71 | break; | ||
72 | |||
73 | cond_resched(); | ||
74 | } | ||
75 | } | ||
76 | |||
77 | asmlinkage long | ||
78 | compat_sys_set_robust_list(struct compat_robust_list_head __user *head, | ||
79 | compat_size_t len) | ||
80 | { | ||
81 | if (unlikely(len != sizeof(*head))) | ||
82 | return -EINVAL; | ||
83 | |||
84 | current->compat_robust_list = head; | ||
85 | |||
86 | return 0; | ||
87 | } | ||
88 | |||
89 | asmlinkage long | ||
90 | compat_sys_get_robust_list(int pid, compat_uptr_t *head_ptr, | ||
91 | compat_size_t __user *len_ptr) | ||
92 | { | ||
93 | struct compat_robust_list_head *head; | ||
94 | unsigned long ret; | ||
95 | |||
96 | if (!pid) | ||
97 | head = current->compat_robust_list; | ||
98 | else { | ||
99 | struct task_struct *p; | ||
100 | |||
101 | ret = -ESRCH; | ||
102 | read_lock(&tasklist_lock); | ||
103 | p = find_task_by_pid(pid); | ||
104 | if (!p) | ||
105 | goto err_unlock; | ||
106 | ret = -EPERM; | ||
107 | if ((current->euid != p->euid) && (current->euid != p->uid) && | ||
108 | !capable(CAP_SYS_PTRACE)) | ||
109 | goto err_unlock; | ||
110 | head = p->compat_robust_list; | ||
111 | read_unlock(&tasklist_lock); | ||
112 | } | ||
113 | |||
114 | if (put_user(sizeof(*head), len_ptr)) | ||
115 | return -EFAULT; | ||
116 | return put_user(ptr_to_compat(head), head_ptr); | ||
117 | |||
118 | err_unlock: | ||
119 | read_unlock(&tasklist_lock); | ||
120 | |||
121 | return ret; | ||
122 | } | ||
123 | |||
124 | asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, | ||
125 | struct compat_timespec __user *utime, u32 __user *uaddr2, | ||
126 | u32 val3) | ||
127 | { | ||
128 | struct timespec t; | ||
129 | unsigned long timeout = MAX_SCHEDULE_TIMEOUT; | ||
130 | int val2 = 0; | ||
131 | |||
132 | if (utime && (op == FUTEX_WAIT)) { | ||
133 | if (get_compat_timespec(&t, utime)) | ||
134 | return -EFAULT; | ||
135 | if (!timespec_valid(&t)) | ||
136 | return -EINVAL; | ||
137 | timeout = timespec_to_jiffies(&t) + 1; | ||
138 | } | ||
139 | if (op >= FUTEX_REQUEUE) | ||
140 | val2 = (int) (unsigned long) utime; | ||
141 | |||
142 | return do_futex((unsigned long)uaddr, op, val, timeout, | ||
143 | (unsigned long)uaddr2, val2, val3); | ||
144 | } | ||
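For reference, this is how a thread hands its robust-list head to the kernel in the first place; compat_sys_set_robust_list() above is simply the 32-bit entry point for the same call on a 64-bit kernel. A minimal sketch, assuming a kernel and libc recent enough to export SYS_set_robust_list and struct robust_list_head (modern glibc registers its own per-thread list at startup, so this merely replaces it for the demo):

    #include <linux/futex.h>      /* struct robust_list_head */
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static struct robust_list_head head = {
        .list         = { &head.list },   /* empty circular list */
        .futex_offset = 0,                /* lock word sits at the list node */
    };

    int main(void)
    {
        long ret = syscall(SYS_set_robust_list, &head, sizeof(head));

        printf("set_robust_list -> %ld\n", ret);
        return 0;
    }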
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 0237a556eb..01fa2ae98a 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -456,6 +456,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) | |||
456 | 456 | ||
457 | return ret; | 457 | return ret; |
458 | } | 458 | } |
459 | EXPORT_SYMBOL_GPL(hrtimer_start); | ||
459 | 460 | ||
460 | /** | 461 | /** |
461 | * hrtimer_try_to_cancel - try to deactivate a timer | 462 | * hrtimer_try_to_cancel - try to deactivate a timer |
@@ -484,6 +485,7 @@ int hrtimer_try_to_cancel(struct hrtimer *timer) | |||
484 | return ret; | 485 | return ret; |
485 | 486 | ||
486 | } | 487 | } |
488 | EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel); | ||
487 | 489 | ||
488 | /** | 490 | /** |
489 | * hrtimer_cancel - cancel a timer and wait for the handler to finish. | 491 | * hrtimer_cancel - cancel a timer and wait for the handler to finish. |
@@ -501,8 +503,10 @@ int hrtimer_cancel(struct hrtimer *timer) | |||
501 | 503 | ||
502 | if (ret >= 0) | 504 | if (ret >= 0) |
503 | return ret; | 505 | return ret; |
506 | cpu_relax(); | ||
504 | } | 507 | } |
505 | } | 508 | } |
509 | EXPORT_SYMBOL_GPL(hrtimer_cancel); | ||
506 | 510 | ||
507 | /** | 511 | /** |
508 | * hrtimer_get_remaining - get remaining time for the timer | 512 | * hrtimer_get_remaining - get remaining time for the timer |
@@ -521,6 +525,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer) | |||
521 | 525 | ||
522 | return rem; | 526 | return rem; |
523 | } | 527 | } |
528 | EXPORT_SYMBOL_GPL(hrtimer_get_remaining); | ||
524 | 529 | ||
525 | #ifdef CONFIG_NO_IDLE_HZ | 530 | #ifdef CONFIG_NO_IDLE_HZ |
526 | /** | 531 | /** |
@@ -579,6 +584,7 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, | |||
579 | timer->base = &bases[clock_id]; | 584 | timer->base = &bases[clock_id]; |
580 | timer->node.rb_parent = HRTIMER_INACTIVE; | 585 | timer->node.rb_parent = HRTIMER_INACTIVE; |
581 | } | 586 | } |
587 | EXPORT_SYMBOL_GPL(hrtimer_init); | ||
582 | 588 | ||
583 | /** | 589 | /** |
584 | * hrtimer_get_res - get the timer resolution for a clock | 590 | * hrtimer_get_res - get the timer resolution for a clock |
@@ -598,6 +604,7 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) | |||
598 | 604 | ||
599 | return 0; | 605 | return 0; |
600 | } | 606 | } |
607 | EXPORT_SYMBOL_GPL(hrtimer_get_res); | ||
601 | 608 | ||
602 | /* | 609 | /* |
603 | * Expire the per base hrtimer-queue: | 610 | * Expire the per base hrtimer-queue: |
@@ -606,6 +613,9 @@ static inline void run_hrtimer_queue(struct hrtimer_base *base) | |||
606 | { | 613 | { |
607 | struct rb_node *node; | 614 | struct rb_node *node; |
608 | 615 | ||
616 | if (!base->first) | ||
617 | return; | ||
618 | |||
609 | if (base->get_softirq_time) | 619 | if (base->get_softirq_time) |
610 | base->softirq_time = base->get_softirq_time(); | 620 | base->softirq_time = base->get_softirq_time(); |
611 | 621 | ||
@@ -655,29 +665,28 @@ void hrtimer_run_queues(void) | |||
655 | /* | 665 | /* |
656 | * Sleep related functions: | 666 | * Sleep related functions: |
657 | */ | 667 | */ |
658 | 668 | static int hrtimer_wakeup(struct hrtimer *timer) | |
659 | struct sleep_hrtimer { | ||
660 | struct hrtimer timer; | ||
661 | struct task_struct *task; | ||
662 | int expired; | ||
663 | }; | ||
664 | |||
665 | static int nanosleep_wakeup(struct hrtimer *timer) | ||
666 | { | 669 | { |
667 | struct sleep_hrtimer *t = | 670 | struct hrtimer_sleeper *t = |
668 | container_of(timer, struct sleep_hrtimer, timer); | 671 | container_of(timer, struct hrtimer_sleeper, timer); |
672 | struct task_struct *task = t->task; | ||
669 | 673 | ||
670 | t->expired = 1; | 674 | t->task = NULL; |
671 | wake_up_process(t->task); | 675 | if (task) |
676 | wake_up_process(task); | ||
672 | 677 | ||
673 | return HRTIMER_NORESTART; | 678 | return HRTIMER_NORESTART; |
674 | } | 679 | } |
675 | 680 | ||
676 | static int __sched do_nanosleep(struct sleep_hrtimer *t, enum hrtimer_mode mode) | 681 | void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, task_t *task) |
677 | { | 682 | { |
678 | t->timer.function = nanosleep_wakeup; | 683 | sl->timer.function = hrtimer_wakeup; |
679 | t->task = current; | 684 | sl->task = task; |
680 | t->expired = 0; | 685 | } |
686 | |||
687 | static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) | ||
688 | { | ||
689 | hrtimer_init_sleeper(t, current); | ||
681 | 690 | ||
682 | do { | 691 | do { |
683 | set_current_state(TASK_INTERRUPTIBLE); | 692 | set_current_state(TASK_INTERRUPTIBLE); |
@@ -685,18 +694,17 @@ static int __sched do_nanosleep(struct sleep_hrtimer *t, enum hrtimer_mode mode) | |||
685 | 694 | ||
686 | schedule(); | 695 | schedule(); |
687 | 696 | ||
688 | if (unlikely(!t->expired)) { | 697 | hrtimer_cancel(&t->timer); |
689 | hrtimer_cancel(&t->timer); | 698 | mode = HRTIMER_ABS; |
690 | mode = HRTIMER_ABS; | 699 | |
691 | } | 700 | } while (t->task && !signal_pending(current)); |
692 | } while (!t->expired && !signal_pending(current)); | ||
693 | 701 | ||
694 | return t->expired; | 702 | return t->task == NULL; |
695 | } | 703 | } |
696 | 704 | ||
697 | static long __sched nanosleep_restart(struct restart_block *restart) | 705 | static long __sched nanosleep_restart(struct restart_block *restart) |
698 | { | 706 | { |
699 | struct sleep_hrtimer t; | 707 | struct hrtimer_sleeper t; |
700 | struct timespec __user *rmtp; | 708 | struct timespec __user *rmtp; |
701 | struct timespec tu; | 709 | struct timespec tu; |
702 | ktime_t time; | 710 | ktime_t time; |
@@ -729,7 +737,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |||
729 | const enum hrtimer_mode mode, const clockid_t clockid) | 737 | const enum hrtimer_mode mode, const clockid_t clockid) |
730 | { | 738 | { |
731 | struct restart_block *restart; | 739 | struct restart_block *restart; |
732 | struct sleep_hrtimer t; | 740 | struct hrtimer_sleeper t; |
733 | struct timespec tu; | 741 | struct timespec tu; |
734 | ktime_t rem; | 742 | ktime_t rem; |
735 | 743 | ||
@@ -834,7 +842,7 @@ static void migrate_hrtimers(int cpu) | |||
834 | } | 842 | } |
835 | #endif /* CONFIG_HOTPLUG_CPU */ | 843 | #endif /* CONFIG_HOTPLUG_CPU */ |
836 | 844 | ||
837 | static int __devinit hrtimer_cpu_notify(struct notifier_block *self, | 845 | static int hrtimer_cpu_notify(struct notifier_block *self, |
838 | unsigned long action, void *hcpu) | 846 | unsigned long action, void *hcpu) |
839 | { | 847 | { |
840 | long cpu = (long)hcpu; | 848 | long cpu = (long)hcpu; |
@@ -858,7 +866,7 @@ static int __devinit hrtimer_cpu_notify(struct notifier_block *self, | |||
858 | return NOTIFY_OK; | 866 | return NOTIFY_OK; |
859 | } | 867 | } |
860 | 868 | ||
861 | static struct notifier_block __devinitdata hrtimers_nb = { | 869 | static struct notifier_block hrtimers_nb = { |
862 | .notifier_call = hrtimer_cpu_notify, | 870 | .notifier_call = hrtimer_cpu_notify, |
863 | }; | 871 | }; |
864 | 872 | ||
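One detail worth spelling out is why do_nanosleep() switches to HRTIMER_ABS after the first pass: a sleep that is interrupted and restarted must not drift past its original deadline. The same behaviour is available from user space via clock_nanosleep() with TIMER_ABSTIME, which makes for a compact illustration (standard POSIX calls only; nothing from this patch is assumed):

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
        struct timespec deadline;

        clock_gettime(CLOCK_MONOTONIC, &deadline);
        deadline.tv_sec += 2;                 /* absolute deadline: now + 2s */

        /* An EINTR restart reuses the same absolute deadline, so repeated
         * wakeups cannot stretch the total sleep; that is the property the
         * mode = HRTIMER_ABS assignment gives nanosleep_restart(). */
        while (clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME,
                               &deadline, NULL) != 0)
            ;

        puts("deadline reached");
        return 0;
    }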
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index 2b33f852be..9f77f50d81 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile | |||
@@ -1,4 +1,5 @@ | |||
1 | 1 | ||
2 | obj-y := handle.o manage.o spurious.o migration.o | 2 | obj-y := handle.o manage.o spurious.o |
3 | obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o | 3 | obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o |
4 | obj-$(CONFIG_PROC_FS) += proc.o | 4 | obj-$(CONFIG_PROC_FS) += proc.o |
5 | obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o | ||
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index ac766ad573..1279e34995 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
@@ -246,8 +246,10 @@ int setup_irq(unsigned int irq, struct irqaction * new) | |||
246 | 246 | ||
247 | mismatch: | 247 | mismatch: |
248 | spin_unlock_irqrestore(&desc->lock, flags); | 248 | spin_unlock_irqrestore(&desc->lock, flags); |
249 | printk(KERN_ERR "%s: irq handler mismatch\n", __FUNCTION__); | 249 | if (!(new->flags & SA_PROBEIRQ)) { |
250 | dump_stack(); | 250 | printk(KERN_ERR "%s: irq handler mismatch\n", __FUNCTION__); |
251 | dump_stack(); | ||
252 | } | ||
251 | return -EBUSY; | 253 | return -EBUSY; |
252 | } | 254 | } |
253 | 255 | ||
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index 52a8655fa0..134f9f2e0e 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c | |||
@@ -1,6 +1,5 @@ | |||
1 | #include <linux/irq.h> | ||
2 | 1 | ||
3 | #if defined(CONFIG_GENERIC_PENDING_IRQ) | 2 | #include <linux/irq.h> |
4 | 3 | ||
5 | void set_pending_irq(unsigned int irq, cpumask_t mask) | 4 | void set_pending_irq(unsigned int irq, cpumask_t mask) |
6 | { | 5 | { |
@@ -61,5 +60,3 @@ void move_native_irq(int irq) | |||
61 | } | 60 | } |
62 | cpus_clear(pending_irq_cpumask[irq]); | 61 | cpus_clear(pending_irq_cpumask[irq]); |
63 | } | 62 | } |
64 | |||
65 | #endif | ||
diff --git a/kernel/kmod.c b/kernel/kmod.c index 51a892063a..20a997c73c 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
@@ -170,7 +170,7 @@ static int wait_for_helper(void *data) | |||
170 | sa.sa.sa_handler = SIG_IGN; | 170 | sa.sa.sa_handler = SIG_IGN; |
171 | sa.sa.sa_flags = 0; | 171 | sa.sa.sa_flags = 0; |
172 | siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD)); | 172 | siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD)); |
173 | do_sigaction(SIGCHLD, &sa, (struct k_sigaction *)0); | 173 | do_sigaction(SIGCHLD, &sa, NULL); |
174 | allow_signal(SIGCHLD); | 174 | allow_signal(SIGCHLD); |
175 | 175 | ||
176 | pid = kernel_thread(____call_usermodehelper, sub_info, SIGCHLD); | 176 | pid = kernel_thread(____call_usermodehelper, sub_info, SIGCHLD); |
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 1156eb0977..1fbf466a29 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -585,6 +585,9 @@ int __kprobes register_kretprobe(struct kretprobe *rp) | |||
585 | int i; | 585 | int i; |
586 | 586 | ||
587 | rp->kp.pre_handler = pre_handler_kretprobe; | 587 | rp->kp.pre_handler = pre_handler_kretprobe; |
588 | rp->kp.post_handler = NULL; | ||
589 | rp->kp.fault_handler = NULL; | ||
590 | rp->kp.break_handler = NULL; | ||
588 | 591 | ||
589 | /* Pre-allocate memory for max kretprobe instances */ | 592 | /* Pre-allocate memory for max kretprobe instances */ |
590 | if (rp->maxactive <= 0) { | 593 | if (rp->maxactive <= 0) { |
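The three NULL assignments added to register_kretprobe() exist because the caller's struct kretprobe may sit on the stack or be reused, so any handler slot this path does not own has to be cleared before the kprobe core can dereference it. A generic user-space illustration of the same rule (hypothetical hooks structure, not the kprobes API):

    #include <stdio.h>
    #include <string.h>

    struct hooks {
        void (*pre)(void);
        void (*post)(void);       /* slot this registration path never uses */
    };

    static void my_pre(void) { puts("pre hook"); }

    static void register_hooks(struct hooks *h)
    {
        h->post = NULL;           /* the fix: never trust unused caller slots */
        if (h->pre)
            h->pre();
        if (h->post)
            h->post();            /* would have jumped to garbage otherwise */
    }

    int main(void)
    {
        struct hooks h;
        memset(&h, 0x5a, sizeof(h));   /* simulate stack garbage */
        h.pre = my_pre;
        register_hooks(&h);
        return 0;
    }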
diff --git a/kernel/module.c b/kernel/module.c index ddfe45ac2f..bbe04862e1 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -64,26 +64,17 @@ static DEFINE_SPINLOCK(modlist_lock); | |||
64 | static DEFINE_MUTEX(module_mutex); | 64 | static DEFINE_MUTEX(module_mutex); |
65 | static LIST_HEAD(modules); | 65 | static LIST_HEAD(modules); |
66 | 66 | ||
67 | static DEFINE_MUTEX(notify_mutex); | 67 | static BLOCKING_NOTIFIER_HEAD(module_notify_list); |
68 | static struct notifier_block * module_notify_list; | ||
69 | 68 | ||
70 | int register_module_notifier(struct notifier_block * nb) | 69 | int register_module_notifier(struct notifier_block * nb) |
71 | { | 70 | { |
72 | int err; | 71 | return blocking_notifier_chain_register(&module_notify_list, nb); |
73 | mutex_lock(¬ify_mutex); | ||
74 | err = notifier_chain_register(&module_notify_list, nb); | ||
75 | mutex_unlock(¬ify_mutex); | ||
76 | return err; | ||
77 | } | 72 | } |
78 | EXPORT_SYMBOL(register_module_notifier); | 73 | EXPORT_SYMBOL(register_module_notifier); |
79 | 74 | ||
80 | int unregister_module_notifier(struct notifier_block * nb) | 75 | int unregister_module_notifier(struct notifier_block * nb) |
81 | { | 76 | { |
82 | int err; | 77 | return blocking_notifier_chain_unregister(&module_notify_list, nb); |
83 | mutex_lock(¬ify_mutex); | ||
84 | err = notifier_chain_unregister(&module_notify_list, nb); | ||
85 | mutex_unlock(¬ify_mutex); | ||
86 | return err; | ||
87 | } | 78 | } |
88 | EXPORT_SYMBOL(unregister_module_notifier); | 79 | EXPORT_SYMBOL(unregister_module_notifier); |
89 | 80 | ||
@@ -136,7 +127,7 @@ extern const unsigned long __start___kcrctab_gpl_future[]; | |||
136 | #ifndef CONFIG_MODVERSIONS | 127 | #ifndef CONFIG_MODVERSIONS |
137 | #define symversion(base, idx) NULL | 128 | #define symversion(base, idx) NULL |
138 | #else | 129 | #else |
139 | #define symversion(base, idx) ((base) ? ((base) + (idx)) : NULL) | 130 | #define symversion(base, idx) ((base != NULL) ? ((base) + (idx)) : NULL) |
140 | #endif | 131 | #endif |
141 | 132 | ||
142 | /* lookup symbol in given range of kernel_symbols */ | 133 | /* lookup symbol in given range of kernel_symbols */ |
@@ -714,14 +705,14 @@ EXPORT_SYMBOL(__symbol_put); | |||
714 | 705 | ||
715 | void symbol_put_addr(void *addr) | 706 | void symbol_put_addr(void *addr) |
716 | { | 707 | { |
717 | unsigned long flags; | 708 | struct module *modaddr; |
718 | 709 | ||
719 | spin_lock_irqsave(&modlist_lock, flags); | 710 | if (core_kernel_text((unsigned long)addr)) |
720 | if (!kernel_text_address((unsigned long)addr)) | 711 | return; |
721 | BUG(); | ||
722 | 712 | ||
723 | module_put(module_text_address((unsigned long)addr)); | 713 | if (!(modaddr = module_text_address((unsigned long)addr))) |
724 | spin_unlock_irqrestore(&modlist_lock, flags); | 714 | BUG(); |
715 | module_put(modaddr); | ||
725 | } | 716 | } |
726 | EXPORT_SYMBOL_GPL(symbol_put_addr); | 717 | EXPORT_SYMBOL_GPL(symbol_put_addr); |
727 | 718 | ||
@@ -1263,6 +1254,7 @@ static inline int license_is_gpl_compatible(const char *license) | |||
1263 | || strcmp(license, "GPL v2") == 0 | 1254 | || strcmp(license, "GPL v2") == 0 |
1264 | || strcmp(license, "GPL and additional rights") == 0 | 1255 | || strcmp(license, "GPL and additional rights") == 0 |
1265 | || strcmp(license, "Dual BSD/GPL") == 0 | 1256 | || strcmp(license, "Dual BSD/GPL") == 0 |
1257 | || strcmp(license, "Dual MIT/GPL") == 0 | ||
1266 | || strcmp(license, "Dual MPL/GPL") == 0); | 1258 | || strcmp(license, "Dual MPL/GPL") == 0); |
1267 | } | 1259 | } |
1268 | 1260 | ||
@@ -1816,9 +1808,8 @@ sys_init_module(void __user *umod, | |||
1816 | /* Drop lock so they can recurse */ | 1808 | /* Drop lock so they can recurse */ |
1817 | mutex_unlock(&module_mutex); | 1809 | mutex_unlock(&module_mutex); |
1818 | 1810 | ||
1819 | mutex_lock(¬ify_mutex); | 1811 | blocking_notifier_call_chain(&module_notify_list, |
1820 | notifier_call_chain(&module_notify_list, MODULE_STATE_COMING, mod); | 1812 | MODULE_STATE_COMING, mod); |
1821 | mutex_unlock(¬ify_mutex); | ||
1822 | 1813 | ||
1823 | /* Start the module */ | 1814 | /* Start the module */ |
1824 | if (mod->init != NULL) | 1815 | if (mod->init != NULL) |
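module.c is one of several files in this series (panic.c and profile.c below follow the same pattern) converted from an open-coded notifier list plus private lock to a self-locking BLOCKING_NOTIFIER_HEAD or ATOMIC_NOTIFIER_HEAD. The underlying idea is just a callback chain whose head owns its own synchronisation; a toy model in plain C with no locking at all (the kernel variants differ mainly in whether a mutex, a spinlock or RCU guards the list):

    #include <stdio.h>

    struct notifier_block {
        int (*notifier_call)(struct notifier_block *nb,
                             unsigned long action, void *data);
        struct notifier_block *next;
    };

    struct notifier_head { struct notifier_block *head; };

    static void chain_register(struct notifier_head *nh,
                               struct notifier_block *nb)
    {
        nb->next = nh->head;      /* the kernel also sorts by priority */
        nh->head = nb;
    }

    static void chain_call(struct notifier_head *nh,
                           unsigned long action, void *data)
    {
        struct notifier_block *nb;

        for (nb = nh->head; nb; nb = nb->next)
            nb->notifier_call(nb, action, data);
    }

    static int on_module_event(struct notifier_block *nb,
                               unsigned long action, void *data)
    {
        printf("module event %lu: %s\n", action, (const char *)data);
        return 0;                 /* NOTIFY_DONE */
    }

    int main(void)
    {
        struct notifier_head chain = { NULL };
        struct notifier_block nb = { .notifier_call = on_module_event };

        chain_register(&chain, &nb);
        chain_call(&chain, 1, "coming");
        return 0;
    }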
diff --git a/kernel/panic.c b/kernel/panic.c index acd95adddb..cc2a4c9c36 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -27,9 +27,8 @@ static int pause_on_oops_flag; | |||
27 | static DEFINE_SPINLOCK(pause_on_oops_lock); | 27 | static DEFINE_SPINLOCK(pause_on_oops_lock); |
28 | 28 | ||
29 | int panic_timeout; | 29 | int panic_timeout; |
30 | EXPORT_SYMBOL(panic_timeout); | ||
31 | 30 | ||
32 | struct notifier_block *panic_notifier_list; | 31 | ATOMIC_NOTIFIER_HEAD(panic_notifier_list); |
33 | 32 | ||
34 | EXPORT_SYMBOL(panic_notifier_list); | 33 | EXPORT_SYMBOL(panic_notifier_list); |
35 | 34 | ||
@@ -97,7 +96,7 @@ NORET_TYPE void panic(const char * fmt, ...) | |||
97 | smp_send_stop(); | 96 | smp_send_stop(); |
98 | #endif | 97 | #endif |
99 | 98 | ||
100 | notifier_call_chain(&panic_notifier_list, 0, buf); | 99 | atomic_notifier_call_chain(&panic_notifier_list, 0, buf); |
101 | 100 | ||
102 | if (!panic_blink) | 101 | if (!panic_blink) |
103 | panic_blink = no_blink; | 102 | panic_blink = no_blink; |
diff --git a/kernel/params.c b/kernel/params.c index 9de637a5c8..af43ecdc8d 100644 --- a/kernel/params.c +++ b/kernel/params.c | |||
@@ -31,7 +31,7 @@ | |||
31 | #define DEBUGP(fmt, a...) | 31 | #define DEBUGP(fmt, a...) |
32 | #endif | 32 | #endif |
33 | 33 | ||
34 | static inline int dash2underscore(char c) | 34 | static inline char dash2underscore(char c) |
35 | { | 35 | { |
36 | if (c == '-') | 36 | if (c == '-') |
37 | return '_'; | 37 | return '_'; |
diff --git a/kernel/pid.c b/kernel/pid.c index 1acc072469..eeb836b65c 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -28,8 +28,9 @@ | |||
28 | #include <linux/hash.h> | 28 | #include <linux/hash.h> |
29 | 29 | ||
30 | #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) | 30 | #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) |
31 | static struct hlist_head *pid_hash[PIDTYPE_MAX]; | 31 | static struct hlist_head *pid_hash; |
32 | static int pidhash_shift; | 32 | static int pidhash_shift; |
33 | static kmem_cache_t *pid_cachep; | ||
33 | 34 | ||
34 | int pid_max = PID_MAX_DEFAULT; | 35 | int pid_max = PID_MAX_DEFAULT; |
35 | int last_pid; | 36 | int last_pid; |
@@ -60,9 +61,22 @@ typedef struct pidmap { | |||
60 | static pidmap_t pidmap_array[PIDMAP_ENTRIES] = | 61 | static pidmap_t pidmap_array[PIDMAP_ENTRIES] = |
61 | { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } }; | 62 | { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } }; |
62 | 63 | ||
64 | /* | ||
65 | * Note: disable interrupts while the pidmap_lock is held as an | ||
66 | * interrupt might come in and do read_lock(&tasklist_lock). | ||
67 | * | ||
68 | * If we don't disable interrupts there is a nasty deadlock between | ||
69 | * detach_pid()->free_pid() and another cpu that does | ||
70 | * spin_lock(&pidmap_lock) followed by an interrupt routine that does | ||
71 | * read_lock(&tasklist_lock); | ||
72 | * | ||
73 | * After we clean up the tasklist_lock and know there are no | ||
74 | * irq handlers that take it we can leave the interrupts enabled. | ||
75 | * For now it is easier to be safe than to prove it can't happen. | ||
76 | */ | ||
63 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); | 77 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); |
64 | 78 | ||
65 | fastcall void free_pidmap(int pid) | 79 | static fastcall void free_pidmap(int pid) |
66 | { | 80 | { |
67 | pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE; | 81 | pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE; |
68 | int offset = pid & BITS_PER_PAGE_MASK; | 82 | int offset = pid & BITS_PER_PAGE_MASK; |
@@ -71,7 +85,7 @@ fastcall void free_pidmap(int pid) | |||
71 | atomic_inc(&map->nr_free); | 85 | atomic_inc(&map->nr_free); |
72 | } | 86 | } |
73 | 87 | ||
74 | int alloc_pidmap(void) | 88 | static int alloc_pidmap(void) |
75 | { | 89 | { |
76 | int i, offset, max_scan, pid, last = last_pid; | 90 | int i, offset, max_scan, pid, last = last_pid; |
77 | pidmap_t *map; | 91 | pidmap_t *map; |
@@ -89,12 +103,12 @@ int alloc_pidmap(void) | |||
89 | * Free the page if someone raced with us | 103 | * Free the page if someone raced with us |
90 | * installing it: | 104 | * installing it: |
91 | */ | 105 | */ |
92 | spin_lock(&pidmap_lock); | 106 | spin_lock_irq(&pidmap_lock); |
93 | if (map->page) | 107 | if (map->page) |
94 | free_page(page); | 108 | free_page(page); |
95 | else | 109 | else |
96 | map->page = (void *)page; | 110 | map->page = (void *)page; |
97 | spin_unlock(&pidmap_lock); | 111 | spin_unlock_irq(&pidmap_lock); |
98 | if (unlikely(!map->page)) | 112 | if (unlikely(!map->page)) |
99 | break; | 113 | break; |
100 | } | 114 | } |
@@ -131,13 +145,73 @@ int alloc_pidmap(void) | |||
131 | return -1; | 145 | return -1; |
132 | } | 146 | } |
133 | 147 | ||
134 | struct pid * fastcall find_pid(enum pid_type type, int nr) | 148 | fastcall void put_pid(struct pid *pid) |
149 | { | ||
150 | if (!pid) | ||
151 | return; | ||
152 | if ((atomic_read(&pid->count) == 1) || | ||
153 | atomic_dec_and_test(&pid->count)) | ||
154 | kmem_cache_free(pid_cachep, pid); | ||
155 | } | ||
156 | |||
157 | static void delayed_put_pid(struct rcu_head *rhp) | ||
158 | { | ||
159 | struct pid *pid = container_of(rhp, struct pid, rcu); | ||
160 | put_pid(pid); | ||
161 | } | ||
162 | |||
163 | fastcall void free_pid(struct pid *pid) | ||
164 | { | ||
165 | /* We can be called with write_lock_irq(&tasklist_lock) held */ | ||
166 | unsigned long flags; | ||
167 | |||
168 | spin_lock_irqsave(&pidmap_lock, flags); | ||
169 | hlist_del_rcu(&pid->pid_chain); | ||
170 | spin_unlock_irqrestore(&pidmap_lock, flags); | ||
171 | |||
172 | free_pidmap(pid->nr); | ||
173 | call_rcu(&pid->rcu, delayed_put_pid); | ||
174 | } | ||
175 | |||
176 | struct pid *alloc_pid(void) | ||
177 | { | ||
178 | struct pid *pid; | ||
179 | enum pid_type type; | ||
180 | int nr = -1; | ||
181 | |||
182 | pid = kmem_cache_alloc(pid_cachep, GFP_KERNEL); | ||
183 | if (!pid) | ||
184 | goto out; | ||
185 | |||
186 | nr = alloc_pidmap(); | ||
187 | if (nr < 0) | ||
188 | goto out_free; | ||
189 | |||
190 | atomic_set(&pid->count, 1); | ||
191 | pid->nr = nr; | ||
192 | for (type = 0; type < PIDTYPE_MAX; ++type) | ||
193 | INIT_HLIST_HEAD(&pid->tasks[type]); | ||
194 | |||
195 | spin_lock_irq(&pidmap_lock); | ||
196 | hlist_add_head_rcu(&pid->pid_chain, &pid_hash[pid_hashfn(pid->nr)]); | ||
197 | spin_unlock_irq(&pidmap_lock); | ||
198 | |||
199 | out: | ||
200 | return pid; | ||
201 | |||
202 | out_free: | ||
203 | kmem_cache_free(pid_cachep, pid); | ||
204 | pid = NULL; | ||
205 | goto out; | ||
206 | } | ||
207 | |||
208 | struct pid * fastcall find_pid(int nr) | ||
135 | { | 209 | { |
136 | struct hlist_node *elem; | 210 | struct hlist_node *elem; |
137 | struct pid *pid; | 211 | struct pid *pid; |
138 | 212 | ||
139 | hlist_for_each_entry_rcu(pid, elem, | 213 | hlist_for_each_entry_rcu(pid, elem, |
140 | &pid_hash[type][pid_hashfn(nr)], pid_chain) { | 214 | &pid_hash[pid_hashfn(nr)], pid_chain) { |
141 | if (pid->nr == nr) | 215 | if (pid->nr == nr) |
142 | return pid; | 216 | return pid; |
143 | } | 217 | } |
@@ -146,105 +220,80 @@ struct pid * fastcall find_pid(enum pid_type type, int nr) | |||
146 | 220 | ||
147 | int fastcall attach_pid(task_t *task, enum pid_type type, int nr) | 221 | int fastcall attach_pid(task_t *task, enum pid_type type, int nr) |
148 | { | 222 | { |
149 | struct pid *pid, *task_pid; | 223 | struct pid_link *link; |
150 | 224 | struct pid *pid; | |
151 | task_pid = &task->pids[type]; | ||
152 | pid = find_pid(type, nr); | ||
153 | task_pid->nr = nr; | ||
154 | if (pid == NULL) { | ||
155 | INIT_LIST_HEAD(&task_pid->pid_list); | ||
156 | hlist_add_head_rcu(&task_pid->pid_chain, | ||
157 | &pid_hash[type][pid_hashfn(nr)]); | ||
158 | } else { | ||
159 | INIT_HLIST_NODE(&task_pid->pid_chain); | ||
160 | list_add_tail_rcu(&task_pid->pid_list, &pid->pid_list); | ||
161 | } | ||
162 | |||
163 | return 0; | ||
164 | } | ||
165 | |||
166 | static fastcall int __detach_pid(task_t *task, enum pid_type type) | ||
167 | { | ||
168 | struct pid *pid, *pid_next; | ||
169 | int nr = 0; | ||
170 | |||
171 | pid = &task->pids[type]; | ||
172 | if (!hlist_unhashed(&pid->pid_chain)) { | ||
173 | 225 | ||
174 | if (list_empty(&pid->pid_list)) { | 226 | WARN_ON(!task->pid); /* to be removed soon */ |
175 | nr = pid->nr; | 227 | WARN_ON(!nr); /* to be removed soon */ |
176 | hlist_del_rcu(&pid->pid_chain); | ||
177 | } else { | ||
178 | pid_next = list_entry(pid->pid_list.next, | ||
179 | struct pid, pid_list); | ||
180 | /* insert next pid from pid_list to hash */ | ||
181 | hlist_replace_rcu(&pid->pid_chain, | ||
182 | &pid_next->pid_chain); | ||
183 | } | ||
184 | } | ||
185 | 228 | ||
186 | list_del_rcu(&pid->pid_list); | 229 | link = &task->pids[type]; |
187 | pid->nr = 0; | 230 | link->pid = pid = find_pid(nr); |
231 | hlist_add_head_rcu(&link->node, &pid->tasks[type]); | ||
188 | 232 | ||
189 | return nr; | 233 | return 0; |
190 | } | 234 | } |
191 | 235 | ||
192 | void fastcall detach_pid(task_t *task, enum pid_type type) | 236 | void fastcall detach_pid(task_t *task, enum pid_type type) |
193 | { | 237 | { |
194 | int tmp, nr; | 238 | struct pid_link *link; |
239 | struct pid *pid; | ||
240 | int tmp; | ||
195 | 241 | ||
196 | nr = __detach_pid(task, type); | 242 | link = &task->pids[type]; |
197 | if (!nr) | 243 | pid = link->pid; |
198 | return; | 244 | |
245 | hlist_del_rcu(&link->node); | ||
246 | link->pid = NULL; | ||
199 | 247 | ||
200 | for (tmp = PIDTYPE_MAX; --tmp >= 0; ) | 248 | for (tmp = PIDTYPE_MAX; --tmp >= 0; ) |
201 | if (tmp != type && find_pid(tmp, nr)) | 249 | if (!hlist_empty(&pid->tasks[tmp])) |
202 | return; | 250 | return; |
203 | 251 | ||
204 | free_pidmap(nr); | 252 | free_pid(pid); |
205 | } | 253 | } |
206 | 254 | ||
207 | task_t *find_task_by_pid_type(int type, int nr) | 255 | struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type) |
208 | { | 256 | { |
209 | struct pid *pid; | 257 | struct task_struct *result = NULL; |
210 | 258 | if (pid) { | |
211 | pid = find_pid(type, nr); | 259 | struct hlist_node *first; |
212 | if (!pid) | 260 | first = rcu_dereference(pid->tasks[type].first); |
213 | return NULL; | 261 | if (first) |
262 | result = hlist_entry(first, struct task_struct, pids[(type)].node); | ||
263 | } | ||
264 | return result; | ||
265 | } | ||
214 | 266 | ||
215 | return pid_task(&pid->pid_list, type); | 267 | /* |
268 | * Must be called under rcu_read_lock() or with tasklist_lock read-held. | ||
269 | */ | ||
270 | task_t *find_task_by_pid_type(int type, int nr) | ||
271 | { | ||
272 | return pid_task(find_pid(nr), type); | ||
216 | } | 273 | } |
217 | 274 | ||
218 | EXPORT_SYMBOL(find_task_by_pid_type); | 275 | EXPORT_SYMBOL(find_task_by_pid_type); |
219 | 276 | ||
220 | /* | 277 | struct task_struct *fastcall get_pid_task(struct pid *pid, enum pid_type type) |
221 | * This function switches the PIDs if a non-leader thread calls | 278 | { |
222 | * sys_execve() - this must be done without releasing the PID. | 279 | struct task_struct *result; |
223 | * (which a detach_pid() would eventually do.) | 280 | rcu_read_lock(); |
224 | */ | 281 | result = pid_task(pid, type); |
225 | void switch_exec_pids(task_t *leader, task_t *thread) | 282 | if (result) |
283 | get_task_struct(result); | ||
284 | rcu_read_unlock(); | ||
285 | return result; | ||
286 | } | ||
287 | |||
288 | struct pid *find_get_pid(pid_t nr) | ||
226 | { | 289 | { |
227 | __detach_pid(leader, PIDTYPE_PID); | 290 | struct pid *pid; |
228 | __detach_pid(leader, PIDTYPE_TGID); | 291 | |
229 | __detach_pid(leader, PIDTYPE_PGID); | 292 | rcu_read_lock(); |
230 | __detach_pid(leader, PIDTYPE_SID); | 293 | pid = get_pid(find_pid(nr)); |
231 | 294 | rcu_read_unlock(); | |
232 | __detach_pid(thread, PIDTYPE_PID); | 295 | |
233 | __detach_pid(thread, PIDTYPE_TGID); | 296 | return pid; |
234 | |||
235 | leader->pid = leader->tgid = thread->pid; | ||
236 | thread->pid = thread->tgid; | ||
237 | |||
238 | attach_pid(thread, PIDTYPE_PID, thread->pid); | ||
239 | attach_pid(thread, PIDTYPE_TGID, thread->tgid); | ||
240 | attach_pid(thread, PIDTYPE_PGID, thread->signal->pgrp); | ||
241 | attach_pid(thread, PIDTYPE_SID, thread->signal->session); | ||
242 | list_add_tail(&thread->tasks, &init_task.tasks); | ||
243 | |||
244 | attach_pid(leader, PIDTYPE_PID, leader->pid); | ||
245 | attach_pid(leader, PIDTYPE_TGID, leader->tgid); | ||
246 | attach_pid(leader, PIDTYPE_PGID, leader->signal->pgrp); | ||
247 | attach_pid(leader, PIDTYPE_SID, leader->signal->session); | ||
248 | } | 297 | } |
249 | 298 | ||
250 | /* | 299 | /* |
@@ -254,7 +303,7 @@ void switch_exec_pids(task_t *leader, task_t *thread) | |||
254 | */ | 303 | */ |
255 | void __init pidhash_init(void) | 304 | void __init pidhash_init(void) |
256 | { | 305 | { |
257 | int i, j, pidhash_size; | 306 | int i, pidhash_size; |
258 | unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT); | 307 | unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT); |
259 | 308 | ||
260 | pidhash_shift = max(4, fls(megabytes * 4)); | 309 | pidhash_shift = max(4, fls(megabytes * 4)); |
@@ -263,30 +312,23 @@ void __init pidhash_init(void) | |||
263 | 312 | ||
264 | printk("PID hash table entries: %d (order: %d, %Zd bytes)\n", | 313 | printk("PID hash table entries: %d (order: %d, %Zd bytes)\n", |
265 | pidhash_size, pidhash_shift, | 314 | pidhash_size, pidhash_shift, |
266 | PIDTYPE_MAX * pidhash_size * sizeof(struct hlist_head)); | 315 | pidhash_size * sizeof(struct hlist_head)); |
267 | 316 | ||
268 | for (i = 0; i < PIDTYPE_MAX; i++) { | 317 | pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash))); |
269 | pid_hash[i] = alloc_bootmem(pidhash_size * | 318 | if (!pid_hash) |
270 | sizeof(*(pid_hash[i]))); | 319 | panic("Could not alloc pidhash!\n"); |
271 | if (!pid_hash[i]) | 320 | for (i = 0; i < pidhash_size; i++) |
272 | panic("Could not alloc pidhash!\n"); | 321 | INIT_HLIST_HEAD(&pid_hash[i]); |
273 | for (j = 0; j < pidhash_size; j++) | ||
274 | INIT_HLIST_HEAD(&pid_hash[i][j]); | ||
275 | } | ||
276 | } | 322 | } |
277 | 323 | ||
278 | void __init pidmap_init(void) | 324 | void __init pidmap_init(void) |
279 | { | 325 | { |
280 | int i; | ||
281 | |||
282 | pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL); | 326 | pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL); |
327 | /* Reserve PID 0. We never call free_pidmap(0) */ | ||
283 | set_bit(0, pidmap_array->page); | 328 | set_bit(0, pidmap_array->page); |
284 | atomic_dec(&pidmap_array->nr_free); | 329 | atomic_dec(&pidmap_array->nr_free); |
285 | 330 | ||
286 | /* | 331 | pid_cachep = kmem_cache_create("pid", sizeof(struct pid), |
287 | * Allocate PID 0, and hash it via all PID types: | 332 | __alignof__(struct pid), |
288 | */ | 333 | SLAB_PANIC, NULL, NULL); |
289 | |||
290 | for (i = 0; i < PIDTYPE_MAX; i++) | ||
291 | attach_pid(current, i, 0); | ||
292 | } | 334 | } |
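The pid.c rework replaces the per-type hashes of task-embedded entries with a single refcounted struct pid per number: tasks attach to and detach from it, and the object goes away (after an RCU grace period) once the hash reference and every other reference have been dropped. A toy single-threaded model of the refcounting, with a plain int where the kernel uses atomic_t and with free() standing in for kmem_cache_free() behind call_rcu():

    #include <stdio.h>
    #include <stdlib.h>

    struct pid {
        int count;                /* atomic_t in the kernel */
        int nr;
    };

    static struct pid *get_pid(struct pid *pid)
    {
        if (pid)
            pid->count++;
        return pid;
    }

    static void put_pid(struct pid *pid)
    {
        if (pid && --pid->count == 0) {
            printf("pid %d released\n", pid->nr);
            free(pid);
        }
    }

    int main(void)
    {
        struct pid *pid = malloc(sizeof(*pid));

        pid->count = 1;           /* base reference held by the pid hash */
        pid->nr = 42;

        get_pid(pid);             /* e.g. find_get_pid() in some subsystem */
        put_pid(pid);             /* ...that subsystem drops its reference  */
        put_pid(pid);             /* free_pid(): the hash's base ref goes away */
        return 0;
    }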
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 9fd8d4f035..ce0dfb8f4a 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
@@ -41,7 +41,7 @@ config SOFTWARE_SUSPEND | |||
41 | depends on PM && SWAP && (X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP) | 41 | depends on PM && SWAP && (X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP) |
42 | ---help--- | 42 | ---help--- |
43 | Enable the possibility of suspending the machine. | 43 | Enable the possibility of suspending the machine. |
44 | It doesn't need APM. | 44 | It doesn't need ACPI or APM. |
45 | You may suspend your machine by 'swsusp' or 'shutdown -z <time>' | 45 | You may suspend your machine by 'swsusp' or 'shutdown -z <time>' |
46 | (patch for sysvinit needed). | 46 | (patch for sysvinit needed). |
47 | 47 | ||
diff --git a/kernel/power/main.c b/kernel/power/main.c index ee371f50cc..a6d9ef4600 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
@@ -272,7 +272,7 @@ static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n | |||
272 | if (*s && !strncmp(buf, *s, len)) | 272 | if (*s && !strncmp(buf, *s, len)) |
273 | break; | 273 | break; |
274 | } | 274 | } |
275 | if (*s) | 275 | if (state < PM_SUSPEND_MAX && *s) |
276 | error = enter_state(state); | 276 | error = enter_state(state); |
277 | else | 277 | else |
278 | error = -EINVAL; | 278 | error = -EINVAL; |
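The extra `state < PM_SUSPEND_MAX` test matters because the scan loop exits with state equal to the table size when nothing matches, so testing *s alone reads one element past pm_states[]. The shape of the bug and of the fix, in a standalone sketch (hypothetical table, not the kernel's):

    #include <stdio.h>
    #include <string.h>

    #define NSTATES 3             /* stands in for PM_SUSPEND_MAX */
    static const char *states[NSTATES] = { "standby", "mem", "disk" };

    static int lookup(const char *buf)
    {
        int state;

        for (state = 0; state < NSTATES; state++)
            if (states[state] && !strcmp(buf, states[state]))
                break;

        /* without the first test, an unmatched buf would dereference
         * states[NSTATES], one element past the end of the table */
        if (state < NSTATES && states[state])
            return state;
        return -1;                /* -EINVAL in the kernel */
    }

    int main(void)
    {
        printf("mem -> %d, bogus -> %d\n", lookup("mem"), lookup("bogus"));
        return 0;
    }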
diff --git a/kernel/power/pm.c b/kernel/power/pm.c index 0f6908cce1..84063ac8fc 100644 --- a/kernel/power/pm.c +++ b/kernel/power/pm.c | |||
@@ -75,25 +75,6 @@ struct pm_dev *pm_register(pm_dev_t type, | |||
75 | return dev; | 75 | return dev; |
76 | } | 76 | } |
77 | 77 | ||
78 | /** | ||
79 | * pm_unregister - unregister a device with power management | ||
80 | * @dev: device to unregister | ||
81 | * | ||
82 | * Remove a device from the power management notification lists. The | ||
83 | * dev passed must be a handle previously returned by pm_register. | ||
84 | */ | ||
85 | |||
86 | void pm_unregister(struct pm_dev *dev) | ||
87 | { | ||
88 | if (dev) { | ||
89 | mutex_lock(&pm_devs_lock); | ||
90 | list_del(&dev->entry); | ||
91 | mutex_unlock(&pm_devs_lock); | ||
92 | |||
93 | kfree(dev); | ||
94 | } | ||
95 | } | ||
96 | |||
97 | static void __pm_unregister(struct pm_dev *dev) | 78 | static void __pm_unregister(struct pm_dev *dev) |
98 | { | 79 | { |
99 | if (dev) { | 80 | if (dev) { |
@@ -258,7 +239,6 @@ int pm_send_all(pm_request_t rqst, void *data) | |||
258 | } | 239 | } |
259 | 240 | ||
260 | EXPORT_SYMBOL(pm_register); | 241 | EXPORT_SYMBOL(pm_register); |
261 | EXPORT_SYMBOL(pm_unregister); | ||
262 | EXPORT_SYMBOL(pm_unregister_all); | 242 | EXPORT_SYMBOL(pm_unregister_all); |
263 | EXPORT_SYMBOL(pm_send_all); | 243 | EXPORT_SYMBOL(pm_send_all); |
264 | EXPORT_SYMBOL(pm_active); | 244 | EXPORT_SYMBOL(pm_active); |
diff --git a/kernel/power/process.c b/kernel/power/process.c index 8ac7c35fad..b2a5f671d6 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c | |||
@@ -26,8 +26,7 @@ static inline int freezeable(struct task_struct * p) | |||
26 | (p->flags & PF_NOFREEZE) || | 26 | (p->flags & PF_NOFREEZE) || |
27 | (p->exit_state == EXIT_ZOMBIE) || | 27 | (p->exit_state == EXIT_ZOMBIE) || |
28 | (p->exit_state == EXIT_DEAD) || | 28 | (p->exit_state == EXIT_DEAD) || |
29 | (p->state == TASK_STOPPED) || | 29 | (p->state == TASK_STOPPED)) |
30 | (p->state == TASK_TRACED)) | ||
31 | return 0; | 30 | return 0; |
32 | return 1; | 31 | return 1; |
33 | } | 32 | } |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index c5863d02c8..3eeedbb13b 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -240,14 +240,15 @@ static void copy_data_pages(struct pbe *pblist) | |||
240 | * free_pagedir - free pages allocated with alloc_pagedir() | 240 | * free_pagedir - free pages allocated with alloc_pagedir() |
241 | */ | 241 | */ |
242 | 242 | ||
243 | static void free_pagedir(struct pbe *pblist) | 243 | static void free_pagedir(struct pbe *pblist, int clear_nosave_free) |
244 | { | 244 | { |
245 | struct pbe *pbe; | 245 | struct pbe *pbe; |
246 | 246 | ||
247 | while (pblist) { | 247 | while (pblist) { |
248 | pbe = (pblist + PB_PAGE_SKIP)->next; | 248 | pbe = (pblist + PB_PAGE_SKIP)->next; |
249 | ClearPageNosave(virt_to_page(pblist)); | 249 | ClearPageNosave(virt_to_page(pblist)); |
250 | ClearPageNosaveFree(virt_to_page(pblist)); | 250 | if (clear_nosave_free) |
251 | ClearPageNosaveFree(virt_to_page(pblist)); | ||
251 | free_page((unsigned long)pblist); | 252 | free_page((unsigned long)pblist); |
252 | pblist = pbe; | 253 | pblist = pbe; |
253 | } | 254 | } |
@@ -389,7 +390,7 @@ struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed | |||
389 | pbe->next = alloc_image_page(gfp_mask, safe_needed); | 390 | pbe->next = alloc_image_page(gfp_mask, safe_needed); |
390 | } | 391 | } |
391 | if (!pbe) { /* get_zeroed_page() failed */ | 392 | if (!pbe) { /* get_zeroed_page() failed */ |
392 | free_pagedir(pblist); | 393 | free_pagedir(pblist, 1); |
393 | pblist = NULL; | 394 | pblist = NULL; |
394 | } else | 395 | } else |
395 | create_pbe_list(pblist, nr_pages); | 396 | create_pbe_list(pblist, nr_pages); |
@@ -736,7 +737,7 @@ static int create_image(struct snapshot_handle *handle) | |||
736 | pblist = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1); | 737 | pblist = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1); |
737 | if (pblist) | 738 | if (pblist) |
738 | copy_page_backup_list(pblist, p); | 739 | copy_page_backup_list(pblist, p); |
739 | free_pagedir(p); | 740 | free_pagedir(p, 0); |
740 | if (!pblist) | 741 | if (!pblist) |
741 | error = -ENOMEM; | 742 | error = -ENOMEM; |
742 | } | 743 | } |
diff --git a/kernel/printk.c b/kernel/printk.c index 8cc19431e7..c056f33244 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -360,8 +360,7 @@ static void call_console_drivers(unsigned long start, unsigned long end) | |||
360 | unsigned long cur_index, start_print; | 360 | unsigned long cur_index, start_print; |
361 | static int msg_level = -1; | 361 | static int msg_level = -1; |
362 | 362 | ||
363 | if (((long)(start - end)) > 0) | 363 | BUG_ON(((long)(start - end)) > 0); |
364 | BUG(); | ||
365 | 364 | ||
366 | cur_index = start; | 365 | cur_index = start; |
367 | start_print = start; | 366 | start_print = start; |
@@ -708,8 +707,7 @@ int __init add_preferred_console(char *name, int idx, char *options) | |||
708 | */ | 707 | */ |
709 | void acquire_console_sem(void) | 708 | void acquire_console_sem(void) |
710 | { | 709 | { |
711 | if (in_interrupt()) | 710 | BUG_ON(in_interrupt()); |
712 | BUG(); | ||
713 | down(&console_sem); | 711 | down(&console_sem); |
714 | console_locked = 1; | 712 | console_locked = 1; |
715 | console_may_schedule = 1; | 713 | console_may_schedule = 1; |
diff --git a/kernel/profile.c b/kernel/profile.c index ad81f799a9..68afe121e5 100644 --- a/kernel/profile.c +++ b/kernel/profile.c | |||
@@ -87,72 +87,52 @@ void __init profile_init(void) | |||
87 | 87 | ||
88 | #ifdef CONFIG_PROFILING | 88 | #ifdef CONFIG_PROFILING |
89 | 89 | ||
90 | static DECLARE_RWSEM(profile_rwsem); | 90 | static BLOCKING_NOTIFIER_HEAD(task_exit_notifier); |
91 | static DEFINE_RWLOCK(handoff_lock); | 91 | static ATOMIC_NOTIFIER_HEAD(task_free_notifier); |
92 | static struct notifier_block * task_exit_notifier; | 92 | static BLOCKING_NOTIFIER_HEAD(munmap_notifier); |
93 | static struct notifier_block * task_free_notifier; | ||
94 | static struct notifier_block * munmap_notifier; | ||
95 | 93 | ||
96 | void profile_task_exit(struct task_struct * task) | 94 | void profile_task_exit(struct task_struct * task) |
97 | { | 95 | { |
98 | down_read(&profile_rwsem); | 96 | blocking_notifier_call_chain(&task_exit_notifier, 0, task); |
99 | notifier_call_chain(&task_exit_notifier, 0, task); | ||
100 | up_read(&profile_rwsem); | ||
101 | } | 97 | } |
102 | 98 | ||
103 | int profile_handoff_task(struct task_struct * task) | 99 | int profile_handoff_task(struct task_struct * task) |
104 | { | 100 | { |
105 | int ret; | 101 | int ret; |
106 | read_lock(&handoff_lock); | 102 | ret = atomic_notifier_call_chain(&task_free_notifier, 0, task); |
107 | ret = notifier_call_chain(&task_free_notifier, 0, task); | ||
108 | read_unlock(&handoff_lock); | ||
109 | return (ret == NOTIFY_OK) ? 1 : 0; | 103 | return (ret == NOTIFY_OK) ? 1 : 0; |
110 | } | 104 | } |
111 | 105 | ||
112 | void profile_munmap(unsigned long addr) | 106 | void profile_munmap(unsigned long addr) |
113 | { | 107 | { |
114 | down_read(&profile_rwsem); | 108 | blocking_notifier_call_chain(&munmap_notifier, 0, (void *)addr); |
115 | notifier_call_chain(&munmap_notifier, 0, (void *)addr); | ||
116 | up_read(&profile_rwsem); | ||
117 | } | 109 | } |
118 | 110 | ||
119 | int task_handoff_register(struct notifier_block * n) | 111 | int task_handoff_register(struct notifier_block * n) |
120 | { | 112 | { |
121 | int err = -EINVAL; | 113 | return atomic_notifier_chain_register(&task_free_notifier, n); |
122 | |||
123 | write_lock(&handoff_lock); | ||
124 | err = notifier_chain_register(&task_free_notifier, n); | ||
125 | write_unlock(&handoff_lock); | ||
126 | return err; | ||
127 | } | 114 | } |
128 | 115 | ||
129 | int task_handoff_unregister(struct notifier_block * n) | 116 | int task_handoff_unregister(struct notifier_block * n) |
130 | { | 117 | { |
131 | int err = -EINVAL; | 118 | return atomic_notifier_chain_unregister(&task_free_notifier, n); |
132 | |||
133 | write_lock(&handoff_lock); | ||
134 | err = notifier_chain_unregister(&task_free_notifier, n); | ||
135 | write_unlock(&handoff_lock); | ||
136 | return err; | ||
137 | } | 119 | } |
138 | 120 | ||
139 | int profile_event_register(enum profile_type type, struct notifier_block * n) | 121 | int profile_event_register(enum profile_type type, struct notifier_block * n) |
140 | { | 122 | { |
141 | int err = -EINVAL; | 123 | int err = -EINVAL; |
142 | 124 | ||
143 | down_write(&profile_rwsem); | ||
144 | |||
145 | switch (type) { | 125 | switch (type) { |
146 | case PROFILE_TASK_EXIT: | 126 | case PROFILE_TASK_EXIT: |
147 | err = notifier_chain_register(&task_exit_notifier, n); | 127 | err = blocking_notifier_chain_register( |
128 | &task_exit_notifier, n); | ||
148 | break; | 129 | break; |
149 | case PROFILE_MUNMAP: | 130 | case PROFILE_MUNMAP: |
150 | err = notifier_chain_register(&munmap_notifier, n); | 131 | err = blocking_notifier_chain_register( |
132 | &munmap_notifier, n); | ||
151 | break; | 133 | break; |
152 | } | 134 | } |
153 | 135 | ||
154 | up_write(&profile_rwsem); | ||
155 | |||
156 | return err; | 136 | return err; |
157 | } | 137 | } |
158 | 138 | ||
@@ -161,18 +141,17 @@ int profile_event_unregister(enum profile_type type, struct notifier_block * n) | |||
161 | { | 141 | { |
162 | int err = -EINVAL; | 142 | int err = -EINVAL; |
163 | 143 | ||
164 | down_write(&profile_rwsem); | ||
165 | |||
166 | switch (type) { | 144 | switch (type) { |
167 | case PROFILE_TASK_EXIT: | 145 | case PROFILE_TASK_EXIT: |
168 | err = notifier_chain_unregister(&task_exit_notifier, n); | 146 | err = blocking_notifier_chain_unregister( |
147 | &task_exit_notifier, n); | ||
169 | break; | 148 | break; |
170 | case PROFILE_MUNMAP: | 149 | case PROFILE_MUNMAP: |
171 | err = notifier_chain_unregister(&munmap_notifier, n); | 150 | err = blocking_notifier_chain_unregister( |
151 | &munmap_notifier, n); | ||
172 | break; | 152 | break; |
173 | } | 153 | } |
174 | 154 | ||
175 | up_write(&profile_rwsem); | ||
176 | return err; | 155 | return err; |
177 | } | 156 | } |
178 | 157 | ||
@@ -320,7 +299,7 @@ out: | |||
320 | } | 299 | } |
321 | 300 | ||
322 | #ifdef CONFIG_HOTPLUG_CPU | 301 | #ifdef CONFIG_HOTPLUG_CPU |
323 | static int __devinit profile_cpu_callback(struct notifier_block *info, | 302 | static int profile_cpu_callback(struct notifier_block *info, |
324 | unsigned long action, void *__cpu) | 303 | unsigned long action, void *__cpu) |
325 | { | 304 | { |
326 | int node, cpu = (unsigned long)__cpu; | 305 | int node, cpu = (unsigned long)__cpu; |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index d95a72c927..921c22ad16 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -30,14 +30,13 @@ | |||
30 | */ | 30 | */ |
31 | void __ptrace_link(task_t *child, task_t *new_parent) | 31 | void __ptrace_link(task_t *child, task_t *new_parent) |
32 | { | 32 | { |
33 | if (!list_empty(&child->ptrace_list)) | 33 | BUG_ON(!list_empty(&child->ptrace_list)); |
34 | BUG(); | ||
35 | if (child->parent == new_parent) | 34 | if (child->parent == new_parent) |
36 | return; | 35 | return; |
37 | list_add(&child->ptrace_list, &child->parent->ptrace_children); | 36 | list_add(&child->ptrace_list, &child->parent->ptrace_children); |
38 | REMOVE_LINKS(child); | 37 | remove_parent(child); |
39 | child->parent = new_parent; | 38 | child->parent = new_parent; |
40 | SET_LINKS(child); | 39 | add_parent(child); |
41 | } | 40 | } |
42 | 41 | ||
43 | /* | 42 | /* |
@@ -57,10 +56,6 @@ void ptrace_untrace(task_t *child) | |||
57 | signal_wake_up(child, 1); | 56 | signal_wake_up(child, 1); |
58 | } | 57 | } |
59 | } | 58 | } |
60 | if (child->signal->flags & SIGNAL_GROUP_EXIT) { | ||
61 | sigaddset(&child->pending.signal, SIGKILL); | ||
62 | signal_wake_up(child, 1); | ||
63 | } | ||
64 | spin_unlock(&child->sighand->siglock); | 59 | spin_unlock(&child->sighand->siglock); |
65 | } | 60 | } |
66 | 61 | ||
@@ -77,12 +72,13 @@ void __ptrace_unlink(task_t *child) | |||
77 | child->ptrace = 0; | 72 | child->ptrace = 0; |
78 | if (!list_empty(&child->ptrace_list)) { | 73 | if (!list_empty(&child->ptrace_list)) { |
79 | list_del_init(&child->ptrace_list); | 74 | list_del_init(&child->ptrace_list); |
80 | REMOVE_LINKS(child); | 75 | remove_parent(child); |
81 | child->parent = child->real_parent; | 76 | child->parent = child->real_parent; |
82 | SET_LINKS(child); | 77 | add_parent(child); |
83 | } | 78 | } |
84 | 79 | ||
85 | ptrace_untrace(child); | 80 | if (child->state == TASK_TRACED) |
81 | ptrace_untrace(child); | ||
86 | } | 82 | } |
87 | 83 | ||
88 | /* | 84 | /* |
@@ -152,12 +148,34 @@ int ptrace_may_attach(struct task_struct *task) | |||
152 | int ptrace_attach(struct task_struct *task) | 148 | int ptrace_attach(struct task_struct *task) |
153 | { | 149 | { |
154 | int retval; | 150 | int retval; |
155 | task_lock(task); | 151 | |
156 | retval = -EPERM; | 152 | retval = -EPERM; |
157 | if (task->pid <= 1) | 153 | if (task->pid <= 1) |
158 | goto bad; | 154 | goto out; |
159 | if (task->tgid == current->tgid) | 155 | if (task->tgid == current->tgid) |
160 | goto bad; | 156 | goto out; |
157 | |||
158 | repeat: | ||
159 | /* | ||
160 | * Nasty, nasty. | ||
161 | * | ||
162 | * We want to hold both the task-lock and the | ||
163 | * tasklist_lock for writing at the same time. | ||
164 | * But that's against the rules (tasklist_lock | ||
165 | * is taken for reading by interrupts on other | ||
166 | * cpu's that may have task_lock). | ||
167 | */ | ||
168 | task_lock(task); | ||
169 | local_irq_disable(); | ||
170 | if (!write_trylock(&tasklist_lock)) { | ||
171 | local_irq_enable(); | ||
172 | task_unlock(task); | ||
173 | do { | ||
174 | cpu_relax(); | ||
175 | } while (!write_can_lock(&tasklist_lock)); | ||
176 | goto repeat; | ||
177 | } | ||
178 | |||
161 | /* the same process cannot be attached many times */ | 179 | /* the same process cannot be attached many times */ |
162 | if (task->ptrace & PT_PTRACED) | 180 | if (task->ptrace & PT_PTRACED) |
163 | goto bad; | 181 | goto bad; |
@@ -170,17 +188,15 @@ int ptrace_attach(struct task_struct *task) | |||
170 | ? PT_ATTACHED : 0); | 188 | ? PT_ATTACHED : 0); |
171 | if (capable(CAP_SYS_PTRACE)) | 189 | if (capable(CAP_SYS_PTRACE)) |
172 | task->ptrace |= PT_PTRACE_CAP; | 190 | task->ptrace |= PT_PTRACE_CAP; |
173 | task_unlock(task); | ||
174 | 191 | ||
175 | write_lock_irq(&tasklist_lock); | ||
176 | __ptrace_link(task, current); | 192 | __ptrace_link(task, current); |
177 | write_unlock_irq(&tasklist_lock); | ||
178 | 193 | ||
179 | force_sig_specific(SIGSTOP, task); | 194 | force_sig_specific(SIGSTOP, task); |
180 | return 0; | ||
181 | 195 | ||
182 | bad: | 196 | bad: |
197 | write_unlock_irq(&tasklist_lock); | ||
183 | task_unlock(task); | 198 | task_unlock(task); |
199 | out: | ||
184 | return retval; | 200 | return retval; |
185 | } | 201 | } |
186 | 202 | ||
@@ -421,21 +437,22 @@ int ptrace_request(struct task_struct *child, long request, | |||
421 | */ | 437 | */ |
422 | int ptrace_traceme(void) | 438 | int ptrace_traceme(void) |
423 | { | 439 | { |
424 | int ret; | 440 | int ret = -EPERM; |
425 | 441 | ||
426 | /* | 442 | /* |
427 | * Are we already being traced? | 443 | * Are we already being traced? |
428 | */ | 444 | */ |
429 | if (current->ptrace & PT_PTRACED) | 445 | task_lock(current); |
430 | return -EPERM; | 446 | if (!(current->ptrace & PT_PTRACED)) { |
431 | ret = security_ptrace(current->parent, current); | 447 | ret = security_ptrace(current->parent, current); |
432 | if (ret) | 448 | /* |
433 | return -EPERM; | 449 | * Set the ptrace bit in the process ptrace flags. |
434 | /* | 450 | */ |
435 | * Set the ptrace bit in the process ptrace flags. | 451 | if (!ret) |
436 | */ | 452 | current->ptrace |= PT_PTRACED; |
437 | current->ptrace |= PT_PTRACED; | 453 | } |
438 | return 0; | 454 | task_unlock(current); |
455 | return ret; | ||
439 | } | 456 | } |
440 | 457 | ||
441 | /** | 458 | /** |
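The retry loop added to ptrace_attach() is a classic way to take two locks whose orderings conflict: hold the first, try the second, and on failure drop everything and start over instead of spinning while the first is still held. A pthread model of just that dance (the kernel version additionally disables interrupts and waits on write_can_lock() before retrying):

    #include <pthread.h>
    #include <sched.h>
    #include <stdio.h>

    static pthread_mutex_t task_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

    static void lock_both(void)
    {
        for (;;) {
            pthread_mutex_lock(&task_lock);
            if (pthread_mutex_trylock(&list_lock) == 0)
                return;                    /* got both without deadlocking */
            pthread_mutex_unlock(&task_lock);
            sched_yield();                 /* cpu_relax()/write_can_lock()
                                            * spin in the kernel version */
        }
    }

    int main(void)
    {
        lock_both();
        puts("holding task_lock and list_lock");
        pthread_mutex_unlock(&list_lock);
        pthread_mutex_unlock(&task_lock);
        return 0;
    }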
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 13458bbaa1..2058f88c7b 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
@@ -479,12 +479,31 @@ static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) | |||
479 | return 0; | 479 | return 0; |
480 | } | 480 | } |
481 | 481 | ||
482 | /* | ||
483 | * Check to see if there is any immediate RCU-related work to be done | ||
484 | * by the current CPU, returning 1 if so. This function is part of the | ||
485 | * RCU implementation; it is -not- an exported member of the RCU API. | ||
486 | */ | ||
482 | int rcu_pending(int cpu) | 487 | int rcu_pending(int cpu) |
483 | { | 488 | { |
484 | return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) || | 489 | return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) || |
485 | __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu)); | 490 | __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu)); |
486 | } | 491 | } |
487 | 492 | ||
493 | /* | ||
494 | * Check to see if any future RCU-related work will need to be done | ||
495 | * by the current CPU, even if none need be done immediately, returning | ||
496 | * 1 if so. This function is part of the RCU implementation; it is -not- | ||
497 | * an exported member of the RCU API. | ||
498 | */ | ||
499 | int rcu_needs_cpu(int cpu) | ||
500 | { | ||
501 | struct rcu_data *rdp = &per_cpu(rcu_data, cpu); | ||
502 | struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu); | ||
503 | |||
504 | return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu)); | ||
505 | } | ||
506 | |||
488 | void rcu_check_callbacks(int cpu, int user) | 507 | void rcu_check_callbacks(int cpu, int user) |
489 | { | 508 | { |
490 | if (user || | 509 | if (user || |
@@ -520,7 +539,7 @@ static void __devinit rcu_online_cpu(int cpu) | |||
520 | tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL); | 539 | tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL); |
521 | } | 540 | } |
522 | 541 | ||
523 | static int __devinit rcu_cpu_notify(struct notifier_block *self, | 542 | static int rcu_cpu_notify(struct notifier_block *self, |
524 | unsigned long action, void *hcpu) | 543 | unsigned long action, void *hcpu) |
525 | { | 544 | { |
526 | long cpu = (long)hcpu; | 545 | long cpu = (long)hcpu; |
@@ -537,7 +556,7 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self, | |||
537 | return NOTIFY_OK; | 556 | return NOTIFY_OK; |
538 | } | 557 | } |
539 | 558 | ||
540 | static struct notifier_block __devinitdata rcu_nb = { | 559 | static struct notifier_block rcu_nb = { |
541 | .notifier_call = rcu_cpu_notify, | 560 | .notifier_call = rcu_cpu_notify, |
542 | }; | 561 | }; |
543 | 562 | ||
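rcu_needs_cpu() lets idle/no-tick code ask a stronger question than rcu_pending(): not "is there RCU work right now?" but "will there be any work at all if this CPU stops ticking?". A toy model of that decision, with plain ints standing in for the per-CPU callback lists:

    #include <stdio.h>

    struct rcu_data_model {
        int curlist;              /* callbacks waiting for a grace period */
        int pending;              /* immediate work, as rcu_pending() reports */
    };

    static int rcu_needs_cpu_model(const struct rcu_data_model *rdp,
                                   const struct rcu_data_model *rdp_bh)
    {
        return rdp->curlist || rdp_bh->curlist ||
               rdp->pending || rdp_bh->pending;
    }

    int main(void)
    {
        struct rcu_data_model rdp = { .curlist = 1 }, rdp_bh = { 0 };

        printf("may stop the tick: %s\n",
               rcu_needs_cpu_model(&rdp, &rdp_bh) ? "no" : "yes");
        return 0;
    }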
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index b4b362b5ba..8154e7589d 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
@@ -301,7 +301,7 @@ rcu_torture_printk(char *page) | |||
301 | long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; | 301 | long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; |
302 | long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; | 302 | long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; |
303 | 303 | ||
304 | for_each_cpu(cpu) { | 304 | for_each_possible_cpu(cpu) { |
305 | for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { | 305 | for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { |
306 | pipesummary[i] += per_cpu(rcu_torture_count, cpu)[i]; | 306 | pipesummary[i] += per_cpu(rcu_torture_count, cpu)[i]; |
307 | batchsummary[i] += per_cpu(rcu_torture_batch, cpu)[i]; | 307 | batchsummary[i] += per_cpu(rcu_torture_batch, cpu)[i]; |
@@ -535,7 +535,7 @@ rcu_torture_init(void) | |||
535 | atomic_set(&n_rcu_torture_error, 0); | 535 | atomic_set(&n_rcu_torture_error, 0); |
536 | for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) | 536 | for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) |
537 | atomic_set(&rcu_torture_wcount[i], 0); | 537 | atomic_set(&rcu_torture_wcount[i], 0); |
538 | for_each_cpu(cpu) { | 538 | for_each_possible_cpu(cpu) { |
539 | for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { | 539 | for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { |
540 | per_cpu(rcu_torture_count, cpu)[i] = 0; | 540 | per_cpu(rcu_torture_count, cpu)[i] = 0; |
541 | per_cpu(rcu_torture_batch, cpu)[i] = 0; | 541 | per_cpu(rcu_torture_batch, cpu)[i] = 0; |
diff --git a/kernel/sched.c b/kernel/sched.c index 78acdefecc..c13f1bd2df 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -145,7 +145,8 @@ | |||
145 | (v1) * (v2_max) / (v1_max) | 145 | (v1) * (v2_max) / (v1_max) |
146 | 146 | ||
147 | #define DELTA(p) \ | 147 | #define DELTA(p) \ |
148 | (SCALE(TASK_NICE(p), 40, MAX_BONUS) + INTERACTIVE_DELTA) | 148 | (SCALE(TASK_NICE(p) + 20, 40, MAX_BONUS) - 20 * MAX_BONUS / 40 + \ |
149 | INTERACTIVE_DELTA) | ||
149 | 150 | ||
150 | #define TASK_INTERACTIVE(p) \ | 151 | #define TASK_INTERACTIVE(p) \ |
151 | ((p)->prio <= (p)->static_prio - DELTA(p)) | 152 | ((p)->prio <= (p)->static_prio - DELTA(p)) |
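The reworked DELTA() fixes integer truncation for negative nice values: the old form divided TASK_NICE(p) directly, and C's round-toward-zero division handles negative operands differently from positive ones, so slightly negative nice levels got the wrong delta; the new form shifts nice into 0..39 before scaling, making the rounding uniform across the range. A quick comparison, using values assumed from the tree's usual defaults (MAX_BONUS = 10, INTERACTIVE_DELTA = 2):

    #include <stdio.h>

    #define MAX_BONUS          10          /* assumed: 40 * 25 / 100 */
    #define INTERACTIVE_DELTA   2          /* assumed default */
    #define SCALE(v1, v1_max, v2_max)  ((v1) * (v2_max) / (v1_max))

    static int delta_old(int n)            /* n = TASK_NICE(p) */
    {
        return SCALE(n, 40, MAX_BONUS) + INTERACTIVE_DELTA;
    }

    static int delta_new(int n)
    {
        return SCALE(n + 20, 40, MAX_BONUS) - 20 * MAX_BONUS / 40 +
               INTERACTIVE_DELTA;
    }

    int main(void)
    {
        static const int nice_levels[] = { -20, -10, -1, 0, 10, 19 };
        int i;

        for (i = 0; i < 6; i++)
            printf("nice %3d: old %2d  new %2d\n", nice_levels[i],
                   delta_old(nice_levels[i]), delta_new(nice_levels[i]));
        return 0;
    }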
@@ -666,9 +667,13 @@ static int effective_prio(task_t *p) | |||
666 | /* | 667 | /* |
667 | * __activate_task - move a task to the runqueue. | 668 | * __activate_task - move a task to the runqueue. |
668 | */ | 669 | */ |
669 | static inline void __activate_task(task_t *p, runqueue_t *rq) | 670 | static void __activate_task(task_t *p, runqueue_t *rq) |
670 | { | 671 | { |
671 | enqueue_task(p, rq->active); | 672 | prio_array_t *target = rq->active; |
673 | |||
674 | if (batch_task(p)) | ||
675 | target = rq->expired; | ||
676 | enqueue_task(p, target); | ||
672 | rq->nr_running++; | 677 | rq->nr_running++; |
673 | } | 678 | } |
674 | 679 | ||
@@ -687,7 +692,7 @@ static int recalc_task_prio(task_t *p, unsigned long long now) | |||
687 | unsigned long long __sleep_time = now - p->timestamp; | 692 | unsigned long long __sleep_time = now - p->timestamp; |
688 | unsigned long sleep_time; | 693 | unsigned long sleep_time; |
689 | 694 | ||
690 | if (unlikely(p->policy == SCHED_BATCH)) | 695 | if (batch_task(p)) |
691 | sleep_time = 0; | 696 | sleep_time = 0; |
692 | else { | 697 | else { |
693 | if (__sleep_time > NS_MAX_SLEEP_AVG) | 698 | if (__sleep_time > NS_MAX_SLEEP_AVG) |
@@ -699,21 +704,25 @@ static int recalc_task_prio(task_t *p, unsigned long long now) | |||
699 | if (likely(sleep_time > 0)) { | 704 | if (likely(sleep_time > 0)) { |
700 | /* | 705 | /* |
701 | * User tasks that sleep a long time are categorised as | 706 | * User tasks that sleep a long time are categorised as |
702 | * idle and will get just interactive status to stay active & | 707 | * idle. They will only have their sleep_avg increased to a |
703 | * prevent them suddenly becoming cpu hogs and starving | 708 | * level that makes them just interactive priority to stay |
704 | * other processes. | 709 | * active yet prevent them suddenly becoming cpu hogs and |
710 | * starving other processes. | ||
705 | */ | 711 | */ |
706 | if (p->mm && p->activated != -1 && | 712 | if (p->mm && sleep_time > INTERACTIVE_SLEEP(p)) { |
707 | sleep_time > INTERACTIVE_SLEEP(p)) { | 713 | unsigned long ceiling; |
708 | p->sleep_avg = JIFFIES_TO_NS(MAX_SLEEP_AVG - | 714 | |
709 | DEF_TIMESLICE); | 715 | ceiling = JIFFIES_TO_NS(MAX_SLEEP_AVG - |
716 | DEF_TIMESLICE); | ||
717 | if (p->sleep_avg < ceiling) | ||
718 | p->sleep_avg = ceiling; | ||
710 | } else { | 719 | } else { |
711 | /* | 720 | /* |
712 | * Tasks waking from uninterruptible sleep are | 721 | * Tasks waking from uninterruptible sleep are |
713 | * limited in their sleep_avg rise as they | 722 | * limited in their sleep_avg rise as they |
714 | * are likely to be waiting on I/O | 723 | * are likely to be waiting on I/O |
715 | */ | 724 | */ |
716 | if (p->activated == -1 && p->mm) { | 725 | if (p->sleep_type == SLEEP_NONINTERACTIVE && p->mm) { |
717 | if (p->sleep_avg >= INTERACTIVE_SLEEP(p)) | 726 | if (p->sleep_avg >= INTERACTIVE_SLEEP(p)) |
718 | sleep_time = 0; | 727 | sleep_time = 0; |
719 | else if (p->sleep_avg + sleep_time >= | 728 | else if (p->sleep_avg + sleep_time >= |
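The ceiling logic above now only ever raises sleep_avg to the just-interactive level; the old code assigned the ceiling unconditionally, which could pull an already higher average back down. A toy illustration with made-up numbers (the real ceiling is JIFFIES_TO_NS(MAX_SLEEP_AVG - DEF_TIMESLICE)):

#include <stdio.h>

int main(void)
{
	unsigned long ceiling = 900;			/* stand-in for the real ceiling */
	unsigned long sleep_avg[] = { 200, 950 };	/* below and above the ceiling */
	int i;

	for (i = 0; i < 2; i++) {
		unsigned long old_way = ceiling;	/* unconditional assignment */
		unsigned long new_way = sleep_avg[i] < ceiling ? ceiling : sleep_avg[i];

		printf("sleep_avg %4lu -> old %4lu, new %4lu\n",
		       sleep_avg[i], old_way, new_way);
	}
	return 0;
}
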
@@ -768,7 +777,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) | |||
768 | * This checks to make sure it's not an uninterruptible task | 777 | * This checks to make sure it's not an uninterruptible task |
769 | * that is now waking up. | 778 | * that is now waking up. |
770 | */ | 779 | */ |
771 | if (!p->activated) { | 780 | if (p->sleep_type == SLEEP_NORMAL) { |
772 | /* | 781 | /* |
773 | * Tasks which were woken up by interrupts (ie. hw events) | 782 | * Tasks which were woken up by interrupts (ie. hw events) |
774 | * are most likely of interactive nature. So we give them | 783 | * are most likely of interactive nature. So we give them |
@@ -777,13 +786,13 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) | |||
777 | * on a CPU, first time around: | 786 | * on a CPU, first time around: |
778 | */ | 787 | */ |
779 | if (in_interrupt()) | 788 | if (in_interrupt()) |
780 | p->activated = 2; | 789 | p->sleep_type = SLEEP_INTERRUPTED; |
781 | else { | 790 | else { |
782 | /* | 791 | /* |
783 | * Normal first-time wakeups get a credit too for | 792 | * Normal first-time wakeups get a credit too for |
784 | * on-runqueue time, but it will be weighted down: | 793 | * on-runqueue time, but it will be weighted down: |
785 | */ | 794 | */ |
786 | p->activated = 1; | 795 | p->sleep_type = SLEEP_INTERACTIVE; |
787 | } | 796 | } |
788 | } | 797 | } |
789 | p->timestamp = now; | 798 | p->timestamp = now; |
@@ -1271,19 +1280,19 @@ out_activate: | |||
1271 | * Tasks on involuntary sleep don't earn | 1280 | * Tasks on involuntary sleep don't earn |
1272 | * sleep_avg beyond just interactive state. | 1281 | * sleep_avg beyond just interactive state. |
1273 | */ | 1282 | */ |
1274 | p->activated = -1; | 1283 | p->sleep_type = SLEEP_NONINTERACTIVE; |
1275 | } | 1284 | } else |
1276 | 1285 | ||
1277 | /* | 1286 | /* |
1278 | * Tasks that have marked their sleep as noninteractive get | 1287 | * Tasks that have marked their sleep as noninteractive get |
1279 | * woken up without updating their sleep average. (i.e. their | 1288 | * woken up with their sleep average not weighted in an |
1280 | * sleep is handled in a priority-neutral manner, no priority | 1289 | * interactive way. |
1281 | * boost and no penalty.) | ||
1282 | */ | 1290 | */ |
1283 | if (old_state & TASK_NONINTERACTIVE) | 1291 | if (old_state & TASK_NONINTERACTIVE) |
1284 | __activate_task(p, rq); | 1292 | p->sleep_type = SLEEP_NONINTERACTIVE; |
1285 | else | 1293 | |
1286 | activate_task(p, rq, cpu == this_cpu); | 1294 | |
1295 | activate_task(p, rq, cpu == this_cpu); | ||
1287 | /* | 1296 | /* |
1288 | * Sync wakeups (i.e. those types of wakeups where the waker | 1297 | * Sync wakeups (i.e. those types of wakeups where the waker |
1289 | * has indicated that it will leave the CPU in short order) | 1298 | * has indicated that it will leave the CPU in short order) |
@@ -1624,7 +1633,7 @@ unsigned long nr_uninterruptible(void) | |||
1624 | { | 1633 | { |
1625 | unsigned long i, sum = 0; | 1634 | unsigned long i, sum = 0; |
1626 | 1635 | ||
1627 | for_each_cpu(i) | 1636 | for_each_possible_cpu(i) |
1628 | sum += cpu_rq(i)->nr_uninterruptible; | 1637 | sum += cpu_rq(i)->nr_uninterruptible; |
1629 | 1638 | ||
1630 | /* | 1639 | /* |
@@ -1641,7 +1650,7 @@ unsigned long long nr_context_switches(void) | |||
1641 | { | 1650 | { |
1642 | unsigned long long i, sum = 0; | 1651 | unsigned long long i, sum = 0; |
1643 | 1652 | ||
1644 | for_each_cpu(i) | 1653 | for_each_possible_cpu(i) |
1645 | sum += cpu_rq(i)->nr_switches; | 1654 | sum += cpu_rq(i)->nr_switches; |
1646 | 1655 | ||
1647 | return sum; | 1656 | return sum; |
@@ -1651,12 +1660,27 @@ unsigned long nr_iowait(void) | |||
1651 | { | 1660 | { |
1652 | unsigned long i, sum = 0; | 1661 | unsigned long i, sum = 0; |
1653 | 1662 | ||
1654 | for_each_cpu(i) | 1663 | for_each_possible_cpu(i) |
1655 | sum += atomic_read(&cpu_rq(i)->nr_iowait); | 1664 | sum += atomic_read(&cpu_rq(i)->nr_iowait); |
1656 | 1665 | ||
1657 | return sum; | 1666 | return sum; |
1658 | } | 1667 | } |
1659 | 1668 | ||
1669 | unsigned long nr_active(void) | ||
1670 | { | ||
1671 | unsigned long i, running = 0, uninterruptible = 0; | ||
1672 | |||
1673 | for_each_online_cpu(i) { | ||
1674 | running += cpu_rq(i)->nr_running; | ||
1675 | uninterruptible += cpu_rq(i)->nr_uninterruptible; | ||
1676 | } | ||
1677 | |||
1678 | if (unlikely((long)uninterruptible < 0)) | ||
1679 | uninterruptible = 0; | ||
1680 | |||
1681 | return running + uninterruptible; | ||
1682 | } | ||
1683 | |||
1660 | #ifdef CONFIG_SMP | 1684 | #ifdef CONFIG_SMP |
1661 | 1685 | ||
1662 | /* | 1686 | /* |
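nr_active() deliberately sums over online CPUs only and clamps a transiently negative uninterruptible total (a task may decrement nr_uninterruptible on a different runqueue than the one that incremented it). Purely as an illustration of the kind of fixed-point load averaging such a running+uninterruptible count conventionally feeds, and not part of this patch, here is a user-space sketch using the kernel's historical FSHIFT/FIXED_1/EXP_1 constants (values assumed):

#include <stdio.h>

#define FSHIFT	11			/* bits of fraction, assumed */
#define FIXED_1	(1 << FSHIFT)
#define EXP_1	1884			/* ~ FIXED_1 / exp(5s/1min), assumed */

static unsigned long calc_load(unsigned long load, unsigned long exp,
			       unsigned long active)
{
	load *= exp;
	load += active * (FIXED_1 - exp);
	return load >> FSHIFT;
}

int main(void)
{
	unsigned long avenrun = 0;
	int tick;

	/* Feed a constant count of 3 active tasks for a minute of 5-second
	 * ticks and watch the 1-minute average climb toward 3.00. */
	for (tick = 0; tick < 12; tick++) {
		avenrun = calc_load(avenrun, EXP_1, 3 * FIXED_1);
		printf("loadavg ~ %lu.%02lu\n", avenrun >> FSHIFT,
		       (avenrun & (FIXED_1 - 1)) * 100 / FIXED_1);
	}
	return 0;
}
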
@@ -2859,6 +2883,12 @@ EXPORT_SYMBOL(sub_preempt_count); | |||
2859 | 2883 | ||
2860 | #endif | 2884 | #endif |
2861 | 2885 | ||
2886 | static inline int interactive_sleep(enum sleep_type sleep_type) | ||
2887 | { | ||
2888 | return (sleep_type == SLEEP_INTERACTIVE || | ||
2889 | sleep_type == SLEEP_INTERRUPTED); | ||
2890 | } | ||
2891 | |||
2862 | /* | 2892 | /* |
2863 | * schedule() is the main scheduler function. | 2893 | * schedule() is the main scheduler function. |
2864 | */ | 2894 | */ |
@@ -2878,13 +2908,11 @@ asmlinkage void __sched schedule(void) | |||
2878 | * schedule() atomically, we ignore that path for now. | 2908 | * schedule() atomically, we ignore that path for now. |
2879 | * Otherwise, whine if we are scheduling when we should not be. | 2909 | * Otherwise, whine if we are scheduling when we should not be. |
2880 | */ | 2910 | */ |
2881 | if (likely(!current->exit_state)) { | 2911 | if (unlikely(in_atomic() && !current->exit_state)) { |
2882 | if (unlikely(in_atomic())) { | 2912 | printk(KERN_ERR "BUG: scheduling while atomic: " |
2883 | printk(KERN_ERR "BUG: scheduling while atomic: " | 2913 | "%s/0x%08x/%d\n", |
2884 | "%s/0x%08x/%d\n", | 2914 | current->comm, preempt_count(), current->pid); |
2885 | current->comm, preempt_count(), current->pid); | 2915 | dump_stack(); |
2886 | dump_stack(); | ||
2887 | } | ||
2888 | } | 2916 | } |
2889 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); | 2917 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); |
2890 | 2918 | ||
@@ -2984,12 +3012,12 @@ go_idle: | |||
2984 | queue = array->queue + idx; | 3012 | queue = array->queue + idx; |
2985 | next = list_entry(queue->next, task_t, run_list); | 3013 | next = list_entry(queue->next, task_t, run_list); |
2986 | 3014 | ||
2987 | if (!rt_task(next) && next->activated > 0) { | 3015 | if (!rt_task(next) && interactive_sleep(next->sleep_type)) { |
2988 | unsigned long long delta = now - next->timestamp; | 3016 | unsigned long long delta = now - next->timestamp; |
2989 | if (unlikely((long long)(now - next->timestamp) < 0)) | 3017 | if (unlikely((long long)(now - next->timestamp) < 0)) |
2990 | delta = 0; | 3018 | delta = 0; |
2991 | 3019 | ||
2992 | if (next->activated == 1) | 3020 | if (next->sleep_type == SLEEP_INTERACTIVE) |
2993 | delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128; | 3021 | delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128; |
2994 | 3022 | ||
2995 | array = next->array; | 3023 | array = next->array; |
@@ -2999,10 +3027,9 @@ go_idle: | |||
2999 | dequeue_task(next, array); | 3027 | dequeue_task(next, array); |
3000 | next->prio = new_prio; | 3028 | next->prio = new_prio; |
3001 | enqueue_task(next, array); | 3029 | enqueue_task(next, array); |
3002 | } else | 3030 | } |
3003 | requeue_task(next, array); | ||
3004 | } | 3031 | } |
3005 | next->activated = 0; | 3032 | next->sleep_type = SLEEP_NORMAL; |
3006 | switch_tasks: | 3033 | switch_tasks: |
3007 | if (next == rq->idle) | 3034 | if (next == rq->idle) |
3008 | schedstat_inc(rq, sched_goidle); | 3035 | schedstat_inc(rq, sched_goidle); |
@@ -4761,7 +4788,7 @@ static int migration_call(struct notifier_block *nfb, unsigned long action, | |||
4761 | /* Register at highest priority so that task migration (migrate_all_tasks) | 4788 | /* Register at highest priority so that task migration (migrate_all_tasks) |
4762 | * happens before everything else. | 4789 | * happens before everything else. |
4763 | */ | 4790 | */ |
4764 | static struct notifier_block __devinitdata migration_notifier = { | 4791 | static struct notifier_block migration_notifier = { |
4765 | .notifier_call = migration_call, | 4792 | .notifier_call = migration_call, |
4766 | .priority = 10 | 4793 | .priority = 10 |
4767 | }; | 4794 | }; |
@@ -5575,11 +5602,31 @@ static int cpu_to_cpu_group(int cpu) | |||
5575 | } | 5602 | } |
5576 | #endif | 5603 | #endif |
5577 | 5604 | ||
5605 | #ifdef CONFIG_SCHED_MC | ||
5606 | static DEFINE_PER_CPU(struct sched_domain, core_domains); | ||
5607 | static struct sched_group sched_group_core[NR_CPUS]; | ||
5608 | #endif | ||
5609 | |||
5610 | #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) | ||
5611 | static int cpu_to_core_group(int cpu) | ||
5612 | { | ||
5613 | return first_cpu(cpu_sibling_map[cpu]); | ||
5614 | } | ||
5615 | #elif defined(CONFIG_SCHED_MC) | ||
5616 | static int cpu_to_core_group(int cpu) | ||
5617 | { | ||
5618 | return cpu; | ||
5619 | } | ||
5620 | #endif | ||
5621 | |||
5578 | static DEFINE_PER_CPU(struct sched_domain, phys_domains); | 5622 | static DEFINE_PER_CPU(struct sched_domain, phys_domains); |
5579 | static struct sched_group sched_group_phys[NR_CPUS]; | 5623 | static struct sched_group sched_group_phys[NR_CPUS]; |
5580 | static int cpu_to_phys_group(int cpu) | 5624 | static int cpu_to_phys_group(int cpu) |
5581 | { | 5625 | { |
5582 | #ifdef CONFIG_SCHED_SMT | 5626 | #if defined(CONFIG_SCHED_MC) |
5627 | cpumask_t mask = cpu_coregroup_map(cpu); | ||
5628 | return first_cpu(mask); | ||
5629 | #elif defined(CONFIG_SCHED_SMT) | ||
5583 | return first_cpu(cpu_sibling_map[cpu]); | 5630 | return first_cpu(cpu_sibling_map[cpu]); |
5584 | #else | 5631 | #else |
5585 | return cpu; | 5632 | return cpu; |
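Each cpu_to_*_group() helper names a scheduling group by the first CPU of the corresponding mask: the CPU itself at the SMT level, the first thread of its sibling map at the new core level, and the first CPU of its core group (or sibling map) at the physical level. A self-contained toy model of that mapping, with a made-up 8-CPU topology of 2 packages x 2 cores x 2 threads:

#include <stdio.h>

#define NCPUS 8

static unsigned int sibling_map[NCPUS];		/* threads sharing a core */
static unsigned int coregroup_map[NCPUS];	/* cores sharing a package */

static int first_cpu(unsigned int mask)
{
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++)
		if (mask & (1u << cpu))
			return cpu;
	return NCPUS;
}

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++) {
		sibling_map[cpu]   = 0x3u << (cpu & ~1);	/* thread pairs */
		coregroup_map[cpu] = 0xfu << (cpu & ~3);	/* 4 CPUs per package */
	}

	for (cpu = 0; cpu < NCPUS; cpu++)
		printf("cpu %d: cpu_group %d  core_group %d  phys_group %d\n",
		       cpu,
		       cpu,				/* cpu_to_cpu_group()  */
		       first_cpu(sibling_map[cpu]),	/* cpu_to_core_group() */
		       first_cpu(coregroup_map[cpu]));	/* cpu_to_phys_group() */
	return 0;
}
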
@@ -5602,6 +5649,32 @@ static int cpu_to_allnodes_group(int cpu) | |||
5602 | { | 5649 | { |
5603 | return cpu_to_node(cpu); | 5650 | return cpu_to_node(cpu); |
5604 | } | 5651 | } |
5652 | static void init_numa_sched_groups_power(struct sched_group *group_head) | ||
5653 | { | ||
5654 | struct sched_group *sg = group_head; | ||
5655 | int j; | ||
5656 | |||
5657 | if (!sg) | ||
5658 | return; | ||
5659 | next_sg: | ||
5660 | for_each_cpu_mask(j, sg->cpumask) { | ||
5661 | struct sched_domain *sd; | ||
5662 | |||
5663 | sd = &per_cpu(phys_domains, j); | ||
5664 | if (j != first_cpu(sd->groups->cpumask)) { | ||
5665 | /* | ||
5666 | * Only add "power" once for each | ||
5667 | * physical package. | ||
5668 | */ | ||
5669 | continue; | ||
5670 | } | ||
5671 | |||
5672 | sg->cpu_power += sd->groups->cpu_power; | ||
5673 | } | ||
5674 | sg = sg->next; | ||
5675 | if (sg != group_head) | ||
5676 | goto next_sg; | ||
5677 | } | ||
5605 | #endif | 5678 | #endif |
5606 | 5679 | ||
5607 | /* | 5680 | /* |
@@ -5677,6 +5750,17 @@ void build_sched_domains(const cpumask_t *cpu_map) | |||
5677 | sd->parent = p; | 5750 | sd->parent = p; |
5678 | sd->groups = &sched_group_phys[group]; | 5751 | sd->groups = &sched_group_phys[group]; |
5679 | 5752 | ||
5753 | #ifdef CONFIG_SCHED_MC | ||
5754 | p = sd; | ||
5755 | sd = &per_cpu(core_domains, i); | ||
5756 | group = cpu_to_core_group(i); | ||
5757 | *sd = SD_MC_INIT; | ||
5758 | sd->span = cpu_coregroup_map(i); | ||
5759 | cpus_and(sd->span, sd->span, *cpu_map); | ||
5760 | sd->parent = p; | ||
5761 | sd->groups = &sched_group_core[group]; | ||
5762 | #endif | ||
5763 | |||
5680 | #ifdef CONFIG_SCHED_SMT | 5764 | #ifdef CONFIG_SCHED_SMT |
5681 | p = sd; | 5765 | p = sd; |
5682 | sd = &per_cpu(cpu_domains, i); | 5766 | sd = &per_cpu(cpu_domains, i); |
@@ -5702,6 +5786,19 @@ void build_sched_domains(const cpumask_t *cpu_map) | |||
5702 | } | 5786 | } |
5703 | #endif | 5787 | #endif |
5704 | 5788 | ||
5789 | #ifdef CONFIG_SCHED_MC | ||
5790 | /* Set up multi-core groups */ | ||
5791 | for_each_cpu_mask(i, *cpu_map) { | ||
5792 | cpumask_t this_core_map = cpu_coregroup_map(i); | ||
5793 | cpus_and(this_core_map, this_core_map, *cpu_map); | ||
5794 | if (i != first_cpu(this_core_map)) | ||
5795 | continue; | ||
5796 | init_sched_build_groups(sched_group_core, this_core_map, | ||
5797 | &cpu_to_core_group); | ||
5798 | } | ||
5799 | #endif | ||
5800 | |||
5801 | |||
5705 | /* Set up physical groups */ | 5802 | /* Set up physical groups */ |
5706 | for (i = 0; i < MAX_NUMNODES; i++) { | 5803 | for (i = 0; i < MAX_NUMNODES; i++) { |
5707 | cpumask_t nodemask = node_to_cpumask(i); | 5804 | cpumask_t nodemask = node_to_cpumask(i); |
@@ -5798,51 +5895,38 @@ void build_sched_domains(const cpumask_t *cpu_map) | |||
5798 | power = SCHED_LOAD_SCALE; | 5895 | power = SCHED_LOAD_SCALE; |
5799 | sd->groups->cpu_power = power; | 5896 | sd->groups->cpu_power = power; |
5800 | #endif | 5897 | #endif |
5898 | #ifdef CONFIG_SCHED_MC | ||
5899 | sd = &per_cpu(core_domains, i); | ||
5900 | power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1) | ||
5901 | * SCHED_LOAD_SCALE / 10; | ||
5902 | sd->groups->cpu_power = power; | ||
5801 | 5903 | ||
5802 | sd = &per_cpu(phys_domains, i); | 5904 | sd = &per_cpu(phys_domains, i); |
5905 | |||
5906 | /* | ||
5907 | * This has to be < 2 * SCHED_LOAD_SCALE | ||
5908 | * Lets keep it SCHED_LOAD_SCALE, so that | ||
5909 | * while calculating NUMA group's cpu_power | ||
5910 | * we can simply do | ||
5911 | * numa_group->cpu_power += phys_group->cpu_power; | ||
5912 | * | ||
5913 | * See "only add power once for each physical pkg" | ||
5914 | * comment below | ||
5915 | */ | ||
5916 | sd->groups->cpu_power = SCHED_LOAD_SCALE; | ||
5917 | #else | ||
5918 | sd = &per_cpu(phys_domains, i); | ||
5803 | power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * | 5919 | power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * |
5804 | (cpus_weight(sd->groups->cpumask)-1) / 10; | 5920 | (cpus_weight(sd->groups->cpumask)-1) / 10; |
5805 | sd->groups->cpu_power = power; | 5921 | sd->groups->cpu_power = power; |
5806 | |||
5807 | #ifdef CONFIG_NUMA | ||
5808 | sd = &per_cpu(allnodes_domains, i); | ||
5809 | if (sd->groups) { | ||
5810 | power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * | ||
5811 | (cpus_weight(sd->groups->cpumask)-1) / 10; | ||
5812 | sd->groups->cpu_power = power; | ||
5813 | } | ||
5814 | #endif | 5922 | #endif |
5815 | } | 5923 | } |
5816 | 5924 | ||
5817 | #ifdef CONFIG_NUMA | 5925 | #ifdef CONFIG_NUMA |
5818 | for (i = 0; i < MAX_NUMNODES; i++) { | 5926 | for (i = 0; i < MAX_NUMNODES; i++) |
5819 | struct sched_group *sg = sched_group_nodes[i]; | 5927 | init_numa_sched_groups_power(sched_group_nodes[i]); |
5820 | int j; | ||
5821 | 5928 | ||
5822 | if (sg == NULL) | 5929 | init_numa_sched_groups_power(sched_group_allnodes); |
5823 | continue; | ||
5824 | next_sg: | ||
5825 | for_each_cpu_mask(j, sg->cpumask) { | ||
5826 | struct sched_domain *sd; | ||
5827 | int power; | ||
5828 | |||
5829 | sd = &per_cpu(phys_domains, j); | ||
5830 | if (j != first_cpu(sd->groups->cpumask)) { | ||
5831 | /* | ||
5832 | * Only add "power" once for each | ||
5833 | * physical package. | ||
5834 | */ | ||
5835 | continue; | ||
5836 | } | ||
5837 | power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * | ||
5838 | (cpus_weight(sd->groups->cpumask)-1) / 10; | ||
5839 | |||
5840 | sg->cpu_power += power; | ||
5841 | } | ||
5842 | sg = sg->next; | ||
5843 | if (sg != sched_group_nodes[i]) | ||
5844 | goto next_sg; | ||
5845 | } | ||
5846 | #endif | 5930 | #endif |
5847 | 5931 | ||
5848 | /* Attach the domains */ | 5932 | /* Attach the domains */ |
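The cpu_power values assign diminishing returns to extra logical CPUs in a group (roughly 10% per additional sibling at the SMT and new MC levels), while under CONFIG_SCHED_MC a physical package is pinned to exactly SCHED_LOAD_SCALE so the NUMA code can build node power by plain summation, as the comment above explains. A quick sketch of the formula, with SCHED_LOAD_SCALE assumed to be 128 (one CPU's worth of capacity in this era's fixed-point load units):

#include <stdio.h>

#define SCHED_LOAD_SCALE 128UL	/* assumed value; it is the unit, not the point */

static unsigned long group_power(int nr_cpus)
{
	/* Formula used above for the SMT and MC group levels. */
	return SCHED_LOAD_SCALE + (nr_cpus - 1) * SCHED_LOAD_SCALE / 10;
}

int main(void)
{
	int n;

	for (n = 1; n <= 4; n++)
		printf("%d logical CPUs -> cpu_power %3lu (%.2f CPUs)\n",
		       n, group_power(n),
		       (double)group_power(n) / SCHED_LOAD_SCALE);
	return 0;
}
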
@@ -5850,6 +5934,8 @@ next_sg: | |||
5850 | struct sched_domain *sd; | 5934 | struct sched_domain *sd; |
5851 | #ifdef CONFIG_SCHED_SMT | 5935 | #ifdef CONFIG_SCHED_SMT |
5852 | sd = &per_cpu(cpu_domains, i); | 5936 | sd = &per_cpu(cpu_domains, i); |
5937 | #elif defined(CONFIG_SCHED_MC) | ||
5938 | sd = &per_cpu(core_domains, i); | ||
5853 | #else | 5939 | #else |
5854 | sd = &per_cpu(phys_domains, i); | 5940 | sd = &per_cpu(phys_domains, i); |
5855 | #endif | 5941 | #endif |
@@ -6022,7 +6108,7 @@ void __init sched_init(void) | |||
6022 | runqueue_t *rq; | 6108 | runqueue_t *rq; |
6023 | int i, j, k; | 6109 | int i, j, k; |
6024 | 6110 | ||
6025 | for_each_cpu(i) { | 6111 | for_each_possible_cpu(i) { |
6026 | prio_array_t *array; | 6112 | prio_array_t *array; |
6027 | 6113 | ||
6028 | rq = cpu_rq(i); | 6114 | rq = cpu_rq(i); |
diff --git a/kernel/signal.c b/kernel/signal.c index 75f7341b0c..e5f8aea78f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <linux/security.h> | 22 | #include <linux/security.h> |
23 | #include <linux/syscalls.h> | 23 | #include <linux/syscalls.h> |
24 | #include <linux/ptrace.h> | 24 | #include <linux/ptrace.h> |
25 | #include <linux/posix-timers.h> | ||
26 | #include <linux/signal.h> | 25 | #include <linux/signal.h> |
27 | #include <linux/audit.h> | 26 | #include <linux/audit.h> |
28 | #include <linux/capability.h> | 27 | #include <linux/capability.h> |
@@ -147,6 +146,8 @@ static kmem_cache_t *sigqueue_cachep; | |||
147 | #define sig_kernel_stop(sig) \ | 146 | #define sig_kernel_stop(sig) \ |
148 | (((sig) < SIGRTMIN) && T(sig, SIG_KERNEL_STOP_MASK)) | 147 | (((sig) < SIGRTMIN) && T(sig, SIG_KERNEL_STOP_MASK)) |
149 | 148 | ||
149 | #define sig_needs_tasklist(sig) ((sig) == SIGCONT) | ||
150 | |||
150 | #define sig_user_defined(t, signr) \ | 151 | #define sig_user_defined(t, signr) \ |
151 | (((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_DFL) && \ | 152 | (((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_DFL) && \ |
152 | ((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_IGN)) | 153 | ((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_IGN)) |
@@ -292,7 +293,7 @@ static void __sigqueue_free(struct sigqueue *q) | |||
292 | kmem_cache_free(sigqueue_cachep, q); | 293 | kmem_cache_free(sigqueue_cachep, q); |
293 | } | 294 | } |
294 | 295 | ||
295 | static void flush_sigqueue(struct sigpending *queue) | 296 | void flush_sigqueue(struct sigpending *queue) |
296 | { | 297 | { |
297 | struct sigqueue *q; | 298 | struct sigqueue *q; |
298 | 299 | ||
@@ -307,9 +308,7 @@ static void flush_sigqueue(struct sigpending *queue) | |||
307 | /* | 308 | /* |
308 | * Flush all pending signals for a task. | 309 | * Flush all pending signals for a task. |
309 | */ | 310 | */ |
310 | 311 | void flush_signals(struct task_struct *t) | |
311 | void | ||
312 | flush_signals(struct task_struct *t) | ||
313 | { | 312 | { |
314 | unsigned long flags; | 313 | unsigned long flags; |
315 | 314 | ||
@@ -321,109 +320,6 @@ flush_signals(struct task_struct *t) | |||
321 | } | 320 | } |
322 | 321 | ||
323 | /* | 322 | /* |
324 | * This function expects the tasklist_lock write-locked. | ||
325 | */ | ||
326 | void __exit_sighand(struct task_struct *tsk) | ||
327 | { | ||
328 | struct sighand_struct * sighand = tsk->sighand; | ||
329 | |||
330 | /* Ok, we're done with the signal handlers */ | ||
331 | tsk->sighand = NULL; | ||
332 | if (atomic_dec_and_test(&sighand->count)) | ||
333 | sighand_free(sighand); | ||
334 | } | ||
335 | |||
336 | void exit_sighand(struct task_struct *tsk) | ||
337 | { | ||
338 | write_lock_irq(&tasklist_lock); | ||
339 | rcu_read_lock(); | ||
340 | if (tsk->sighand != NULL) { | ||
341 | struct sighand_struct *sighand = rcu_dereference(tsk->sighand); | ||
342 | spin_lock(&sighand->siglock); | ||
343 | __exit_sighand(tsk); | ||
344 | spin_unlock(&sighand->siglock); | ||
345 | } | ||
346 | rcu_read_unlock(); | ||
347 | write_unlock_irq(&tasklist_lock); | ||
348 | } | ||
349 | |||
350 | /* | ||
351 | * This function expects the tasklist_lock write-locked. | ||
352 | */ | ||
353 | void __exit_signal(struct task_struct *tsk) | ||
354 | { | ||
355 | struct signal_struct * sig = tsk->signal; | ||
356 | struct sighand_struct * sighand; | ||
357 | |||
358 | if (!sig) | ||
359 | BUG(); | ||
360 | if (!atomic_read(&sig->count)) | ||
361 | BUG(); | ||
362 | rcu_read_lock(); | ||
363 | sighand = rcu_dereference(tsk->sighand); | ||
364 | spin_lock(&sighand->siglock); | ||
365 | posix_cpu_timers_exit(tsk); | ||
366 | if (atomic_dec_and_test(&sig->count)) { | ||
367 | posix_cpu_timers_exit_group(tsk); | ||
368 | tsk->signal = NULL; | ||
369 | __exit_sighand(tsk); | ||
370 | spin_unlock(&sighand->siglock); | ||
371 | flush_sigqueue(&sig->shared_pending); | ||
372 | } else { | ||
373 | /* | ||
374 | * If there is any task waiting for the group exit | ||
375 | * then notify it: | ||
376 | */ | ||
377 | if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) { | ||
378 | wake_up_process(sig->group_exit_task); | ||
379 | sig->group_exit_task = NULL; | ||
380 | } | ||
381 | if (tsk == sig->curr_target) | ||
382 | sig->curr_target = next_thread(tsk); | ||
383 | tsk->signal = NULL; | ||
384 | /* | ||
385 | * Accumulate here the counters for all threads but the | ||
386 | * group leader as they die, so they can be added into | ||
387 | * the process-wide totals when those are taken. | ||
388 | * The group leader stays around as a zombie as long | ||
389 | * as there are other threads. When it gets reaped, | ||
390 | * the exit.c code will add its counts into these totals. | ||
391 | * We won't ever get here for the group leader, since it | ||
392 | * will have been the last reference on the signal_struct. | ||
393 | */ | ||
394 | sig->utime = cputime_add(sig->utime, tsk->utime); | ||
395 | sig->stime = cputime_add(sig->stime, tsk->stime); | ||
396 | sig->min_flt += tsk->min_flt; | ||
397 | sig->maj_flt += tsk->maj_flt; | ||
398 | sig->nvcsw += tsk->nvcsw; | ||
399 | sig->nivcsw += tsk->nivcsw; | ||
400 | sig->sched_time += tsk->sched_time; | ||
401 | __exit_sighand(tsk); | ||
402 | spin_unlock(&sighand->siglock); | ||
403 | sig = NULL; /* Marker for below. */ | ||
404 | } | ||
405 | rcu_read_unlock(); | ||
406 | clear_tsk_thread_flag(tsk,TIF_SIGPENDING); | ||
407 | flush_sigqueue(&tsk->pending); | ||
408 | if (sig) { | ||
409 | /* | ||
410 | * We are cleaning up the signal_struct here. | ||
411 | */ | ||
412 | exit_thread_group_keys(sig); | ||
413 | kmem_cache_free(signal_cachep, sig); | ||
414 | } | ||
415 | } | ||
416 | |||
417 | void exit_signal(struct task_struct *tsk) | ||
418 | { | ||
419 | atomic_dec(&tsk->signal->live); | ||
420 | |||
421 | write_lock_irq(&tasklist_lock); | ||
422 | __exit_signal(tsk); | ||
423 | write_unlock_irq(&tasklist_lock); | ||
424 | } | ||
425 | |||
426 | /* | ||
427 | * Flush all handlers for a task. | 323 | * Flush all handlers for a task. |
428 | */ | 324 | */ |
429 | 325 | ||
@@ -695,9 +591,7 @@ static int check_kill_permission(int sig, struct siginfo *info, | |||
695 | } | 591 | } |
696 | 592 | ||
697 | /* forward decl */ | 593 | /* forward decl */ |
698 | static void do_notify_parent_cldstop(struct task_struct *tsk, | 594 | static void do_notify_parent_cldstop(struct task_struct *tsk, int why); |
699 | int to_self, | ||
700 | int why); | ||
701 | 595 | ||
702 | /* | 596 | /* |
703 | * Handle magic process-wide effects of stop/continue signals. | 597 | * Handle magic process-wide effects of stop/continue signals. |
@@ -747,7 +641,7 @@ static void handle_stop_signal(int sig, struct task_struct *p) | |||
747 | p->signal->group_stop_count = 0; | 641 | p->signal->group_stop_count = 0; |
748 | p->signal->flags = SIGNAL_STOP_CONTINUED; | 642 | p->signal->flags = SIGNAL_STOP_CONTINUED; |
749 | spin_unlock(&p->sighand->siglock); | 643 | spin_unlock(&p->sighand->siglock); |
750 | do_notify_parent_cldstop(p, (p->ptrace & PT_PTRACED), CLD_STOPPED); | 644 | do_notify_parent_cldstop(p, CLD_STOPPED); |
751 | spin_lock(&p->sighand->siglock); | 645 | spin_lock(&p->sighand->siglock); |
752 | } | 646 | } |
753 | rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending); | 647 | rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending); |
@@ -788,7 +682,7 @@ static void handle_stop_signal(int sig, struct task_struct *p) | |||
788 | p->signal->flags = SIGNAL_STOP_CONTINUED; | 682 | p->signal->flags = SIGNAL_STOP_CONTINUED; |
789 | p->signal->group_exit_code = 0; | 683 | p->signal->group_exit_code = 0; |
790 | spin_unlock(&p->sighand->siglock); | 684 | spin_unlock(&p->sighand->siglock); |
791 | do_notify_parent_cldstop(p, (p->ptrace & PT_PTRACED), CLD_CONTINUED); | 685 | do_notify_parent_cldstop(p, CLD_CONTINUED); |
792 | spin_lock(&p->sighand->siglock); | 686 | spin_lock(&p->sighand->siglock); |
793 | } else { | 687 | } else { |
794 | /* | 688 | /* |
@@ -875,8 +769,7 @@ specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |||
875 | { | 769 | { |
876 | int ret = 0; | 770 | int ret = 0; |
877 | 771 | ||
878 | if (!irqs_disabled()) | 772 | BUG_ON(!irqs_disabled()); |
879 | BUG(); | ||
880 | assert_spin_locked(&t->sighand->siglock); | 773 | assert_spin_locked(&t->sighand->siglock); |
881 | 774 | ||
882 | /* Short-circuit ignored signals. */ | 775 | /* Short-circuit ignored signals. */ |
@@ -975,7 +868,6 @@ __group_complete_signal(int sig, struct task_struct *p) | |||
975 | if (t == NULL) | 868 | if (t == NULL) |
976 | /* restart balancing at this thread */ | 869 | /* restart balancing at this thread */ |
977 | t = p->signal->curr_target = p; | 870 | t = p->signal->curr_target = p; |
978 | BUG_ON(t->tgid != p->tgid); | ||
979 | 871 | ||
980 | while (!wants_signal(sig, t)) { | 872 | while (!wants_signal(sig, t)) { |
981 | t = next_thread(t); | 873 | t = next_thread(t); |
@@ -1120,27 +1012,37 @@ void zap_other_threads(struct task_struct *p) | |||
1120 | /* | 1012 | /* |
1121 | * Must be called under rcu_read_lock() or with tasklist_lock read-held. | 1013 | * Must be called under rcu_read_lock() or with tasklist_lock read-held. |
1122 | */ | 1014 | */ |
1015 | struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags) | ||
1016 | { | ||
1017 | struct sighand_struct *sighand; | ||
1018 | |||
1019 | for (;;) { | ||
1020 | sighand = rcu_dereference(tsk->sighand); | ||
1021 | if (unlikely(sighand == NULL)) | ||
1022 | break; | ||
1023 | |||
1024 | spin_lock_irqsave(&sighand->siglock, *flags); | ||
1025 | if (likely(sighand == tsk->sighand)) | ||
1026 | break; | ||
1027 | spin_unlock_irqrestore(&sighand->siglock, *flags); | ||
1028 | } | ||
1029 | |||
1030 | return sighand; | ||
1031 | } | ||
1032 | |||
1123 | int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) | 1033 | int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) |
1124 | { | 1034 | { |
1125 | unsigned long flags; | 1035 | unsigned long flags; |
1126 | struct sighand_struct *sp; | ||
1127 | int ret; | 1036 | int ret; |
1128 | 1037 | ||
1129 | retry: | ||
1130 | ret = check_kill_permission(sig, info, p); | 1038 | ret = check_kill_permission(sig, info, p); |
1131 | if (!ret && sig && (sp = rcu_dereference(p->sighand))) { | 1039 | |
1132 | spin_lock_irqsave(&sp->siglock, flags); | 1040 | if (!ret && sig) { |
1133 | if (p->sighand != sp) { | 1041 | ret = -ESRCH; |
1134 | spin_unlock_irqrestore(&sp->siglock, flags); | 1042 | if (lock_task_sighand(p, &flags)) { |
1135 | goto retry; | 1043 | ret = __group_send_sig_info(sig, info, p); |
1136 | } | 1044 | unlock_task_sighand(p, &flags); |
1137 | if ((atomic_read(&sp->count) == 0) || | ||
1138 | (atomic_read(&p->usage) == 0)) { | ||
1139 | spin_unlock_irqrestore(&sp->siglock, flags); | ||
1140 | return -ESRCH; | ||
1141 | } | 1045 | } |
1142 | ret = __group_send_sig_info(sig, info, p); | ||
1143 | spin_unlock_irqrestore(&sp->siglock, flags); | ||
1144 | } | 1046 | } |
1145 | 1047 | ||
1146 | return ret; | 1048 | return ret; |
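lock_task_sighand() loops because tsk->sighand can be swapped or freed by exec/exit between the rcu_dereference() and taking siglock; once the lock is held and the pointer still matches, the caller owns a stable sighand. A kernel-style usage sketch (hypothetical helper, not from the patch; unlock_task_sighand() is the counterpart used elsewhere in this diff):

#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/rcupdate.h>

static int poke_task(struct task_struct *p)
{
	unsigned long flags;
	int ret = -ESRCH;

	rcu_read_lock();			/* keeps the sighand memory valid */
	if (lock_task_sighand(p, &flags)) {	/* NULL => task already detached it */
		/* p->sighand->siglock held, irqs off: safe to touch
		 * p->signal and the pending queues here. */
		ret = 0;
		unlock_task_sighand(p, &flags);
	}
	rcu_read_unlock();
	return ret;
}
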
@@ -1189,7 +1091,7 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid) | |||
1189 | struct task_struct *p; | 1091 | struct task_struct *p; |
1190 | 1092 | ||
1191 | rcu_read_lock(); | 1093 | rcu_read_lock(); |
1192 | if (unlikely(sig_kernel_stop(sig) || sig == SIGCONT)) { | 1094 | if (unlikely(sig_needs_tasklist(sig))) { |
1193 | read_lock(&tasklist_lock); | 1095 | read_lock(&tasklist_lock); |
1194 | acquired_tasklist_lock = 1; | 1096 | acquired_tasklist_lock = 1; |
1195 | } | 1097 | } |
@@ -1405,12 +1307,10 @@ void sigqueue_free(struct sigqueue *q) | |||
1405 | __sigqueue_free(q); | 1307 | __sigqueue_free(q); |
1406 | } | 1308 | } |
1407 | 1309 | ||
1408 | int | 1310 | int send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) |
1409 | send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) | ||
1410 | { | 1311 | { |
1411 | unsigned long flags; | 1312 | unsigned long flags; |
1412 | int ret = 0; | 1313 | int ret = 0; |
1413 | struct sighand_struct *sh; | ||
1414 | 1314 | ||
1415 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); | 1315 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); |
1416 | 1316 | ||
@@ -1424,48 +1324,17 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) | |||
1424 | */ | 1324 | */ |
1425 | rcu_read_lock(); | 1325 | rcu_read_lock(); |
1426 | 1326 | ||
1427 | if (unlikely(p->flags & PF_EXITING)) { | 1327 | if (!likely(lock_task_sighand(p, &flags))) { |
1428 | ret = -1; | 1328 | ret = -1; |
1429 | goto out_err; | 1329 | goto out_err; |
1430 | } | 1330 | } |
1431 | 1331 | ||
1432 | retry: | ||
1433 | sh = rcu_dereference(p->sighand); | ||
1434 | |||
1435 | spin_lock_irqsave(&sh->siglock, flags); | ||
1436 | if (p->sighand != sh) { | ||
1437 | /* We raced with exec() in a multithreaded process... */ | ||
1438 | spin_unlock_irqrestore(&sh->siglock, flags); | ||
1439 | goto retry; | ||
1440 | } | ||
1441 | |||
1442 | /* | ||
1443 | * We do the check here again to handle the following scenario: | ||
1444 | * | ||
1445 | * CPU 0 CPU 1 | ||
1446 | * send_sigqueue | ||
1447 | * check PF_EXITING | ||
1448 | * interrupt exit code running | ||
1449 | * __exit_signal | ||
1450 | * lock sighand->siglock | ||
1451 | * unlock sighand->siglock | ||
1452 | * lock sh->siglock | ||
1453 | * add(tsk->pending) flush_sigqueue(tsk->pending) | ||
1454 | * | ||
1455 | */ | ||
1456 | |||
1457 | if (unlikely(p->flags & PF_EXITING)) { | ||
1458 | ret = -1; | ||
1459 | goto out; | ||
1460 | } | ||
1461 | |||
1462 | if (unlikely(!list_empty(&q->list))) { | 1332 | if (unlikely(!list_empty(&q->list))) { |
1463 | /* | 1333 | /* |
1464 | * If an SI_TIMER entry is already queue just increment | 1334 | * If an SI_TIMER entry is already queue just increment |
1465 | * the overrun count. | 1335 | * the overrun count. |
1466 | */ | 1336 | */ |
1467 | if (q->info.si_code != SI_TIMER) | 1337 | BUG_ON(q->info.si_code != SI_TIMER); |
1468 | BUG(); | ||
1469 | q->info.si_overrun++; | 1338 | q->info.si_overrun++; |
1470 | goto out; | 1339 | goto out; |
1471 | } | 1340 | } |
@@ -1481,7 +1350,7 @@ retry: | |||
1481 | signal_wake_up(p, sig == SIGKILL); | 1350 | signal_wake_up(p, sig == SIGKILL); |
1482 | 1351 | ||
1483 | out: | 1352 | out: |
1484 | spin_unlock_irqrestore(&sh->siglock, flags); | 1353 | unlock_task_sighand(p, &flags); |
1485 | out_err: | 1354 | out_err: |
1486 | rcu_read_unlock(); | 1355 | rcu_read_unlock(); |
1487 | 1356 | ||
@@ -1513,8 +1382,7 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) | |||
1513 | * the overrun count. Other uses should not try to | 1382 | * the overrun count. Other uses should not try to |
1514 | * send the signal multiple times. | 1383 | * send the signal multiple times. |
1515 | */ | 1384 | */ |
1516 | if (q->info.si_code != SI_TIMER) | 1385 | BUG_ON(q->info.si_code != SI_TIMER); |
1517 | BUG(); | ||
1518 | q->info.si_overrun++; | 1386 | q->info.si_overrun++; |
1519 | goto out; | 1387 | goto out; |
1520 | } | 1388 | } |
@@ -1613,14 +1481,14 @@ void do_notify_parent(struct task_struct *tsk, int sig) | |||
1613 | spin_unlock_irqrestore(&psig->siglock, flags); | 1481 | spin_unlock_irqrestore(&psig->siglock, flags); |
1614 | } | 1482 | } |
1615 | 1483 | ||
1616 | static void do_notify_parent_cldstop(struct task_struct *tsk, int to_self, int why) | 1484 | static void do_notify_parent_cldstop(struct task_struct *tsk, int why) |
1617 | { | 1485 | { |
1618 | struct siginfo info; | 1486 | struct siginfo info; |
1619 | unsigned long flags; | 1487 | unsigned long flags; |
1620 | struct task_struct *parent; | 1488 | struct task_struct *parent; |
1621 | struct sighand_struct *sighand; | 1489 | struct sighand_struct *sighand; |
1622 | 1490 | ||
1623 | if (to_self) | 1491 | if (tsk->ptrace & PT_PTRACED) |
1624 | parent = tsk->parent; | 1492 | parent = tsk->parent; |
1625 | else { | 1493 | else { |
1626 | tsk = tsk->group_leader; | 1494 | tsk = tsk->group_leader; |
@@ -1689,13 +1557,14 @@ static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info) | |||
1689 | /* Let the debugger run. */ | 1557 | /* Let the debugger run. */ |
1690 | set_current_state(TASK_TRACED); | 1558 | set_current_state(TASK_TRACED); |
1691 | spin_unlock_irq(¤t->sighand->siglock); | 1559 | spin_unlock_irq(¤t->sighand->siglock); |
1560 | try_to_freeze(); | ||
1692 | read_lock(&tasklist_lock); | 1561 | read_lock(&tasklist_lock); |
1693 | if (likely(current->ptrace & PT_PTRACED) && | 1562 | if (likely(current->ptrace & PT_PTRACED) && |
1694 | likely(current->parent != current->real_parent || | 1563 | likely(current->parent != current->real_parent || |
1695 | !(current->ptrace & PT_ATTACHED)) && | 1564 | !(current->ptrace & PT_ATTACHED)) && |
1696 | (likely(current->parent->signal != current->signal) || | 1565 | (likely(current->parent->signal != current->signal) || |
1697 | !unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))) { | 1566 | !unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))) { |
1698 | do_notify_parent_cldstop(current, 1, CLD_TRAPPED); | 1567 | do_notify_parent_cldstop(current, CLD_TRAPPED); |
1699 | read_unlock(&tasklist_lock); | 1568 | read_unlock(&tasklist_lock); |
1700 | schedule(); | 1569 | schedule(); |
1701 | } else { | 1570 | } else { |
@@ -1744,25 +1613,17 @@ void ptrace_notify(int exit_code) | |||
1744 | static void | 1613 | static void |
1745 | finish_stop(int stop_count) | 1614 | finish_stop(int stop_count) |
1746 | { | 1615 | { |
1747 | int to_self; | ||
1748 | |||
1749 | /* | 1616 | /* |
1750 | * If there are no other threads in the group, or if there is | 1617 | * If there are no other threads in the group, or if there is |
1751 | * a group stop in progress and we are the last to stop, | 1618 | * a group stop in progress and we are the last to stop, |
1752 | * report to the parent. When ptraced, every thread reports itself. | 1619 | * report to the parent. When ptraced, every thread reports itself. |
1753 | */ | 1620 | */ |
1754 | if (stop_count < 0 || (current->ptrace & PT_PTRACED)) | 1621 | if (stop_count == 0 || (current->ptrace & PT_PTRACED)) { |
1755 | to_self = 1; | 1622 | read_lock(&tasklist_lock); |
1756 | else if (stop_count == 0) | 1623 | do_notify_parent_cldstop(current, CLD_STOPPED); |
1757 | to_self = 0; | 1624 | read_unlock(&tasklist_lock); |
1758 | else | 1625 | } |
1759 | goto out; | ||
1760 | |||
1761 | read_lock(&tasklist_lock); | ||
1762 | do_notify_parent_cldstop(current, to_self, CLD_STOPPED); | ||
1763 | read_unlock(&tasklist_lock); | ||
1764 | 1626 | ||
1765 | out: | ||
1766 | schedule(); | 1627 | schedule(); |
1767 | /* | 1628 | /* |
1768 | * Now we don't run again until continued. | 1629 | * Now we don't run again until continued. |
@@ -1776,12 +1637,10 @@ out: | |||
1776 | * Returns nonzero if we've actually stopped and released the siglock. | 1637 | * Returns nonzero if we've actually stopped and released the siglock. |
1777 | * Returns zero if we didn't stop and still hold the siglock. | 1638 | * Returns zero if we didn't stop and still hold the siglock. |
1778 | */ | 1639 | */ |
1779 | static int | 1640 | static int do_signal_stop(int signr) |
1780 | do_signal_stop(int signr) | ||
1781 | { | 1641 | { |
1782 | struct signal_struct *sig = current->signal; | 1642 | struct signal_struct *sig = current->signal; |
1783 | struct sighand_struct *sighand = current->sighand; | 1643 | int stop_count; |
1784 | int stop_count = -1; | ||
1785 | 1644 | ||
1786 | if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED)) | 1645 | if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED)) |
1787 | return 0; | 1646 | return 0; |
@@ -1791,86 +1650,37 @@ do_signal_stop(int signr) | |||
1791 | * There is a group stop in progress. We don't need to | 1650 | * There is a group stop in progress. We don't need to |
1792 | * start another one. | 1651 | * start another one. |
1793 | */ | 1652 | */ |
1794 | signr = sig->group_exit_code; | ||
1795 | stop_count = --sig->group_stop_count; | 1653 | stop_count = --sig->group_stop_count; |
1796 | current->exit_code = signr; | 1654 | } else { |
1797 | set_current_state(TASK_STOPPED); | ||
1798 | if (stop_count == 0) | ||
1799 | sig->flags = SIGNAL_STOP_STOPPED; | ||
1800 | spin_unlock_irq(&sighand->siglock); | ||
1801 | } | ||
1802 | else if (thread_group_empty(current)) { | ||
1803 | /* | ||
1804 | * Lock must be held through transition to stopped state. | ||
1805 | */ | ||
1806 | current->exit_code = current->signal->group_exit_code = signr; | ||
1807 | set_current_state(TASK_STOPPED); | ||
1808 | sig->flags = SIGNAL_STOP_STOPPED; | ||
1809 | spin_unlock_irq(&sighand->siglock); | ||
1810 | } | ||
1811 | else { | ||
1812 | /* | 1655 | /* |
1813 | * There is no group stop already in progress. | 1656 | * There is no group stop already in progress. |
1814 | * We must initiate one now, but that requires | 1657 | * We must initiate one now. |
1815 | * dropping siglock to get both the tasklist lock | ||
1816 | * and siglock again in the proper order. Note that | ||
1817 | * this allows an intervening SIGCONT to be posted. | ||
1818 | * We need to check for that and bail out if necessary. | ||
1819 | */ | 1658 | */ |
1820 | struct task_struct *t; | 1659 | struct task_struct *t; |
1821 | 1660 | ||
1822 | spin_unlock_irq(&sighand->siglock); | 1661 | sig->group_exit_code = signr; |
1823 | 1662 | ||
1824 | /* signals can be posted during this window */ | 1663 | stop_count = 0; |
1825 | 1664 | for (t = next_thread(current); t != current; t = next_thread(t)) | |
1826 | read_lock(&tasklist_lock); | ||
1827 | spin_lock_irq(&sighand->siglock); | ||
1828 | |||
1829 | if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED)) { | ||
1830 | /* | 1665 | /* |
1831 | * Another stop or continue happened while we | 1666 | * Setting state to TASK_STOPPED for a group |
1832 | * didn't have the lock. We can just swallow this | 1667 | * stop is always done with the siglock held, |
1833 | * signal now. If we raced with a SIGCONT, that | 1668 | * so this check has no races. |
1834 | * should have just cleared it now. If we raced | ||
1835 | * with another processor delivering a stop signal, | ||
1836 | * then the SIGCONT that wakes us up should clear it. | ||
1837 | */ | 1669 | */ |
1838 | read_unlock(&tasklist_lock); | 1670 | if (!t->exit_state && |
1839 | return 0; | 1671 | !(t->state & (TASK_STOPPED|TASK_TRACED))) { |
1840 | } | 1672 | stop_count++; |
1841 | 1673 | signal_wake_up(t, 0); | |
1842 | if (sig->group_stop_count == 0) { | 1674 | } |
1843 | sig->group_exit_code = signr; | 1675 | sig->group_stop_count = stop_count; |
1844 | stop_count = 0; | ||
1845 | for (t = next_thread(current); t != current; | ||
1846 | t = next_thread(t)) | ||
1847 | /* | ||
1848 | * Setting state to TASK_STOPPED for a group | ||
1849 | * stop is always done with the siglock held, | ||
1850 | * so this check has no races. | ||
1851 | */ | ||
1852 | if (!t->exit_state && | ||
1853 | !(t->state & (TASK_STOPPED|TASK_TRACED))) { | ||
1854 | stop_count++; | ||
1855 | signal_wake_up(t, 0); | ||
1856 | } | ||
1857 | sig->group_stop_count = stop_count; | ||
1858 | } | ||
1859 | else { | ||
1860 | /* A race with another thread while unlocked. */ | ||
1861 | signr = sig->group_exit_code; | ||
1862 | stop_count = --sig->group_stop_count; | ||
1863 | } | ||
1864 | |||
1865 | current->exit_code = signr; | ||
1866 | set_current_state(TASK_STOPPED); | ||
1867 | if (stop_count == 0) | ||
1868 | sig->flags = SIGNAL_STOP_STOPPED; | ||
1869 | |||
1870 | spin_unlock_irq(&sighand->siglock); | ||
1871 | read_unlock(&tasklist_lock); | ||
1872 | } | 1676 | } |
1873 | 1677 | ||
1678 | if (stop_count == 0) | ||
1679 | sig->flags = SIGNAL_STOP_STOPPED; | ||
1680 | current->exit_code = sig->group_exit_code; | ||
1681 | __set_current_state(TASK_STOPPED); | ||
1682 | |||
1683 | spin_unlock_irq(¤t->sighand->siglock); | ||
1874 | finish_stop(stop_count); | 1684 | finish_stop(stop_count); |
1875 | return 1; | 1685 | return 1; |
1876 | } | 1686 | } |
@@ -1944,9 +1754,9 @@ relock: | |||
1944 | /* Let the debugger run. */ | 1754 | /* Let the debugger run. */ |
1945 | ptrace_stop(signr, signr, info); | 1755 | ptrace_stop(signr, signr, info); |
1946 | 1756 | ||
1947 | /* We're back. Did the debugger cancel the sig or group_exit? */ | 1757 | /* We're back. Did the debugger cancel the sig? */ |
1948 | signr = current->exit_code; | 1758 | signr = current->exit_code; |
1949 | if (signr == 0 || current->signal->flags & SIGNAL_GROUP_EXIT) | 1759 | if (signr == 0) |
1950 | continue; | 1760 | continue; |
1951 | 1761 | ||
1952 | current->exit_code = 0; | 1762 | current->exit_code = 0; |
@@ -1990,7 +1800,7 @@ relock: | |||
1990 | continue; | 1800 | continue; |
1991 | 1801 | ||
1992 | /* Init gets no signals it doesn't want. */ | 1802 | /* Init gets no signals it doesn't want. */ |
1993 | if (current->pid == 1) | 1803 | if (current == child_reaper) |
1994 | continue; | 1804 | continue; |
1995 | 1805 | ||
1996 | if (sig_kernel_stop(signr)) { | 1806 | if (sig_kernel_stop(signr)) { |
@@ -2430,8 +2240,7 @@ sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo) | |||
2430 | return kill_proc_info(sig, &info, pid); | 2240 | return kill_proc_info(sig, &info, pid); |
2431 | } | 2241 | } |
2432 | 2242 | ||
2433 | int | 2243 | int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) |
2434 | do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) | ||
2435 | { | 2244 | { |
2436 | struct k_sigaction *k; | 2245 | struct k_sigaction *k; |
2437 | sigset_t mask; | 2246 | sigset_t mask; |
@@ -2457,6 +2266,7 @@ do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) | |||
2457 | if (act) { | 2266 | if (act) { |
2458 | sigdelsetmask(&act->sa.sa_mask, | 2267 | sigdelsetmask(&act->sa.sa_mask, |
2459 | sigmask(SIGKILL) | sigmask(SIGSTOP)); | 2268 | sigmask(SIGKILL) | sigmask(SIGSTOP)); |
2269 | *k = *act; | ||
2460 | /* | 2270 | /* |
2461 | * POSIX 3.3.1.3: | 2271 | * POSIX 3.3.1.3: |
2462 | * "Setting a signal action to SIG_IGN for a signal that is | 2272 | * "Setting a signal action to SIG_IGN for a signal that is |
@@ -2469,19 +2279,8 @@ do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) | |||
2469 | * be discarded, whether or not it is blocked" | 2279 | * be discarded, whether or not it is blocked" |
2470 | */ | 2280 | */ |
2471 | if (act->sa.sa_handler == SIG_IGN || | 2281 | if (act->sa.sa_handler == SIG_IGN || |
2472 | (act->sa.sa_handler == SIG_DFL && | 2282 | (act->sa.sa_handler == SIG_DFL && sig_kernel_ignore(sig))) { |
2473 | sig_kernel_ignore(sig))) { | ||
2474 | /* | ||
2475 | * This is a fairly rare case, so we only take the | ||
2476 | * tasklist_lock once we're sure we'll need it. | ||
2477 | * Now we must do this little unlock and relock | ||
2478 | * dance to maintain the lock hierarchy. | ||
2479 | */ | ||
2480 | struct task_struct *t = current; | 2283 | struct task_struct *t = current; |
2481 | spin_unlock_irq(&t->sighand->siglock); | ||
2482 | read_lock(&tasklist_lock); | ||
2483 | spin_lock_irq(&t->sighand->siglock); | ||
2484 | *k = *act; | ||
2485 | sigemptyset(&mask); | 2284 | sigemptyset(&mask); |
2486 | sigaddset(&mask, sig); | 2285 | sigaddset(&mask, sig); |
2487 | rm_from_queue_full(&mask, &t->signal->shared_pending); | 2286 | rm_from_queue_full(&mask, &t->signal->shared_pending); |
@@ -2490,12 +2289,7 @@ do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) | |||
2490 | recalc_sigpending_tsk(t); | 2289 | recalc_sigpending_tsk(t); |
2491 | t = next_thread(t); | 2290 | t = next_thread(t); |
2492 | } while (t != current); | 2291 | } while (t != current); |
2493 | spin_unlock_irq(¤t->sighand->siglock); | ||
2494 | read_unlock(&tasklist_lock); | ||
2495 | return 0; | ||
2496 | } | 2292 | } |
2497 | |||
2498 | *k = *act; | ||
2499 | } | 2293 | } |
2500 | 2294 | ||
2501 | spin_unlock_irq(¤t->sighand->siglock); | 2295 | spin_unlock_irq(¤t->sighand->siglock); |
diff --git a/kernel/softirq.c b/kernel/softirq.c index ec8fed42a8..336f92d64e 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -446,7 +446,7 @@ static void takeover_tasklets(unsigned int cpu) | |||
446 | } | 446 | } |
447 | #endif /* CONFIG_HOTPLUG_CPU */ | 447 | #endif /* CONFIG_HOTPLUG_CPU */ |
448 | 448 | ||
449 | static int __devinit cpu_callback(struct notifier_block *nfb, | 449 | static int cpu_callback(struct notifier_block *nfb, |
450 | unsigned long action, | 450 | unsigned long action, |
451 | void *hcpu) | 451 | void *hcpu) |
452 | { | 452 | { |
@@ -484,7 +484,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb, | |||
484 | return NOTIFY_OK; | 484 | return NOTIFY_OK; |
485 | } | 485 | } |
486 | 486 | ||
487 | static struct notifier_block __devinitdata cpu_nfb = { | 487 | static struct notifier_block cpu_nfb = { |
488 | .notifier_call = cpu_callback | 488 | .notifier_call = cpu_callback |
489 | }; | 489 | }; |
490 | 490 | ||
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index d9b3d5847e..14c7faf029 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
@@ -104,7 +104,7 @@ static int watchdog(void * __bind_cpu) | |||
104 | /* | 104 | /* |
105 | * Create/destroy watchdog threads as CPUs come and go: | 105 | * Create/destroy watchdog threads as CPUs come and go: |
106 | */ | 106 | */ |
107 | static int __devinit | 107 | static int |
108 | cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | 108 | cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) |
109 | { | 109 | { |
110 | int hotcpu = (unsigned long)hcpu; | 110 | int hotcpu = (unsigned long)hcpu; |
@@ -140,7 +140,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
140 | return NOTIFY_OK; | 140 | return NOTIFY_OK; |
141 | } | 141 | } |
142 | 142 | ||
143 | static struct notifier_block __devinitdata cpu_nfb = { | 143 | static struct notifier_block cpu_nfb = { |
144 | .notifier_call = cpu_callback | 144 | .notifier_call = cpu_callback |
145 | }; | 145 | }; |
146 | 146 | ||
@@ -152,5 +152,5 @@ __init void spawn_softlockup_task(void) | |||
152 | cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); | 152 | cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); |
153 | register_cpu_notifier(&cpu_nfb); | 153 | register_cpu_notifier(&cpu_nfb); |
154 | 154 | ||
155 | notifier_chain_register(&panic_notifier_list, &panic_block); | 155 | atomic_notifier_chain_register(&panic_notifier_list, &panic_block); |
156 | } | 156 | } |
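The atomic_notifier_chain_register() call above registers the softlockup panic handler on panic_notifier_list, which the kernel/sys.c rework below turns into a typed atomic chain. A module-style sketch of registering on it (hypothetical callback, not part of the patch); callbacks on an atomic chain run under RCU read-side protection only and must not sleep:

#include <linux/kernel.h>
#include <linux/notifier.h>
#include <linux/init.h>

static int my_panic_event(struct notifier_block *nb, unsigned long event,
			  void *ptr)
{
	/* Atomic context on the panic path: no sleeping, no blocking locks. */
	return NOTIFY_DONE;
}

static struct notifier_block my_panic_block = {
	.notifier_call	= my_panic_event,
	.priority	= 0,
};

static int __init my_notifier_init(void)
{
	return atomic_notifier_chain_register(&panic_notifier_list,
					      &my_panic_block);
}
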
diff --git a/kernel/sys.c b/kernel/sys.c index 38bc73ede2..0b6ec0e793 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -95,99 +95,304 @@ int cad_pid = 1; | |||
95 | * and the like. | 95 | * and the like. |
96 | */ | 96 | */ |
97 | 97 | ||
98 | static struct notifier_block *reboot_notifier_list; | 98 | static BLOCKING_NOTIFIER_HEAD(reboot_notifier_list); |
99 | static DEFINE_RWLOCK(notifier_lock); | 99 | |
100 | /* | ||
101 | * Notifier chain core routines. The exported routines below | ||
102 | * are layered on top of these, with appropriate locking added. | ||
103 | */ | ||
104 | |||
105 | static int notifier_chain_register(struct notifier_block **nl, | ||
106 | struct notifier_block *n) | ||
107 | { | ||
108 | while ((*nl) != NULL) { | ||
109 | if (n->priority > (*nl)->priority) | ||
110 | break; | ||
111 | nl = &((*nl)->next); | ||
112 | } | ||
113 | n->next = *nl; | ||
114 | rcu_assign_pointer(*nl, n); | ||
115 | return 0; | ||
116 | } | ||
117 | |||
118 | static int notifier_chain_unregister(struct notifier_block **nl, | ||
119 | struct notifier_block *n) | ||
120 | { | ||
121 | while ((*nl) != NULL) { | ||
122 | if ((*nl) == n) { | ||
123 | rcu_assign_pointer(*nl, n->next); | ||
124 | return 0; | ||
125 | } | ||
126 | nl = &((*nl)->next); | ||
127 | } | ||
128 | return -ENOENT; | ||
129 | } | ||
130 | |||
131 | static int __kprobes notifier_call_chain(struct notifier_block **nl, | ||
132 | unsigned long val, void *v) | ||
133 | { | ||
134 | int ret = NOTIFY_DONE; | ||
135 | struct notifier_block *nb; | ||
136 | |||
137 | nb = rcu_dereference(*nl); | ||
138 | while (nb) { | ||
139 | ret = nb->notifier_call(nb, val, v); | ||
140 | if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK) | ||
141 | break; | ||
142 | nb = rcu_dereference(nb->next); | ||
143 | } | ||
144 | return ret; | ||
145 | } | ||
146 | |||
147 | /* | ||
148 | * Atomic notifier chain routines. Registration and unregistration | ||
149 | * use a mutex, and call_chain is synchronized by RCU (no locks). | ||
150 | */ | ||
100 | 151 | ||
101 | /** | 152 | /** |
102 | * notifier_chain_register - Add notifier to a notifier chain | 153 | * atomic_notifier_chain_register - Add notifier to an atomic notifier chain |
103 | * @list: Pointer to root list pointer | 154 | * @nh: Pointer to head of the atomic notifier chain |
104 | * @n: New entry in notifier chain | 155 | * @n: New entry in notifier chain |
105 | * | 156 | * |
106 | * Adds a notifier to a notifier chain. | 157 | * Adds a notifier to an atomic notifier chain. |
107 | * | 158 | * |
108 | * Currently always returns zero. | 159 | * Currently always returns zero. |
109 | */ | 160 | */ |
161 | |||
162 | int atomic_notifier_chain_register(struct atomic_notifier_head *nh, | ||
163 | struct notifier_block *n) | ||
164 | { | ||
165 | unsigned long flags; | ||
166 | int ret; | ||
167 | |||
168 | spin_lock_irqsave(&nh->lock, flags); | ||
169 | ret = notifier_chain_register(&nh->head, n); | ||
170 | spin_unlock_irqrestore(&nh->lock, flags); | ||
171 | return ret; | ||
172 | } | ||
173 | |||
174 | EXPORT_SYMBOL_GPL(atomic_notifier_chain_register); | ||
175 | |||
176 | /** | ||
177 | * atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain | ||
178 | * @nh: Pointer to head of the atomic notifier chain | ||
179 | * @n: Entry to remove from notifier chain | ||
180 | * | ||
181 | * Removes a notifier from an atomic notifier chain. | ||
182 | * | ||
183 | * Returns zero on success or %-ENOENT on failure. | ||
184 | */ | ||
185 | int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, | ||
186 | struct notifier_block *n) | ||
187 | { | ||
188 | unsigned long flags; | ||
189 | int ret; | ||
190 | |||
191 | spin_lock_irqsave(&nh->lock, flags); | ||
192 | ret = notifier_chain_unregister(&nh->head, n); | ||
193 | spin_unlock_irqrestore(&nh->lock, flags); | ||
194 | synchronize_rcu(); | ||
195 | return ret; | ||
196 | } | ||
197 | |||
198 | EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister); | ||
199 | |||
200 | /** | ||
201 | * atomic_notifier_call_chain - Call functions in an atomic notifier chain | ||
202 | * @nh: Pointer to head of the atomic notifier chain | ||
203 | * @val: Value passed unmodified to notifier function | ||
204 | * @v: Pointer passed unmodified to notifier function | ||
205 | * | ||
206 | * Calls each function in a notifier chain in turn. The functions | ||
207 | * run in an atomic context, so they must not block. | ||
208 | * This routine uses RCU to synchronize with changes to the chain. | ||
209 | * | ||
210 | * If the return value of the notifier can be and'ed | ||
211 | * with %NOTIFY_STOP_MASK then atomic_notifier_call_chain | ||
212 | * will return immediately, with the return value of | ||
213 | * the notifier function which halted execution. | ||
214 | * Otherwise the return value is the return value | ||
215 | * of the last notifier function called. | ||
216 | */ | ||
110 | 217 | ||
111 | int notifier_chain_register(struct notifier_block **list, struct notifier_block *n) | 218 | int atomic_notifier_call_chain(struct atomic_notifier_head *nh, |
219 | unsigned long val, void *v) | ||
112 | { | 220 | { |
113 | write_lock(¬ifier_lock); | 221 | int ret; |
114 | while(*list) | 222 | |
115 | { | 223 | rcu_read_lock(); |
116 | if(n->priority > (*list)->priority) | 224 | ret = notifier_call_chain(&nh->head, val, v); |
117 | break; | 225 | rcu_read_unlock(); |
118 | list= &((*list)->next); | 226 | return ret; |
119 | } | ||
120 | n->next = *list; | ||
121 | *list=n; | ||
122 | write_unlock(¬ifier_lock); | ||
123 | return 0; | ||
124 | } | 227 | } |
125 | 228 | ||
126 | EXPORT_SYMBOL(notifier_chain_register); | 229 | EXPORT_SYMBOL_GPL(atomic_notifier_call_chain); |
230 | |||
231 | /* | ||
232 | * Blocking notifier chain routines. All access to the chain is | ||
233 | * synchronized by an rwsem. | ||
234 | */ | ||
127 | 235 | ||
128 | /** | 236 | /** |
129 | * notifier_chain_unregister - Remove notifier from a notifier chain | 237 | * blocking_notifier_chain_register - Add notifier to a blocking notifier chain |
130 | * @nl: Pointer to root list pointer | 238 | * @nh: Pointer to head of the blocking notifier chain |
131 | * @n: New entry in notifier chain | 239 | * @n: New entry in notifier chain |
132 | * | 240 | * |
133 | * Removes a notifier from a notifier chain. | 241 | * Adds a notifier to a blocking notifier chain. |
242 | * Must be called in process context. | ||
134 | * | 243 | * |
135 | * Returns zero on success, or %-ENOENT on failure. | 244 | * Currently always returns zero. |
136 | */ | 245 | */ |
137 | 246 | ||
138 | int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n) | 247 | int blocking_notifier_chain_register(struct blocking_notifier_head *nh, |
248 | struct notifier_block *n) | ||
139 | { | 249 | { |
140 | write_lock(¬ifier_lock); | 250 | int ret; |
141 | while((*nl)!=NULL) | 251 | |
142 | { | 252 | /* |
143 | if((*nl)==n) | 253 | * This code gets used during boot-up, when task switching is |
144 | { | 254 | * not yet working and interrupts must remain disabled. At |
145 | *nl=n->next; | 255 | * such times we must not call down_write(). |
146 | write_unlock(¬ifier_lock); | 256 | */ |
147 | return 0; | 257 | if (unlikely(system_state == SYSTEM_BOOTING)) |
148 | } | 258 | return notifier_chain_register(&nh->head, n); |
149 | nl=&((*nl)->next); | 259 | |
150 | } | 260 | down_write(&nh->rwsem); |
151 | write_unlock(¬ifier_lock); | 261 | ret = notifier_chain_register(&nh->head, n); |
152 | return -ENOENT; | 262 | up_write(&nh->rwsem); |
263 | return ret; | ||
153 | } | 264 | } |
154 | 265 | ||
155 | EXPORT_SYMBOL(notifier_chain_unregister); | 266 | EXPORT_SYMBOL_GPL(blocking_notifier_chain_register); |
156 | 267 | ||
157 | /** | 268 | /** |
158 | * notifier_call_chain - Call functions in a notifier chain | 269 | * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain |
159 | * @n: Pointer to root pointer of notifier chain | 270 | * @nh: Pointer to head of the blocking notifier chain |
271 | * @n: Entry to remove from notifier chain | ||
272 | * | ||
273 | * Removes a notifier from a blocking notifier chain. | ||
274 | * Must be called from process context. | ||
275 | * | ||
276 | * Returns zero on success or %-ENOENT on failure. | ||
277 | */ | ||
278 | int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, | ||
279 | struct notifier_block *n) | ||
280 | { | ||
281 | int ret; | ||
282 | |||
283 | /* | ||
284 | * This code gets used during boot-up, when task switching is | ||
285 | * not yet working and interrupts must remain disabled. At | ||
286 | * such times we must not call down_write(). | ||
287 | */ | ||
288 | if (unlikely(system_state == SYSTEM_BOOTING)) | ||
289 | return notifier_chain_unregister(&nh->head, n); | ||
290 | |||
291 | down_write(&nh->rwsem); | ||
292 | ret = notifier_chain_unregister(&nh->head, n); | ||
293 | up_write(&nh->rwsem); | ||
294 | return ret; | ||
295 | } | ||
296 | |||
297 | EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister); | ||
298 | |||
299 | /** | ||
300 | * blocking_notifier_call_chain - Call functions in a blocking notifier chain | ||
301 | * @nh: Pointer to head of the blocking notifier chain | ||
160 | * @val: Value passed unmodified to notifier function | 302 | * @val: Value passed unmodified to notifier function |
161 | * @v: Pointer passed unmodified to notifier function | 303 | * @v: Pointer passed unmodified to notifier function |
162 | * | 304 | * |
163 | * Calls each function in a notifier chain in turn. | 305 | * Calls each function in a notifier chain in turn. The functions |
306 | * run in a process context, so they are allowed to block. | ||
164 | * | 307 | * |
165 | * If the return value of the notifier can be and'd | 308 | * If the return value of the notifier can be and'ed |
166 | * with %NOTIFY_STOP_MASK, then notifier_call_chain | 309 | * with %NOTIFY_STOP_MASK then blocking_notifier_call_chain |
167 | * will return immediately, with the return value of | 310 | * will return immediately, with the return value of |
168 | * the notifier function which halted execution. | 311 | * the notifier function which halted execution. |
169 | * Otherwise, the return value is the return value | 312 | * Otherwise the return value is the return value |
170 | * of the last notifier function called. | 313 | * of the last notifier function called. |
171 | */ | 314 | */ |
172 | 315 | ||
173 | int __kprobes notifier_call_chain(struct notifier_block **n, unsigned long val, void *v) | 316 | int blocking_notifier_call_chain(struct blocking_notifier_head *nh, |
317 | unsigned long val, void *v) | ||
174 | { | 318 | { |
175 | int ret=NOTIFY_DONE; | 319 | int ret; |
176 | struct notifier_block *nb = *n; | ||
177 | 320 | ||
178 | while(nb) | 321 | down_read(&nh->rwsem); |
179 | { | 322 | ret = notifier_call_chain(&nh->head, val, v); |
180 | ret=nb->notifier_call(nb,val,v); | 323 | up_read(&nh->rwsem); |
181 | if(ret&NOTIFY_STOP_MASK) | ||
182 | { | ||
183 | return ret; | ||
184 | } | ||
185 | nb=nb->next; | ||
186 | } | ||
187 | return ret; | 324 | return ret; |
188 | } | 325 | } |
189 | 326 | ||
190 | EXPORT_SYMBOL(notifier_call_chain); | 327 | EXPORT_SYMBOL_GPL(blocking_notifier_call_chain); |
328 | |||
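A matching sketch for the blocking variant, again with hypothetical names: registration and delivery take the rwsem, so both must happen in process context, and the callbacks are allowed to sleep.

#include <linux/init.h>
#include <linux/notifier.h>

static BLOCKING_NOTIFIER_HEAD(example_blocking_chain);  /* hypothetical head */

static int example_blocking_event(struct notifier_block *nb,
                                  unsigned long val, void *data)
{
        /* called with the chain's rwsem held for reading; may sleep */
        return NOTIFY_DONE;
}

static struct notifier_block example_blocking_nb = {
        .notifier_call  = example_blocking_event,
};

static int __init example_blocking_init(void)
{
        blocking_notifier_chain_register(&example_blocking_chain,
                                         &example_blocking_nb);
        blocking_notifier_call_chain(&example_blocking_chain, 0, NULL);
        return 0;
}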
329 | /* | ||
330 | * Raw notifier chain routines. There is no protection; | ||
331 | * the caller must provide it. Use at your own risk! | ||
332 | */ | ||
333 | |||
334 | /** | ||
335 | * raw_notifier_chain_register - Add notifier to a raw notifier chain | ||
336 | * @nh: Pointer to head of the raw notifier chain | ||
337 | * @n: New entry in notifier chain | ||
338 | * | ||
339 | * Adds a notifier to a raw notifier chain. | ||
340 | * All locking must be provided by the caller. | ||
341 | * | ||
342 | * Currently always returns zero. | ||
343 | */ | ||
344 | |||
345 | int raw_notifier_chain_register(struct raw_notifier_head *nh, | ||
346 | struct notifier_block *n) | ||
347 | { | ||
348 | return notifier_chain_register(&nh->head, n); | ||
349 | } | ||
350 | |||
351 | EXPORT_SYMBOL_GPL(raw_notifier_chain_register); | ||
352 | |||
353 | /** | ||
354 | * raw_notifier_chain_unregister - Remove notifier from a raw notifier chain | ||
355 | * @nh: Pointer to head of the raw notifier chain | ||
356 | * @n: Entry to remove from notifier chain | ||
357 | * | ||
358 | * Removes a notifier from a raw notifier chain. | ||
359 | * All locking must be provided by the caller. | ||
360 | * | ||
361 | * Returns zero on success or %-ENOENT on failure. | ||
362 | */ | ||
363 | int raw_notifier_chain_unregister(struct raw_notifier_head *nh, | ||
364 | struct notifier_block *n) | ||
365 | { | ||
366 | return notifier_chain_unregister(&nh->head, n); | ||
367 | } | ||
368 | |||
369 | EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister); | ||
370 | |||
371 | /** | ||
372 | * raw_notifier_call_chain - Call functions in a raw notifier chain | ||
373 | * @nh: Pointer to head of the raw notifier chain | ||
374 | * @val: Value passed unmodified to notifier function | ||
375 | * @v: Pointer passed unmodified to notifier function | ||
376 | * | ||
377 | * Calls each function in a notifier chain in turn. The functions | ||
378 | * run in an undefined context. | ||
379 | * All locking must be provided by the caller. | ||
380 | * | ||
381 | * If the return value of the notifier can be and'ed | ||
382 | * with %NOTIFY_STOP_MASK then raw_notifier_call_chain | ||
383 | * will return immediately, with the return value of | ||
384 | * the notifier function which halted execution. | ||
385 | * Otherwise the return value is the return value | ||
386 | * of the last notifier function called. | ||
387 | */ | ||
388 | |||
389 | int raw_notifier_call_chain(struct raw_notifier_head *nh, | ||
390 | unsigned long val, void *v) | ||
391 | { | ||
392 | return notifier_call_chain(&nh->head, val, v); | ||
393 | } | ||
394 | |||
395 | EXPORT_SYMBOL_GPL(raw_notifier_call_chain); | ||
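The raw variant is simply the old unguarded behaviour with the locking decision pushed to the caller. A hypothetical user would wrap registration and delivery in whatever lock already protects its own state, for example:

#include <linux/notifier.h>
#include <linux/spinlock.h>

static RAW_NOTIFIER_HEAD(example_raw_chain);    /* hypothetical head */
static DEFINE_SPINLOCK(example_raw_lock);       /* caller-provided protection */

static void example_raw_fire(unsigned long event, void *data)
{
        /* the chain itself is unprotected, so serialize around it */
        spin_lock(&example_raw_lock);
        raw_notifier_call_chain(&example_raw_chain, event, data);
        spin_unlock(&example_raw_lock);
}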
191 | 396 | ||
192 | /** | 397 | /** |
193 | * register_reboot_notifier - Register function to be called at reboot time | 398 | * register_reboot_notifier - Register function to be called at reboot time |
@@ -196,13 +401,13 @@ EXPORT_SYMBOL(notifier_call_chain); | |||
196 | * Registers a function with the list of functions | 401 | * Registers a function with the list of functions |
197 | * to be called at reboot time. | 402 | * to be called at reboot time. |
198 | * | 403 | * |
199 | * Currently always returns zero, as notifier_chain_register | 404 | * Currently always returns zero, as blocking_notifier_chain_register |
200 | * always returns zero. | 405 | * always returns zero. |
201 | */ | 406 | */ |
202 | 407 | ||
203 | int register_reboot_notifier(struct notifier_block * nb) | 408 | int register_reboot_notifier(struct notifier_block * nb) |
204 | { | 409 | { |
205 | return notifier_chain_register(&reboot_notifier_list, nb); | 410 | return blocking_notifier_chain_register(&reboot_notifier_list, nb); |
206 | } | 411 | } |
207 | 412 | ||
208 | EXPORT_SYMBOL(register_reboot_notifier); | 413 | EXPORT_SYMBOL(register_reboot_notifier); |
@@ -219,7 +424,7 @@ EXPORT_SYMBOL(register_reboot_notifier); | |||
219 | 424 | ||
220 | int unregister_reboot_notifier(struct notifier_block * nb) | 425 | int unregister_reboot_notifier(struct notifier_block * nb) |
221 | { | 426 | { |
222 | return notifier_chain_unregister(&reboot_notifier_list, nb); | 427 | return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); |
223 | } | 428 | } |
224 | 429 | ||
225 | EXPORT_SYMBOL(unregister_reboot_notifier); | 430 | EXPORT_SYMBOL(unregister_reboot_notifier); |
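Usage on the client side is unchanged by the conversion; a hypothetical driver that wants to quiesce hardware before restart, halt or power-off still does:

#include <linux/notifier.h>
#include <linux/reboot.h>

static int example_reboot_event(struct notifier_block *nb,
                                unsigned long code, void *cmd)
{
        /* code is SYS_RESTART, SYS_HALT or SYS_POWER_OFF; this runs in
         * process context because reboot_notifier_list is now a blocking
         * chain, so it may sleep */
        return NOTIFY_DONE;
}

static struct notifier_block example_reboot_nb = {
        .notifier_call  = example_reboot_event,
};

/* in module init / exit:
 *      register_reboot_notifier(&example_reboot_nb);
 *      unregister_reboot_notifier(&example_reboot_nb);
 */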
@@ -380,7 +585,7 @@ EXPORT_SYMBOL_GPL(emergency_restart); | |||
380 | 585 | ||
381 | void kernel_restart_prepare(char *cmd) | 586 | void kernel_restart_prepare(char *cmd) |
382 | { | 587 | { |
383 | notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); | 588 | blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); |
384 | system_state = SYSTEM_RESTART; | 589 | system_state = SYSTEM_RESTART; |
385 | device_shutdown(); | 590 | device_shutdown(); |
386 | } | 591 | } |
@@ -430,7 +635,7 @@ EXPORT_SYMBOL_GPL(kernel_kexec); | |||
430 | 635 | ||
431 | void kernel_shutdown_prepare(enum system_states state) | 636 | void kernel_shutdown_prepare(enum system_states state) |
432 | { | 637 | { |
433 | notifier_call_chain(&reboot_notifier_list, | 638 | blocking_notifier_call_chain(&reboot_notifier_list, |
434 | (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); | 639 | (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); |
435 | system_state = state; | 640 | system_state = state; |
436 | device_shutdown(); | 641 | device_shutdown(); |
@@ -997,69 +1202,24 @@ asmlinkage long sys_times(struct tms __user * tbuf) | |||
997 | */ | 1202 | */ |
998 | if (tbuf) { | 1203 | if (tbuf) { |
999 | struct tms tmp; | 1204 | struct tms tmp; |
1205 | struct task_struct *tsk = current; | ||
1206 | struct task_struct *t; | ||
1000 | cputime_t utime, stime, cutime, cstime; | 1207 | cputime_t utime, stime, cutime, cstime; |
1001 | 1208 | ||
1002 | #ifdef CONFIG_SMP | 1209 | spin_lock_irq(&tsk->sighand->siglock); |
1003 | if (thread_group_empty(current)) { | 1210 | utime = tsk->signal->utime; |
1004 | /* | 1211 | stime = tsk->signal->stime; |
1005 | * Single thread case without the use of any locks. | 1212 | t = tsk; |
1006 | * | 1213 | do { |
1007 | * We may race with release_task if two threads are | 1214 | utime = cputime_add(utime, t->utime); |
1008 | * executing. However, release task first adds up the | 1215 | stime = cputime_add(stime, t->stime); |
1009 | * counters (__exit_signal) before removing the task | 1216 | t = next_thread(t); |
1010 | * from the process tasklist (__unhash_process). | 1217 | } while (t != tsk); |
1011 | * __exit_signal also acquires and releases the | ||
1012 | * siglock which results in the proper memory ordering | ||
1013 | * so that the list modifications are always visible | ||
1014 | * after the counters have been updated. | ||
1015 | * | ||
1016 | * If the counters have been updated by the second thread | ||
1017 | * but the thread has not yet been removed from the list | ||
1018 | * then the other branch will be executing which will | ||
1019 | * block on tasklist_lock until the exit handling of the | ||
1020 | * other task is finished. | ||
1021 | * | ||
1022 | * This also implies that the sighand->siglock cannot | ||
1023 | * be held by another processor. So we can also | ||
1024 | * skip acquiring that lock. | ||
1025 | */ | ||
1026 | utime = cputime_add(current->signal->utime, current->utime); | ||
1027 | stime = cputime_add(current->signal->utime, current->stime); | ||
1028 | cutime = current->signal->cutime; | ||
1029 | cstime = current->signal->cstime; | ||
1030 | } else | ||
1031 | #endif | ||
1032 | { | ||
1033 | |||
1034 | /* Process with multiple threads */ | ||
1035 | struct task_struct *tsk = current; | ||
1036 | struct task_struct *t; | ||
1037 | 1218 | ||
1038 | read_lock(&tasklist_lock); | 1219 | cutime = tsk->signal->cutime; |
1039 | utime = tsk->signal->utime; | 1220 | cstime = tsk->signal->cstime; |
1040 | stime = tsk->signal->stime; | 1221 | spin_unlock_irq(&tsk->sighand->siglock); |
1041 | t = tsk; | ||
1042 | do { | ||
1043 | utime = cputime_add(utime, t->utime); | ||
1044 | stime = cputime_add(stime, t->stime); | ||
1045 | t = next_thread(t); | ||
1046 | } while (t != tsk); | ||
1047 | 1222 | ||
1048 | /* | ||
1049 | * While we have tasklist_lock read-locked, no dying thread | ||
1050 | * can be updating current->signal->[us]time. Instead, | ||
1051 | * we got their counts included in the live thread loop. | ||
1052 | * However, another thread can come in right now and | ||
1053 | * do a wait call that updates current->signal->c[us]time. | ||
1054 | * To make sure we always see that pair updated atomically, | ||
1055 | * we take the siglock around fetching them. | ||
1056 | */ | ||
1057 | spin_lock_irq(&tsk->sighand->siglock); | ||
1058 | cutime = tsk->signal->cutime; | ||
1059 | cstime = tsk->signal->cstime; | ||
1060 | spin_unlock_irq(&tsk->sighand->siglock); | ||
1061 | read_unlock(&tasklist_lock); | ||
1062 | } | ||
1063 | tmp.tms_utime = cputime_to_clock_t(utime); | 1223 | tmp.tms_utime = cputime_to_clock_t(utime); |
1064 | tmp.tms_stime = cputime_to_clock_t(stime); | 1224 | tmp.tms_stime = cputime_to_clock_t(stime); |
1065 | tmp.tms_cutime = cputime_to_clock_t(cutime); | 1225 | tmp.tms_cutime = cputime_to_clock_t(cutime); |
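The rewritten sys_times() sums utime/stime over every live thread under siglock instead of taking tasklist_lock; from userspace the contract is unchanged. A small, hypothetical illustration of the consumer side:

#include <stdio.h>
#include <sys/times.h>
#include <unistd.h>

int main(void)
{
        struct tms t;
        long hz = sysconf(_SC_CLK_TCK);

        times(&t);      /* fills per-process (all-thread) and child times */
        printf("user %.2fs  sys %.2fs  cuser %.2fs  csys %.2fs\n",
               (double)t.tms_utime / hz,  (double)t.tms_stime / hz,
               (double)t.tms_cutime / hz, (double)t.tms_cstime / hz);
        return 0;
}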
@@ -1212,18 +1372,29 @@ asmlinkage long sys_getsid(pid_t pid) | |||
1212 | asmlinkage long sys_setsid(void) | 1372 | asmlinkage long sys_setsid(void) |
1213 | { | 1373 | { |
1214 | struct task_struct *group_leader = current->group_leader; | 1374 | struct task_struct *group_leader = current->group_leader; |
1215 | struct pid *pid; | 1375 | pid_t session; |
1216 | int err = -EPERM; | 1376 | int err = -EPERM; |
1217 | 1377 | ||
1218 | mutex_lock(&tty_mutex); | 1378 | mutex_lock(&tty_mutex); |
1219 | write_lock_irq(&tasklist_lock); | 1379 | write_lock_irq(&tasklist_lock); |
1220 | 1380 | ||
1221 | pid = find_pid(PIDTYPE_PGID, group_leader->pid); | 1381 | /* Fail if I am already a session leader */ |
1222 | if (pid) | 1382 | if (group_leader->signal->leader) |
1383 | goto out; | ||
1384 | |||
1385 | session = group_leader->pid; | ||
1386 | /* Fail if a process group id already exists that equals the | ||
1387 | * proposed session id. | ||
1388 | * | ||
1389 | * Don't check if session id == 1 because kernel threads use this | ||
1390 | * session id and so the check will always fail and make it so | ||
1391 | * init cannot successfully call setsid. | ||
1392 | */ | ||
1393 | if (session > 1 && find_task_by_pid_type(PIDTYPE_PGID, session)) | ||
1223 | goto out; | 1394 | goto out; |
1224 | 1395 | ||
1225 | group_leader->signal->leader = 1; | 1396 | group_leader->signal->leader = 1; |
1226 | __set_special_pids(group_leader->pid, group_leader->pid); | 1397 | __set_special_pids(session, session); |
1227 | group_leader->signal->tty = NULL; | 1398 | group_leader->signal->tty = NULL; |
1228 | group_leader->signal->tty_old_pgrp = 0; | 1399 | group_leader->signal->tty_old_pgrp = 0; |
1229 | err = process_group(group_leader); | 1400 | err = process_group(group_leader); |
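The effect of the new checks is visible from userspace: setsid() fails (EPERM) for an existing session leader and for a process-group leader whose pgid would collide with the proposed session id, which is why daemons fork first and let the child call it. An illustrative snippet:

#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

static void become_session_leader(void)
{
        pid_t pid = fork();

        if (pid < 0)
                exit(1);
        if (pid > 0)
                exit(0);        /* parent exits; child is not a pgrp leader */
        if (setsid() < 0)       /* child: new session and process group */
                exit(1);
}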
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 1067090db6..5433195040 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c | |||
@@ -42,6 +42,10 @@ cond_syscall(sys_recvmsg); | |||
42 | cond_syscall(sys_socketcall); | 42 | cond_syscall(sys_socketcall); |
43 | cond_syscall(sys_futex); | 43 | cond_syscall(sys_futex); |
44 | cond_syscall(compat_sys_futex); | 44 | cond_syscall(compat_sys_futex); |
45 | cond_syscall(sys_set_robust_list); | ||
46 | cond_syscall(compat_sys_set_robust_list); | ||
47 | cond_syscall(sys_get_robust_list); | ||
48 | cond_syscall(compat_sys_get_robust_list); | ||
45 | cond_syscall(sys_epoll_create); | 49 | cond_syscall(sys_epoll_create); |
46 | cond_syscall(sys_epoll_ctl); | 50 | cond_syscall(sys_epoll_ctl); |
47 | cond_syscall(sys_epoll_wait); | 51 | cond_syscall(sys_epoll_wait); |
@@ -116,3 +120,15 @@ cond_syscall(sys32_sysctl); | |||
116 | cond_syscall(ppc_rtas); | 120 | cond_syscall(ppc_rtas); |
117 | cond_syscall(sys_spu_run); | 121 | cond_syscall(sys_spu_run); |
118 | cond_syscall(sys_spu_create); | 122 | cond_syscall(sys_spu_create); |
123 | |||
124 | /* mmu depending weak syscall entries */ | ||
125 | cond_syscall(sys_mprotect); | ||
126 | cond_syscall(sys_msync); | ||
127 | cond_syscall(sys_mlock); | ||
128 | cond_syscall(sys_munlock); | ||
129 | cond_syscall(sys_mlockall); | ||
130 | cond_syscall(sys_munlockall); | ||
131 | cond_syscall(sys_mincore); | ||
132 | cond_syscall(sys_madvise); | ||
133 | cond_syscall(sys_mremap); | ||
134 | cond_syscall(sys_remap_file_pages); | ||
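These stubs rely on cond_syscall() turning each entry point into a weak alias of sys_ni_syscall(), so configurations that omit the real implementation (no MMU, no futexes, ...) still link and the call simply returns -ENOSYS. A sketch of the mechanism; the real macro lives in the architecture headers and may differ in detail:

#include <linux/errno.h>
#include <linux/linkage.h>

/* illustrative, not the literal kernel definition */
#define cond_syscall(x) \
        asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")

asmlinkage long sys_ni_syscall(void)
{
        return -ENOSYS;         /* what userspace sees for missing syscalls */
}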
diff --git a/kernel/time.c b/kernel/time.c index ff8e7019c4..b00ddc71ce 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
@@ -410,7 +410,7 @@ EXPORT_SYMBOL(current_kernel_time); | |||
410 | * current_fs_time - Return FS time | 410 | * current_fs_time - Return FS time |
411 | * @sb: Superblock. | 411 | * @sb: Superblock. |
412 | * | 412 | * |
413 | * Return the current time truncated to the time granuality supported by | 413 | * Return the current time truncated to the time granularity supported by |
414 | * the fs. | 414 | * the fs. |
415 | */ | 415 | */ |
416 | struct timespec current_fs_time(struct super_block *sb) | 416 | struct timespec current_fs_time(struct super_block *sb) |
@@ -421,11 +421,11 @@ struct timespec current_fs_time(struct super_block *sb) | |||
421 | EXPORT_SYMBOL(current_fs_time); | 421 | EXPORT_SYMBOL(current_fs_time); |
422 | 422 | ||
423 | /** | 423 | /** |
424 | * timespec_trunc - Truncate timespec to a granuality | 424 | * timespec_trunc - Truncate timespec to a granularity |
425 | * @t: Timespec | 425 | * @t: Timespec |
426 | * @gran: Granuality in ns. | 426 | * @gran: Granularity in ns. |
427 | * | 427 | * |
428 | * Truncate a timespec to a granuality. gran must be smaller than a second. | 428 | * Truncate a timespec to a granularity. gran must be smaller than a second. |
429 | * Always rounds down. | 429 | * Always rounds down. |
430 | * | 430 | * |
431 | * This function should be only used for timestamps returned by | 431 | * This function should be only used for timestamps returned by |
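The kerneldoc fix is spelling-only; the behaviour it documents is plain integer rounding of the nanosecond field. A minimal sketch (hypothetical helper, gran given in nanoseconds and at most one second):

#include <linux/time.h>

static struct timespec trunc_to_gran(struct timespec t, unsigned long gran)
{
        if (gran >= NSEC_PER_SEC)
                t.tv_nsec = 0;                  /* whole-second granularity */
        else if (gran > 1)
                t.tv_nsec -= t.tv_nsec % gran;  /* always rounds down */
        return t;
}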
diff --git a/kernel/timer.c b/kernel/timer.c index ab189dd187..9e49deed46 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -54,7 +54,6 @@ EXPORT_SYMBOL(jiffies_64); | |||
54 | /* | 54 | /* |
55 | * per-CPU timer vector definitions: | 55 | * per-CPU timer vector definitions: |
56 | */ | 56 | */ |
57 | |||
58 | #define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6) | 57 | #define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6) |
59 | #define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8) | 58 | #define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8) |
60 | #define TVN_SIZE (1 << TVN_BITS) | 59 | #define TVN_SIZE (1 << TVN_BITS) |
@@ -62,11 +61,6 @@ EXPORT_SYMBOL(jiffies_64); | |||
62 | #define TVN_MASK (TVN_SIZE - 1) | 61 | #define TVN_MASK (TVN_SIZE - 1) |
63 | #define TVR_MASK (TVR_SIZE - 1) | 62 | #define TVR_MASK (TVR_SIZE - 1) |
64 | 63 | ||
65 | struct timer_base_s { | ||
66 | spinlock_t lock; | ||
67 | struct timer_list *running_timer; | ||
68 | }; | ||
69 | |||
70 | typedef struct tvec_s { | 64 | typedef struct tvec_s { |
71 | struct list_head vec[TVN_SIZE]; | 65 | struct list_head vec[TVN_SIZE]; |
72 | } tvec_t; | 66 | } tvec_t; |
@@ -76,7 +70,8 @@ typedef struct tvec_root_s { | |||
76 | } tvec_root_t; | 70 | } tvec_root_t; |
77 | 71 | ||
78 | struct tvec_t_base_s { | 72 | struct tvec_t_base_s { |
79 | struct timer_base_s t_base; | 73 | spinlock_t lock; |
74 | struct timer_list *running_timer; | ||
80 | unsigned long timer_jiffies; | 75 | unsigned long timer_jiffies; |
81 | tvec_root_t tv1; | 76 | tvec_root_t tv1; |
82 | tvec_t tv2; | 77 | tvec_t tv2; |
@@ -86,14 +81,16 @@ struct tvec_t_base_s { | |||
86 | } ____cacheline_aligned_in_smp; | 81 | } ____cacheline_aligned_in_smp; |
87 | 82 | ||
88 | typedef struct tvec_t_base_s tvec_base_t; | 83 | typedef struct tvec_t_base_s tvec_base_t; |
89 | static DEFINE_PER_CPU(tvec_base_t *, tvec_bases); | 84 | |
90 | static tvec_base_t boot_tvec_bases; | 85 | tvec_base_t boot_tvec_bases; |
86 | EXPORT_SYMBOL(boot_tvec_bases); | ||
87 | static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = { &boot_tvec_bases }; | ||
91 | 88 | ||
92 | static inline void set_running_timer(tvec_base_t *base, | 89 | static inline void set_running_timer(tvec_base_t *base, |
93 | struct timer_list *timer) | 90 | struct timer_list *timer) |
94 | { | 91 | { |
95 | #ifdef CONFIG_SMP | 92 | #ifdef CONFIG_SMP |
96 | base->t_base.running_timer = timer; | 93 | base->running_timer = timer; |
97 | #endif | 94 | #endif |
98 | } | 95 | } |
99 | 96 | ||
@@ -139,15 +136,6 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer) | |||
139 | list_add_tail(&timer->entry, vec); | 136 | list_add_tail(&timer->entry, vec); |
140 | } | 137 | } |
141 | 138 | ||
142 | typedef struct timer_base_s timer_base_t; | ||
143 | /* | ||
144 | * Used by TIMER_INITIALIZER, we can't use per_cpu(tvec_bases) | ||
145 | * at compile time, and we need timer->base to lock the timer. | ||
146 | */ | ||
147 | timer_base_t __init_timer_base | ||
148 | ____cacheline_aligned_in_smp = { .lock = SPIN_LOCK_UNLOCKED }; | ||
149 | EXPORT_SYMBOL(__init_timer_base); | ||
150 | |||
151 | /*** | 139 | /*** |
152 | * init_timer - initialize a timer. | 140 | * init_timer - initialize a timer. |
153 | * @timer: the timer to be initialized | 141 | * @timer: the timer to be initialized |
@@ -158,7 +146,7 @@ EXPORT_SYMBOL(__init_timer_base); | |||
158 | void fastcall init_timer(struct timer_list *timer) | 146 | void fastcall init_timer(struct timer_list *timer) |
159 | { | 147 | { |
160 | timer->entry.next = NULL; | 148 | timer->entry.next = NULL; |
161 | timer->base = &per_cpu(tvec_bases, raw_smp_processor_id())->t_base; | 149 | timer->base = per_cpu(tvec_bases, raw_smp_processor_id()); |
162 | } | 150 | } |
163 | EXPORT_SYMBOL(init_timer); | 151 | EXPORT_SYMBOL(init_timer); |
164 | 152 | ||
@@ -174,7 +162,7 @@ static inline void detach_timer(struct timer_list *timer, | |||
174 | } | 162 | } |
175 | 163 | ||
176 | /* | 164 | /* |
177 | * We are using hashed locking: holding per_cpu(tvec_bases).t_base.lock | 165 | * We are using hashed locking: holding per_cpu(tvec_bases).lock |
178 | * means that all timers which are tied to this base via timer->base are | 166 | * means that all timers which are tied to this base via timer->base are |
179 | * locked, and the base itself is locked too. | 167 | * locked, and the base itself is locked too. |
180 | * | 168 | * |
@@ -185,10 +173,10 @@ static inline void detach_timer(struct timer_list *timer, | |||
185 | * possible to set timer->base = NULL and drop the lock: the timer remains | 173 | * possible to set timer->base = NULL and drop the lock: the timer remains |
186 | * locked. | 174 | * locked. |
187 | */ | 175 | */ |
188 | static timer_base_t *lock_timer_base(struct timer_list *timer, | 176 | static tvec_base_t *lock_timer_base(struct timer_list *timer, |
189 | unsigned long *flags) | 177 | unsigned long *flags) |
190 | { | 178 | { |
191 | timer_base_t *base; | 179 | tvec_base_t *base; |
192 | 180 | ||
193 | for (;;) { | 181 | for (;;) { |
194 | base = timer->base; | 182 | base = timer->base; |
@@ -205,8 +193,7 @@ static timer_base_t *lock_timer_base(struct timer_list *timer, | |||
205 | 193 | ||
206 | int __mod_timer(struct timer_list *timer, unsigned long expires) | 194 | int __mod_timer(struct timer_list *timer, unsigned long expires) |
207 | { | 195 | { |
208 | timer_base_t *base; | 196 | tvec_base_t *base, *new_base; |
209 | tvec_base_t *new_base; | ||
210 | unsigned long flags; | 197 | unsigned long flags; |
211 | int ret = 0; | 198 | int ret = 0; |
212 | 199 | ||
@@ -221,7 +208,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires) | |||
221 | 208 | ||
222 | new_base = __get_cpu_var(tvec_bases); | 209 | new_base = __get_cpu_var(tvec_bases); |
223 | 210 | ||
224 | if (base != &new_base->t_base) { | 211 | if (base != new_base) { |
225 | /* | 212 | /* |
226 | * We are trying to schedule the timer on the local CPU. | 213 | * We are trying to schedule the timer on the local CPU. |
227 | * However we can't change timer's base while it is running, | 214 | * However we can't change timer's base while it is running, |
@@ -229,21 +216,19 @@ int __mod_timer(struct timer_list *timer, unsigned long expires) | |||
229 | * handler yet has not finished. This also guarantees that | 216 | * handler yet has not finished. This also guarantees that |
230 | * the timer is serialized wrt itself. | 217 | * the timer is serialized wrt itself. |
231 | */ | 218 | */ |
232 | if (unlikely(base->running_timer == timer)) { | 219 | if (likely(base->running_timer != timer)) { |
233 | /* The timer remains on a former base */ | ||
234 | new_base = container_of(base, tvec_base_t, t_base); | ||
235 | } else { | ||
236 | /* See the comment in lock_timer_base() */ | 220 | /* See the comment in lock_timer_base() */ |
237 | timer->base = NULL; | 221 | timer->base = NULL; |
238 | spin_unlock(&base->lock); | 222 | spin_unlock(&base->lock); |
239 | spin_lock(&new_base->t_base.lock); | 223 | base = new_base; |
240 | timer->base = &new_base->t_base; | 224 | spin_lock(&base->lock); |
225 | timer->base = base; | ||
241 | } | 226 | } |
242 | } | 227 | } |
243 | 228 | ||
244 | timer->expires = expires; | 229 | timer->expires = expires; |
245 | internal_add_timer(new_base, timer); | 230 | internal_add_timer(base, timer); |
246 | spin_unlock_irqrestore(&new_base->t_base.lock, flags); | 231 | spin_unlock_irqrestore(&base->lock, flags); |
247 | 232 | ||
248 | return ret; | 233 | return ret; |
249 | } | 234 | } |
@@ -263,10 +248,10 @@ void add_timer_on(struct timer_list *timer, int cpu) | |||
263 | unsigned long flags; | 248 | unsigned long flags; |
264 | 249 | ||
265 | BUG_ON(timer_pending(timer) || !timer->function); | 250 | BUG_ON(timer_pending(timer) || !timer->function); |
266 | spin_lock_irqsave(&base->t_base.lock, flags); | 251 | spin_lock_irqsave(&base->lock, flags); |
267 | timer->base = &base->t_base; | 252 | timer->base = base; |
268 | internal_add_timer(base, timer); | 253 | internal_add_timer(base, timer); |
269 | spin_unlock_irqrestore(&base->t_base.lock, flags); | 254 | spin_unlock_irqrestore(&base->lock, flags); |
270 | } | 255 | } |
271 | 256 | ||
272 | 257 | ||
@@ -319,7 +304,7 @@ EXPORT_SYMBOL(mod_timer); | |||
319 | */ | 304 | */ |
320 | int del_timer(struct timer_list *timer) | 305 | int del_timer(struct timer_list *timer) |
321 | { | 306 | { |
322 | timer_base_t *base; | 307 | tvec_base_t *base; |
323 | unsigned long flags; | 308 | unsigned long flags; |
324 | int ret = 0; | 309 | int ret = 0; |
325 | 310 | ||
@@ -346,7 +331,7 @@ EXPORT_SYMBOL(del_timer); | |||
346 | */ | 331 | */ |
347 | int try_to_del_timer_sync(struct timer_list *timer) | 332 | int try_to_del_timer_sync(struct timer_list *timer) |
348 | { | 333 | { |
349 | timer_base_t *base; | 334 | tvec_base_t *base; |
350 | unsigned long flags; | 335 | unsigned long flags; |
351 | int ret = -1; | 336 | int ret = -1; |
352 | 337 | ||
@@ -410,7 +395,7 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index) | |||
410 | struct timer_list *tmp; | 395 | struct timer_list *tmp; |
411 | 396 | ||
412 | tmp = list_entry(curr, struct timer_list, entry); | 397 | tmp = list_entry(curr, struct timer_list, entry); |
413 | BUG_ON(tmp->base != &base->t_base); | 398 | BUG_ON(tmp->base != base); |
414 | curr = curr->next; | 399 | curr = curr->next; |
415 | internal_add_timer(base, tmp); | 400 | internal_add_timer(base, tmp); |
416 | } | 401 | } |
@@ -432,7 +417,7 @@ static inline void __run_timers(tvec_base_t *base) | |||
432 | { | 417 | { |
433 | struct timer_list *timer; | 418 | struct timer_list *timer; |
434 | 419 | ||
435 | spin_lock_irq(&base->t_base.lock); | 420 | spin_lock_irq(&base->lock); |
436 | while (time_after_eq(jiffies, base->timer_jiffies)) { | 421 | while (time_after_eq(jiffies, base->timer_jiffies)) { |
437 | struct list_head work_list = LIST_HEAD_INIT(work_list); | 422 | struct list_head work_list = LIST_HEAD_INIT(work_list); |
438 | struct list_head *head = &work_list; | 423 | struct list_head *head = &work_list; |
@@ -458,7 +443,7 @@ static inline void __run_timers(tvec_base_t *base) | |||
458 | 443 | ||
459 | set_running_timer(base, timer); | 444 | set_running_timer(base, timer); |
460 | detach_timer(timer, 1); | 445 | detach_timer(timer, 1); |
461 | spin_unlock_irq(&base->t_base.lock); | 446 | spin_unlock_irq(&base->lock); |
462 | { | 447 | { |
463 | int preempt_count = preempt_count(); | 448 | int preempt_count = preempt_count(); |
464 | fn(data); | 449 | fn(data); |
@@ -471,11 +456,11 @@ static inline void __run_timers(tvec_base_t *base) | |||
471 | BUG(); | 456 | BUG(); |
472 | } | 457 | } |
473 | } | 458 | } |
474 | spin_lock_irq(&base->t_base.lock); | 459 | spin_lock_irq(&base->lock); |
475 | } | 460 | } |
476 | } | 461 | } |
477 | set_running_timer(base, NULL); | 462 | set_running_timer(base, NULL); |
478 | spin_unlock_irq(&base->t_base.lock); | 463 | spin_unlock_irq(&base->lock); |
479 | } | 464 | } |
480 | 465 | ||
481 | #ifdef CONFIG_NO_IDLE_HZ | 466 | #ifdef CONFIG_NO_IDLE_HZ |
@@ -506,7 +491,7 @@ unsigned long next_timer_interrupt(void) | |||
506 | hr_expires += jiffies; | 491 | hr_expires += jiffies; |
507 | 492 | ||
508 | base = __get_cpu_var(tvec_bases); | 493 | base = __get_cpu_var(tvec_bases); |
509 | spin_lock(&base->t_base.lock); | 494 | spin_lock(&base->lock); |
510 | expires = base->timer_jiffies + (LONG_MAX >> 1); | 495 | expires = base->timer_jiffies + (LONG_MAX >> 1); |
511 | list = NULL; | 496 | list = NULL; |
512 | 497 | ||
@@ -554,7 +539,23 @@ found: | |||
554 | expires = nte->expires; | 539 | expires = nte->expires; |
555 | } | 540 | } |
556 | } | 541 | } |
557 | spin_unlock(&base->t_base.lock); | 542 | spin_unlock(&base->lock); |
543 | |||
544 | /* | ||
545 | * It can happen that other CPUs service timer IRQs and increment | ||
546 | * jiffies, but we have not yet got a local timer tick to process | ||
547 | * the timer wheels. In that case, the expiry time can be before | ||
548 | * jiffies, but since the high-resolution timer here is relative to | ||
549 | * jiffies, the default expression when high-resolution timers are | ||
550 | * not active, | ||
551 | * | ||
552 | * time_before(MAX_JIFFY_OFFSET + jiffies, expires) | ||
553 | * | ||
554 | * would falsely evaluate to true. If that is the case, just | ||
555 | * return jiffies so that we can immediately fire the local timer | ||
556 | */ | ||
557 | if (time_before(expires, jiffies)) | ||
558 | return jiffies; | ||
558 | 559 | ||
559 | if (time_before(hr_expires, expires)) | 560 | if (time_before(hr_expires, expires)) |
560 | return hr_expires; | 561 | return hr_expires; |
@@ -841,7 +842,7 @@ void update_process_times(int user_tick) | |||
841 | */ | 842 | */ |
842 | static unsigned long count_active_tasks(void) | 843 | static unsigned long count_active_tasks(void) |
843 | { | 844 | { |
844 | return (nr_running() + nr_uninterruptible()) * FIXED_1; | 845 | return nr_active() * FIXED_1; |
845 | } | 846 | } |
846 | 847 | ||
847 | /* | 848 | /* |
@@ -1240,29 +1241,37 @@ static int __devinit init_timers_cpu(int cpu) | |||
1240 | { | 1241 | { |
1241 | int j; | 1242 | int j; |
1242 | tvec_base_t *base; | 1243 | tvec_base_t *base; |
1244 | static char __devinitdata tvec_base_done[NR_CPUS]; | ||
1243 | 1245 | ||
1244 | base = per_cpu(tvec_bases, cpu); | 1246 | if (!tvec_base_done[cpu]) { |
1245 | if (!base) { | ||
1246 | static char boot_done; | 1247 | static char boot_done; |
1247 | 1248 | ||
1248 | /* | ||
1249 | * Cannot do allocation in init_timers as that runs before the | ||
1250 | * allocator initializes (and would waste memory if there are | ||
1251 | * more possible CPUs than will ever be installed/brought up). | ||
1252 | */ | ||
1253 | if (boot_done) { | 1249 | if (boot_done) { |
1250 | /* | ||
1251 | * The APs use this path later in boot | ||
1252 | */ | ||
1254 | base = kmalloc_node(sizeof(*base), GFP_KERNEL, | 1253 | base = kmalloc_node(sizeof(*base), GFP_KERNEL, |
1255 | cpu_to_node(cpu)); | 1254 | cpu_to_node(cpu)); |
1256 | if (!base) | 1255 | if (!base) |
1257 | return -ENOMEM; | 1256 | return -ENOMEM; |
1258 | memset(base, 0, sizeof(*base)); | 1257 | memset(base, 0, sizeof(*base)); |
1258 | per_cpu(tvec_bases, cpu) = base; | ||
1259 | } else { | 1259 | } else { |
1260 | base = &boot_tvec_bases; | 1260 | /* |
1261 | * This is for the boot CPU - we use compile-time | ||
1262 | * static initialisation because per-cpu memory isn't | ||
1263 | * ready yet and because the memory allocators are not | ||
1264 | * initialised either. | ||
1265 | */ | ||
1261 | boot_done = 1; | 1266 | boot_done = 1; |
1267 | base = &boot_tvec_bases; | ||
1262 | } | 1268 | } |
1263 | per_cpu(tvec_bases, cpu) = base; | 1269 | tvec_base_done[cpu] = 1; |
1270 | } else { | ||
1271 | base = per_cpu(tvec_bases, cpu); | ||
1264 | } | 1272 | } |
1265 | spin_lock_init(&base->t_base.lock); | 1273 | |
1274 | spin_lock_init(&base->lock); | ||
1266 | for (j = 0; j < TVN_SIZE; j++) { | 1275 | for (j = 0; j < TVN_SIZE; j++) { |
1267 | INIT_LIST_HEAD(base->tv5.vec + j); | 1276 | INIT_LIST_HEAD(base->tv5.vec + j); |
1268 | INIT_LIST_HEAD(base->tv4.vec + j); | 1277 | INIT_LIST_HEAD(base->tv4.vec + j); |
@@ -1284,7 +1293,7 @@ static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head) | |||
1284 | while (!list_empty(head)) { | 1293 | while (!list_empty(head)) { |
1285 | timer = list_entry(head->next, struct timer_list, entry); | 1294 | timer = list_entry(head->next, struct timer_list, entry); |
1286 | detach_timer(timer, 0); | 1295 | detach_timer(timer, 0); |
1287 | timer->base = &new_base->t_base; | 1296 | timer->base = new_base; |
1288 | internal_add_timer(new_base, timer); | 1297 | internal_add_timer(new_base, timer); |
1289 | } | 1298 | } |
1290 | } | 1299 | } |
@@ -1300,11 +1309,11 @@ static void __devinit migrate_timers(int cpu) | |||
1300 | new_base = get_cpu_var(tvec_bases); | 1309 | new_base = get_cpu_var(tvec_bases); |
1301 | 1310 | ||
1302 | local_irq_disable(); | 1311 | local_irq_disable(); |
1303 | spin_lock(&new_base->t_base.lock); | 1312 | spin_lock(&new_base->lock); |
1304 | spin_lock(&old_base->t_base.lock); | 1313 | spin_lock(&old_base->lock); |
1314 | |||
1315 | BUG_ON(old_base->running_timer); | ||
1305 | 1316 | ||
1306 | if (old_base->t_base.running_timer) | ||
1307 | BUG(); | ||
1308 | for (i = 0; i < TVR_SIZE; i++) | 1317 | for (i = 0; i < TVR_SIZE; i++) |
1309 | migrate_timer_list(new_base, old_base->tv1.vec + i); | 1318 | migrate_timer_list(new_base, old_base->tv1.vec + i); |
1310 | for (i = 0; i < TVN_SIZE; i++) { | 1319 | for (i = 0; i < TVN_SIZE; i++) { |
@@ -1314,14 +1323,14 @@ static void __devinit migrate_timers(int cpu) | |||
1314 | migrate_timer_list(new_base, old_base->tv5.vec + i); | 1323 | migrate_timer_list(new_base, old_base->tv5.vec + i); |
1315 | } | 1324 | } |
1316 | 1325 | ||
1317 | spin_unlock(&old_base->t_base.lock); | 1326 | spin_unlock(&old_base->lock); |
1318 | spin_unlock(&new_base->t_base.lock); | 1327 | spin_unlock(&new_base->lock); |
1319 | local_irq_enable(); | 1328 | local_irq_enable(); |
1320 | put_cpu_var(tvec_bases); | 1329 | put_cpu_var(tvec_bases); |
1321 | } | 1330 | } |
1322 | #endif /* CONFIG_HOTPLUG_CPU */ | 1331 | #endif /* CONFIG_HOTPLUG_CPU */ |
1323 | 1332 | ||
1324 | static int __devinit timer_cpu_notify(struct notifier_block *self, | 1333 | static int timer_cpu_notify(struct notifier_block *self, |
1325 | unsigned long action, void *hcpu) | 1334 | unsigned long action, void *hcpu) |
1326 | { | 1335 | { |
1327 | long cpu = (long)hcpu; | 1336 | long cpu = (long)hcpu; |
@@ -1341,7 +1350,7 @@ static int __devinit timer_cpu_notify(struct notifier_block *self, | |||
1341 | return NOTIFY_OK; | 1350 | return NOTIFY_OK; |
1342 | } | 1351 | } |
1343 | 1352 | ||
1344 | static struct notifier_block __devinitdata timers_nb = { | 1353 | static struct notifier_block timers_nb = { |
1345 | .notifier_call = timer_cpu_notify, | 1354 | .notifier_call = timer_cpu_notify, |
1346 | }; | 1355 | }; |
1347 | 1356 | ||
@@ -1471,7 +1480,7 @@ static void time_interpolator_update(long delta_nsec) | |||
1471 | */ | 1480 | */ |
1472 | if (jiffies % INTERPOLATOR_ADJUST == 0) | 1481 | if (jiffies % INTERPOLATOR_ADJUST == 0) |
1473 | { | 1482 | { |
1474 | if (time_interpolator->skips == 0 && time_interpolator->offset > TICK_NSEC) | 1483 | if (time_interpolator->skips == 0 && time_interpolator->offset > tick_nsec) |
1475 | time_interpolator->nsec_per_cyc--; | 1484 | time_interpolator->nsec_per_cyc--; |
1476 | if (time_interpolator->ns_skipped > INTERPOLATOR_MAX_SKIP && time_interpolator->offset == 0) | 1485 | if (time_interpolator->ns_skipped > INTERPOLATOR_MAX_SKIP && time_interpolator->offset == 0) |
1477 | time_interpolator->nsec_per_cyc++; | 1486 | time_interpolator->nsec_per_cyc++; |
@@ -1495,8 +1504,7 @@ register_time_interpolator(struct time_interpolator *ti) | |||
1495 | unsigned long flags; | 1504 | unsigned long flags; |
1496 | 1505 | ||
1497 | /* Sanity check */ | 1506 | /* Sanity check */ |
1498 | if (ti->frequency == 0 || ti->mask == 0) | 1507 | BUG_ON(ti->frequency == 0 || ti->mask == 0); |
1499 | BUG(); | ||
1500 | 1508 | ||
1501 | ti->nsec_per_cyc = ((u64)NSEC_PER_SEC << ti->shift) / ti->frequency; | 1509 | ti->nsec_per_cyc = ((u64)NSEC_PER_SEC << ti->shift) / ti->frequency; |
1502 | spin_lock(&time_interpolator_lock); | 1510 | spin_lock(&time_interpolator_lock); |
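None of the tvec_base_t restructuring changes the driver-visible API; timer->base now simply points at the per-CPU base itself rather than an embedded timer_base_s. A hypothetical user still looks like this (2.6-era callback signature):

#include <linux/jiffies.h>
#include <linux/timer.h>

static struct timer_list example_timer;         /* hypothetical */

static void example_timeout(unsigned long data)
{
        /* runs in softirq context on the CPU whose base owns the timer */
}

static void example_arm(void)
{
        init_timer(&example_timer);     /* base = this CPU's tvec base */
        example_timer.function  = example_timeout;
        example_timer.data      = 0;
        mod_timer(&example_timer, jiffies + HZ);        /* ~1 second */
}

/* teardown: del_timer_sync(&example_timer); */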
diff --git a/kernel/uid16.c b/kernel/uid16.c index aa25605027..187e2a4238 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c | |||
@@ -20,43 +20,67 @@ | |||
20 | 20 | ||
21 | asmlinkage long sys_chown16(const char __user * filename, old_uid_t user, old_gid_t group) | 21 | asmlinkage long sys_chown16(const char __user * filename, old_uid_t user, old_gid_t group) |
22 | { | 22 | { |
23 | return sys_chown(filename, low2highuid(user), low2highgid(group)); | 23 | long ret = sys_chown(filename, low2highuid(user), low2highgid(group)); |
24 | /* avoid REGPARM breakage on x86: */ | ||
25 | prevent_tail_call(ret); | ||
26 | return ret; | ||
24 | } | 27 | } |
25 | 28 | ||
26 | asmlinkage long sys_lchown16(const char __user * filename, old_uid_t user, old_gid_t group) | 29 | asmlinkage long sys_lchown16(const char __user * filename, old_uid_t user, old_gid_t group) |
27 | { | 30 | { |
28 | return sys_lchown(filename, low2highuid(user), low2highgid(group)); | 31 | long ret = sys_lchown(filename, low2highuid(user), low2highgid(group)); |
32 | /* avoid REGPARM breakage on x86: */ | ||
33 | prevent_tail_call(ret); | ||
34 | return ret; | ||
29 | } | 35 | } |
30 | 36 | ||
31 | asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group) | 37 | asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group) |
32 | { | 38 | { |
33 | return sys_fchown(fd, low2highuid(user), low2highgid(group)); | 39 | long ret = sys_fchown(fd, low2highuid(user), low2highgid(group)); |
40 | /* avoid REGPARM breakage on x86: */ | ||
41 | prevent_tail_call(ret); | ||
42 | return ret; | ||
34 | } | 43 | } |
35 | 44 | ||
36 | asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid) | 45 | asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid) |
37 | { | 46 | { |
38 | return sys_setregid(low2highgid(rgid), low2highgid(egid)); | 47 | long ret = sys_setregid(low2highgid(rgid), low2highgid(egid)); |
48 | /* avoid REGPARM breakage on x86: */ | ||
49 | prevent_tail_call(ret); | ||
50 | return ret; | ||
39 | } | 51 | } |
40 | 52 | ||
41 | asmlinkage long sys_setgid16(old_gid_t gid) | 53 | asmlinkage long sys_setgid16(old_gid_t gid) |
42 | { | 54 | { |
43 | return sys_setgid(low2highgid(gid)); | 55 | long ret = sys_setgid(low2highgid(gid)); |
56 | /* avoid REGPARM breakage on x86: */ | ||
57 | prevent_tail_call(ret); | ||
58 | return ret; | ||
44 | } | 59 | } |
45 | 60 | ||
46 | asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid) | 61 | asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid) |
47 | { | 62 | { |
48 | return sys_setreuid(low2highuid(ruid), low2highuid(euid)); | 63 | long ret = sys_setreuid(low2highuid(ruid), low2highuid(euid)); |
64 | /* avoid REGPARM breakage on x86: */ | ||
65 | prevent_tail_call(ret); | ||
66 | return ret; | ||
49 | } | 67 | } |
50 | 68 | ||
51 | asmlinkage long sys_setuid16(old_uid_t uid) | 69 | asmlinkage long sys_setuid16(old_uid_t uid) |
52 | { | 70 | { |
53 | return sys_setuid(low2highuid(uid)); | 71 | long ret = sys_setuid(low2highuid(uid)); |
72 | /* avoid REGPARM breakage on x86: */ | ||
73 | prevent_tail_call(ret); | ||
74 | return ret; | ||
54 | } | 75 | } |
55 | 76 | ||
56 | asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid) | 77 | asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid) |
57 | { | 78 | { |
58 | return sys_setresuid(low2highuid(ruid), low2highuid(euid), | 79 | long ret = sys_setresuid(low2highuid(ruid), low2highuid(euid), |
59 | low2highuid(suid)); | 80 | low2highuid(suid)); |
81 | /* avoid REGPARM breakage on x86: */ | ||
82 | prevent_tail_call(ret); | ||
83 | return ret; | ||
60 | } | 84 | } |
61 | 85 | ||
62 | asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, old_uid_t __user *suid) | 86 | asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, old_uid_t __user *suid) |
@@ -72,8 +96,11 @@ asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, | |||
72 | 96 | ||
73 | asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid) | 97 | asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid) |
74 | { | 98 | { |
75 | return sys_setresgid(low2highgid(rgid), low2highgid(egid), | 99 | long ret = sys_setresgid(low2highgid(rgid), low2highgid(egid), |
76 | low2highgid(sgid)); | 100 | low2highgid(sgid)); |
101 | /* avoid REGPARM breakage on x86: */ | ||
102 | prevent_tail_call(ret); | ||
103 | return ret; | ||
77 | } | 104 | } |
78 | 105 | ||
79 | asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, old_gid_t __user *sgid) | 106 | asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, old_gid_t __user *sgid) |
@@ -89,12 +116,18 @@ asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, | |||
89 | 116 | ||
90 | asmlinkage long sys_setfsuid16(old_uid_t uid) | 117 | asmlinkage long sys_setfsuid16(old_uid_t uid) |
91 | { | 118 | { |
92 | return sys_setfsuid(low2highuid(uid)); | 119 | long ret = sys_setfsuid(low2highuid(uid)); |
120 | /* avoid REGPARM breakage on x86: */ | ||
121 | prevent_tail_call(ret); | ||
122 | return ret; | ||
93 | } | 123 | } |
94 | 124 | ||
95 | asmlinkage long sys_setfsgid16(old_gid_t gid) | 125 | asmlinkage long sys_setfsgid16(old_gid_t gid) |
96 | { | 126 | { |
97 | return sys_setfsgid(low2highgid(gid)); | 127 | long ret = sys_setfsgid(low2highgid(gid)); |
128 | /* avoid REGPARM breakage on x86: */ | ||
129 | prevent_tail_call(ret); | ||
130 | return ret; | ||
98 | } | 131 | } |
99 | 132 | ||
100 | static int groups16_to_user(old_gid_t __user *grouplist, | 133 | static int groups16_to_user(old_gid_t __user *grouplist, |
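The pattern repeated throughout this file exists to stop gcc from compiling each 16-bit wrapper as a sibling (tail) call into the native syscall, which breaks on x86 builds using -mregparm because the two entry points disagree about argument registers. Roughly, the idea behind prevent_tail_call() (illustrative only, not the literal arch macro):

/* Forcing the return value through a register makes gcc come back to
 * the wrapper instead of jumping straight into the real syscall;
 * architectures that don't need this define it as a no-op. */
#define prevent_tail_call(ret)  __asm__ ("" : "=r" (ret) : "0" (ret))

/* generic fallback: #define prevent_tail_call(ret) do { } while (0) */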
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e9e464a903..880fb415a8 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -547,7 +547,7 @@ static void take_over_work(struct workqueue_struct *wq, unsigned int cpu) | |||
547 | } | 547 | } |
548 | 548 | ||
549 | /* We're holding the cpucontrol mutex here */ | 549 | /* We're holding the cpucontrol mutex here */ |
550 | static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, | 550 | static int workqueue_cpu_callback(struct notifier_block *nfb, |
551 | unsigned long action, | 551 | unsigned long action, |
552 | void *hcpu) | 552 | void *hcpu) |
553 | { | 553 | { |