Diffstat (limited to 'kernel')
53 files changed, 3997 insertions(+), 900 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index 58908f9d156a..752bd7d383af 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,6 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o
 
+obj-y += time/
 obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
 obj-$(CONFIG_FUTEX) += futex.o
 ifeq ($(CONFIG_COMPAT),y)
@@ -20,8 +21,8 @@ obj-$(CONFIG_SMP) += cpu.o spinlock.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
 obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_OBSOLETE_INTERMODULE) += intermodule.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
+obj-$(CONFIG_STACK_UNWIND) += unwind.o
 obj-$(CONFIG_PM) += power/
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
diff --git a/kernel/acct.c b/kernel/acct.c
index b327f4d20104..368c4f03fe0e 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -75,7 +75,7 @@ int acct_parm[3] = {4, 2, 30};
 /*
  * External references and all of the globals.
  */
-static void do_acct_process(long, struct file *);
+static void do_acct_process(struct file *);
 
 /*
  * This structure is used so that all the data protected by lock
@@ -118,7 +118,7 @@ static int check_free_space(struct file *file)
 	spin_unlock(&acct_globals.lock);
 
 	/* May block */
-	if (vfs_statfs(file->f_dentry->d_inode->i_sb, &sbuf))
+	if (vfs_statfs(file->f_dentry, &sbuf))
 		return res;
 	suspend = sbuf.f_blocks * SUSPEND;
 	resume = sbuf.f_blocks * RESUME;
@@ -196,7 +196,7 @@ static void acct_file_reopen(struct file *file)
 	if (old_acct) {
 		mnt_unpin(old_acct->f_vfsmnt);
 		spin_unlock(&acct_globals.lock);
-		do_acct_process(0, old_acct);
+		do_acct_process(old_acct);
 		filp_close(old_acct, NULL);
 		spin_lock(&acct_globals.lock);
 	}
@@ -419,16 +419,15 @@ static u32 encode_float(u64 value)
 /*
  *  do_acct_process does all actual work. Caller holds the reference to file.
  */
-static void do_acct_process(long exitcode, struct file *file)
+static void do_acct_process(struct file *file)
 {
+	struct pacct_struct *pacct = &current->signal->pacct;
 	acct_t ac;
 	mm_segment_t fs;
-	unsigned long vsize;
 	unsigned long flim;
 	u64 elapsed;
 	u64 run_time;
 	struct timespec uptime;
-	unsigned long jiffies;
 
 	/*
 	 * First check to see if there is enough free_space to continue
@@ -469,12 +468,6 @@ static void do_acct_process(long exitcode, struct file *file)
 #endif
 	do_div(elapsed, AHZ);
 	ac.ac_btime = xtime.tv_sec - elapsed;
-	jiffies = cputime_to_jiffies(cputime_add(current->utime,
-						 current->signal->utime));
-	ac.ac_utime = encode_comp_t(jiffies_to_AHZ(jiffies));
-	jiffies = cputime_to_jiffies(cputime_add(current->stime,
-						 current->signal->stime));
-	ac.ac_stime = encode_comp_t(jiffies_to_AHZ(jiffies));
 	/* we really need to bite the bullet and change layout */
 	ac.ac_uid = current->uid;
 	ac.ac_gid = current->gid;
@@ -496,37 +489,18 @@ static void do_acct_process(long exitcode, struct file *file)
 		old_encode_dev(tty_devnum(current->signal->tty)) : 0;
 	read_unlock(&tasklist_lock);
 
-	ac.ac_flag = 0;
-	if (current->flags & PF_FORKNOEXEC)
-		ac.ac_flag |= AFORK;
-	if (current->flags & PF_SUPERPRIV)
-		ac.ac_flag |= ASU;
-	if (current->flags & PF_DUMPCORE)
-		ac.ac_flag |= ACORE;
-	if (current->flags & PF_SIGNALED)
-		ac.ac_flag |= AXSIG;
-
-	vsize = 0;
-	if (current->mm) {
-		struct vm_area_struct *vma;
-		down_read(&current->mm->mmap_sem);
-		vma = current->mm->mmap;
-		while (vma) {
-			vsize += vma->vm_end - vma->vm_start;
-			vma = vma->vm_next;
-		}
-		up_read(&current->mm->mmap_sem);
-	}
-	vsize = vsize / 1024;
-	ac.ac_mem = encode_comp_t(vsize);
+	spin_lock(&current->sighand->siglock);
+	ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime)));
+	ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime)));
+	ac.ac_flag = pacct->ac_flag;
+	ac.ac_mem = encode_comp_t(pacct->ac_mem);
+	ac.ac_minflt = encode_comp_t(pacct->ac_minflt);
+	ac.ac_majflt = encode_comp_t(pacct->ac_majflt);
+	ac.ac_exitcode = pacct->ac_exitcode;
+	spin_unlock(&current->sighand->siglock);
 	ac.ac_io = encode_comp_t(0 /* current->io_usage */);	/* %% */
 	ac.ac_rw = encode_comp_t(ac.ac_io / 1024);
-	ac.ac_minflt = encode_comp_t(current->signal->min_flt +
-				     current->min_flt);
-	ac.ac_majflt = encode_comp_t(current->signal->maj_flt +
-				     current->maj_flt);
 	ac.ac_swaps = encode_comp_t(0);
-	ac.ac_exitcode = exitcode;
 
 	/*
 	 * Kernel segment override to datasegment and write it
@@ -546,12 +520,63 @@ static void do_acct_process(long exitcode, struct file *file)
 }
 
 /**
+ * acct_init_pacct - initialize a new pacct_struct
+ */
+void acct_init_pacct(struct pacct_struct *pacct)
+{
+	memset(pacct, 0, sizeof(struct pacct_struct));
+	pacct->ac_utime = pacct->ac_stime = cputime_zero;
+}
+
+/**
+ * acct_collect - collect accounting information into pacct_struct
+ * @exitcode: task exit code
+ * @group_dead: not 0, if this thread is the last one in the process.
+ */
+void acct_collect(long exitcode, int group_dead)
+{
+	struct pacct_struct *pacct = &current->signal->pacct;
+	unsigned long vsize = 0;
+
+	if (group_dead && current->mm) {
+		struct vm_area_struct *vma;
+		down_read(&current->mm->mmap_sem);
+		vma = current->mm->mmap;
+		while (vma) {
+			vsize += vma->vm_end - vma->vm_start;
+			vma = vma->vm_next;
+		}
+		up_read(&current->mm->mmap_sem);
+	}
+
+	spin_lock_irq(&current->sighand->siglock);
+	if (group_dead)
+		pacct->ac_mem = vsize / 1024;
+	if (thread_group_leader(current)) {
+		pacct->ac_exitcode = exitcode;
+		if (current->flags & PF_FORKNOEXEC)
+			pacct->ac_flag |= AFORK;
+	}
+	if (current->flags & PF_SUPERPRIV)
+		pacct->ac_flag |= ASU;
+	if (current->flags & PF_DUMPCORE)
+		pacct->ac_flag |= ACORE;
+	if (current->flags & PF_SIGNALED)
+		pacct->ac_flag |= AXSIG;
+	pacct->ac_utime = cputime_add(pacct->ac_utime, current->utime);
+	pacct->ac_stime = cputime_add(pacct->ac_stime, current->stime);
+	pacct->ac_minflt += current->min_flt;
+	pacct->ac_majflt += current->maj_flt;
+	spin_unlock_irq(&current->sighand->siglock);
+}
+
+/**
  * acct_process - now just a wrapper around do_acct_process
  * @exitcode: task exit code
  *
  * handles process accounting for an exiting task
  */
-void acct_process(long exitcode)
+void acct_process()
 {
 	struct file *file = NULL;
 
@@ -570,7 +595,7 @@ void acct_process(long exitcode)
 	get_file(file);
 	spin_unlock(&acct_globals.lock);
 
-	do_acct_process(exitcode, file);
+	do_acct_process(file);
 	fput(file);
 }
 
@@ -599,9 +624,7 @@ void acct_update_integrals(struct task_struct *tsk)
  */
 void acct_clear_integrals(struct task_struct *tsk)
 {
-	if (tsk) {
-		tsk->acct_stimexpd = 0;
-		tsk->acct_rss_mem1 = 0;
-		tsk->acct_vm_mem1 = 0;
-	}
+	tsk->acct_stimexpd = 0;
+	tsk->acct_rss_mem1 = 0;
+	tsk->acct_vm_mem1 = 0;
 }
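Taken together, the kernel/acct.c changes move sampling out of the writer: each exiting thread folds its counters into the shared signal->pacct in acct_collect() under siglock, and do_acct_process() now only encodes those accumulated totals, with the exit code carried in pacct->ac_exitcode instead of a function argument. A minimal userspace sketch to exercise this path, assuming CONFIG_BSD_PROCESS_ACCT=y, root privileges, an existing accounting file at the illustrative path below, and glibc exposing acct(2) via <unistd.h>:

    /* Sketch: switch BSD process accounting on and off via acct(2); each
     * process that exits in between appends one acct_t record built from
     * the pacct totals accumulated by acct_collect(). */
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    int main(void)
    {
            if (acct("/var/log/pacct") == -1) {     /* file must already exist */
                    perror("acct on");
                    return EXIT_FAILURE;
            }
            system("/bin/true");                    /* child exit writes a record */
            if (acct(NULL) == -1)                   /* disable accounting */
                    perror("acct off");
            return EXIT_SUCCESS;
    }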
diff --git a/kernel/audit.c b/kernel/audit.c
index df57b493e1cb..7dfac7031bd7 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -56,6 +56,7 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/selinux.h>
+#include <linux/inotify.h>
 
 #include "audit.h"
 
@@ -89,6 +90,7 @@ static int audit_backlog_wait_overflow = 0;
 /* The identity of the user shutting down the audit system. */
 uid_t		audit_sig_uid = -1;
 pid_t		audit_sig_pid = -1;
+u32		audit_sig_sid = 0;
 
 /* Records can be lost in several ways:
    0) [suppressed in audit_alloc]
@@ -102,6 +104,12 @@ static atomic_t audit_lost = ATOMIC_INIT(0);
 /* The netlink socket. */
 static struct sock *audit_sock;
 
+/* Inotify handle. */
+struct inotify_handle *audit_ih;
+
+/* Hash for inode-based rules */
+struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
+
 /* The audit_freelist is a list of pre-allocated audit buffers (if more
  * than AUDIT_MAXFREE are in use, the audit buffer is freed instead of
  * being placed on the freelist). */
@@ -114,10 +122,8 @@ static struct task_struct *kauditd_task;
 static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
 static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
 
-/* The netlink socket is only to be read by 1 CPU, which lets us assume
- * that list additions and deletions never happen simultaneously in
- * auditsc.c */
-DEFINE_MUTEX(audit_netlink_mutex);
+/* Serialize requests from userspace. */
+static DEFINE_MUTEX(audit_cmd_mutex);
 
 /* AUDIT_BUFSIZ is the size of the temporary buffer used for formatting
  * audit records.  Since printk uses a 1024 byte buffer, this buffer
@@ -250,7 +256,7 @@ static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid)
 			"audit_rate_limit=%d old=%d by auid=%u",
 			limit, old, loginuid);
 	audit_rate_limit = limit;
-	return old;
+	return 0;
 }
 
 static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
@@ -273,7 +279,7 @@ static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
 			"audit_backlog_limit=%d old=%d by auid=%u",
 			limit, old, loginuid);
 	audit_backlog_limit = limit;
-	return old;
+	return 0;
 }
 
 static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
@@ -299,7 +305,7 @@ static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
 			"audit_enabled=%d old=%d by auid=%u",
 			state, old, loginuid);
 	audit_enabled = state;
-	return old;
+	return 0;
 }
 
 static int audit_set_failure(int state, uid_t loginuid, u32 sid)
@@ -327,7 +333,7 @@ static int audit_set_failure(int state, uid_t loginuid, u32 sid)
 			"audit_failure=%d old=%d by auid=%u",
 			state, old, loginuid);
 	audit_failure = state;
-	return old;
+	return 0;
 }
 
 static int kauditd_thread(void *dummy)
@@ -363,9 +369,52 @@ static int kauditd_thread(void *dummy)
 			remove_wait_queue(&kauditd_wait, &wait);
 		}
 	}
+}
+
+int audit_send_list(void *_dest)
+{
+	struct audit_netlink_list *dest = _dest;
+	int pid = dest->pid;
+	struct sk_buff *skb;
+
+	/* wait for parent to finish and send an ACK */
+	mutex_lock(&audit_cmd_mutex);
+	mutex_unlock(&audit_cmd_mutex);
+
+	while ((skb = __skb_dequeue(&dest->q)) != NULL)
+		netlink_unicast(audit_sock, skb, pid, 0);
+
+	kfree(dest);
+
 	return 0;
 }
 
+struct sk_buff *audit_make_reply(int pid, int seq, int type, int done,
+				 int multi, void *payload, int size)
+{
+	struct sk_buff	*skb;
+	struct nlmsghdr	*nlh;
+	int		len = NLMSG_SPACE(size);
+	void		*data;
+	int		flags = multi ? NLM_F_MULTI : 0;
+	int		t = done ? NLMSG_DONE : type;
+
+	skb = alloc_skb(len, GFP_KERNEL);
+	if (!skb)
+		return NULL;
+
+	nlh = NLMSG_PUT(skb, pid, seq, t, size);
+	nlh->nlmsg_flags = flags;
+	data = NLMSG_DATA(nlh);
+	memcpy(data, payload, size);
+	return skb;
+
+nlmsg_failure:			/* Used by NLMSG_PUT */
+	if (skb)
+		kfree_skb(skb);
+	return NULL;
+}
+
 /**
  * audit_send_reply - send an audit reply message via netlink
  * @pid: process id to send reply to
@@ -383,29 +432,13 @@ void audit_send_reply(int pid, int seq, int type, int done, int multi,
 		      void *payload, int size)
 {
 	struct sk_buff	*skb;
-	struct nlmsghdr	*nlh;
-	int		len = NLMSG_SPACE(size);
-	void		*data;
-	int		flags = multi ? NLM_F_MULTI : 0;
-	int		t = done ? NLMSG_DONE : type;
-
-	skb = alloc_skb(len, GFP_KERNEL);
+	skb = audit_make_reply(pid, seq, type, done, multi, payload, size);
 	if (!skb)
 		return;
-
-	nlh = NLMSG_PUT(skb, pid, seq, t, size);
-	nlh->nlmsg_flags = flags;
-	data = NLMSG_DATA(nlh);
-	memcpy(data, payload, size);
-
 	/* Ignore failure. It'll only happen if the sender goes away,
 	   because our timeout is set to infinite. */
 	netlink_unicast(audit_sock, skb, pid, 0);
 	return;
-
-nlmsg_failure:			/* Used by NLMSG_PUT */
-	if (skb)
-		kfree_skb(skb);
 }
 
 /*
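The refactoring above splits skb construction (audit_make_reply) from transmission (audit_send_reply) so that multi-part replies such as rule listings can be assembled while audit_cmd_mutex is held and handed off to a kernel thread; audit_send_list()'s empty lock/unlock pair simply blocks until the requesting command has finished and been acknowledged. A kernel-style sketch of the presumed calling pattern follows; dest, rule, pid, seq and the AUDIT_LIST message type are illustrative assumptions, not lines from this diff:

    /* Sketch: queue one skb per rule under audit_cmd_mutex, then let a
     * kthread drain the queue via audit_send_list() once the mutex drops. */
    struct audit_netlink_list *dest;
    struct sk_buff *skb;

    dest = kmalloc(sizeof(*dest), GFP_KERNEL);
    if (!dest)
            return -ENOMEM;
    dest->pid = pid;
    skb_queue_head_init(&dest->q);

    skb = audit_make_reply(pid, seq, AUDIT_LIST, 0, 1, rule, sizeof(*rule));
    if (skb)
            skb_queue_tail(&dest->q, skb);      /* ...repeat per rule */

    kthread_run(audit_send_list, dest, "audit_send_list");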
@@ -451,7 +484,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	struct audit_buffer	*ab;
 	u16			msg_type = nlh->nlmsg_type;
 	uid_t			loginuid; /* loginuid of sender */
-	struct audit_sig_info	sig_data;
+	struct audit_sig_info	*sig_data;
+	char			*ctx;
+	u32			len;
 
 	err = audit_netlink_ok(NETLINK_CB(skb).eff_cap, msg_type);
 	if (err)
@@ -503,12 +538,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (status_get->mask & AUDIT_STATUS_PID) {
 			int old = audit_pid;
 			if (sid) {
-				char *ctx = NULL;
-				u32 len;
-				int rc;
-				if ((rc = selinux_ctxid_to_string(
+				if ((err = selinux_ctxid_to_string(
 						sid, &ctx, &len)))
-					return rc;
+					return err;
 				else
 					audit_log(NULL, GFP_KERNEL,
 						AUDIT_CONFIG_CHANGE,
@@ -523,10 +555,10 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			audit_pid = status_get->pid;
 		}
 		if (status_get->mask & AUDIT_STATUS_RATE_LIMIT)
-			audit_set_rate_limit(status_get->rate_limit,
+			err = audit_set_rate_limit(status_get->rate_limit,
 					loginuid, sid);
 		if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
-			audit_set_backlog_limit(status_get->backlog_limit,
+			err = audit_set_backlog_limit(status_get->backlog_limit,
 					loginuid, sid);
 		break;
 	case AUDIT_USER:
@@ -544,8 +576,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 				"user pid=%d uid=%u auid=%u",
 				pid, uid, loginuid);
 			if (sid) {
-				char *ctx = NULL;
-				u32 len;
 				if (selinux_ctxid_to_string(
 						sid, &ctx, &len)) {
 					audit_log_format(ab,
@@ -584,10 +614,21 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 							loginuid, sid);
 		break;
 	case AUDIT_SIGNAL_INFO:
-		sig_data.uid = audit_sig_uid;
-		sig_data.pid = audit_sig_pid;
+		err = selinux_ctxid_to_string(audit_sig_sid, &ctx, &len);
+		if (err)
+			return err;
+		sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL);
+		if (!sig_data) {
+			kfree(ctx);
+			return -ENOMEM;
+		}
+		sig_data->uid = audit_sig_uid;
+		sig_data->pid = audit_sig_pid;
+		memcpy(sig_data->ctx, ctx, len);
+		kfree(ctx);
 		audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO,
-				0, 0, &sig_data, sizeof(sig_data));
+				 0, 0, sig_data, sizeof(*sig_data) + len);
+		kfree(sig_data);
 		break;
 	default:
 		err = -EINVAL;
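The AUDIT_SIGNAL_INFO reply becomes variable-length: the SELinux context string rides behind the fixed fields in what is presumably a flexible array member ctx[] at the end of struct audit_sig_info, allocated with a single kmalloc of sizeof(*sig_data) + len. A self-contained userspace model of that allocation pattern (all names hypothetical):

    /* Sketch: one allocation carries fixed fields plus a trailing
     * variable-length string, mirroring the sig_data reply above. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct sig_info {
            unsigned uid;
            int pid;
            char ctx[];             /* flexible array member */
    };

    int main(void)
    {
            const char *context = "system_u:system_r:auditd_t";
            size_t len = strlen(context) + 1;
            struct sig_info *s = malloc(sizeof(*s) + len);

            if (!s)
                    return 1;
            s->uid = 0;
            s->pid = 1234;
            memcpy(s->ctx, context, len);
            printf("uid=%u pid=%d ctx=%s\n", s->uid, s->pid, s->ctx);
            free(s);
            return 0;
    }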
@@ -629,20 +670,30 @@ static void audit_receive(struct sock *sk, int length)
 	struct sk_buff  *skb;
 	unsigned int qlen;
 
-	mutex_lock(&audit_netlink_mutex);
+	mutex_lock(&audit_cmd_mutex);
 
 	for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
 		skb = skb_dequeue(&sk->sk_receive_queue);
 		audit_receive_skb(skb);
 		kfree_skb(skb);
 	}
-	mutex_unlock(&audit_netlink_mutex);
+	mutex_unlock(&audit_cmd_mutex);
 }
 
+#ifdef CONFIG_AUDITSYSCALL
+static const struct inotify_operations audit_inotify_ops = {
+	.handle_event	= audit_handle_ievent,
+	.destroy_watch	= audit_free_parent,
+};
+#endif
 
 /* Initialize audit support at boot time. */
 static int __init audit_init(void)
 {
+#ifdef CONFIG_AUDITSYSCALL
+	int i;
+#endif
+
 	printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
 	       audit_default ? "enabled" : "disabled");
 	audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive,
@@ -661,6 +712,16 @@ static int __init audit_init(void)
 	selinux_audit_set_callback(&selinux_audit_rule_update);
 
 	audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
+
+#ifdef CONFIG_AUDITSYSCALL
+	audit_ih = inotify_init(&audit_inotify_ops);
+	if (IS_ERR(audit_ih))
+		audit_panic("cannot initialize inotify handle");
+
+	for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
+		INIT_LIST_HEAD(&audit_inode_hash[i]);
+#endif
+
 	return 0;
 }
 __initcall(audit_init);
@@ -690,10 +751,12 @@ static void audit_buffer_free(struct audit_buffer *ab)
 		kfree_skb(ab->skb);
 
 	spin_lock_irqsave(&audit_freelist_lock, flags);
-	if (++audit_freelist_count > AUDIT_MAXFREE)
+	if (audit_freelist_count > AUDIT_MAXFREE)
 		kfree(ab);
-	else
+	else {
+		audit_freelist_count++;
 		list_add(&ab->list, &audit_freelist);
+	}
 	spin_unlock_irqrestore(&audit_freelist_lock, flags);
 }
 
@@ -988,28 +1051,76 @@ void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf,
 	skb_put(skb, len << 1); /* new string is twice the old string */
 }
 
+/*
+ * Format a string of no more than slen characters into the audit buffer,
+ * enclosed in quote marks.
+ */
+static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
+			       const char *string)
+{
+	int avail, new_len;
+	unsigned char *ptr;
+	struct sk_buff *skb;
+
+	BUG_ON(!ab->skb);
+	skb = ab->skb;
+	avail = skb_tailroom(skb);
+	new_len = slen + 3;	/* enclosing quotes + null terminator */
+	if (new_len > avail) {
+		avail = audit_expand(ab, new_len);
+		if (!avail)
+			return;
+	}
+	ptr = skb->tail;
+	*ptr++ = '"';
+	memcpy(ptr, string, slen);
+	ptr += slen;
+	*ptr++ = '"';
+	*ptr = 0;
+	skb_put(skb, slen + 2);	/* don't include null terminator */
+}
+
 /**
- * audit_log_untrustedstring - log a string that may contain random characters
+ * audit_log_n_untrustedstring - log a string that may contain random characters
  * @ab: audit_buffer
+ * @len: length of string (not including trailing null)
  * @string: string to be logged
  *
  * This code will escape a string that is passed to it if the string
  * contains a control character, unprintable character, double quote mark,
  * or a space. Unescaped strings will start and end with a double quote mark.
  * Strings that are escaped are printed in hex (2 digits per char).
+ *
+ * The caller specifies the number of characters in the string to log, which may
+ * or may not be the entire string.
  */
-void audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
+const char *audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len,
+					const char *string)
 {
 	const unsigned char *p = string;
 
 	while (*p) {
 		if (*p == '"' || *p < 0x21 || *p > 0x7f) {
-			audit_log_hex(ab, string, strlen(string));
-			return;
+			audit_log_hex(ab, string, len);
+			return string + len + 1;
 		}
 		p++;
 	}
-	audit_log_format(ab, "\"%s\"", string);
+	audit_log_n_string(ab, len, string);
+	return p + 1;
+}
+
+/**
+ * audit_log_untrustedstring - log a string that may contain random characters
+ * @ab: audit_buffer
+ * @string: string to be logged
+ *
+ * Same as audit_log_n_untrustedstring(), except that strlen is used to
+ * determine string length.
+ */
+const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
+{
+	return audit_log_n_untrustedstring(ab, strlen(string), string);
 }
 
 /* This is a helper-function to print the escaped d_path */
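audit_log_n_untrustedstring() decides per byte: if the string contains a double quote or any byte outside 0x21-0x7f (which includes space), it is hex-encoded at two digits per byte; otherwise it is emitted in quotes via the new audit_log_n_string(), which copies exactly slen bytes and so no longer requires NUL-terminated input. The return value points just past the logged string, so callers can walk a buffer of concatenated strings. A userspace model of the escaping decision (a sketch, not the kernel code):

    /* Model of the escape-or-quote decision made above. */
    #include <stdio.h>
    #include <string.h>

    static void log_untrusted(const char *s, size_t len)
    {
            for (size_t i = 0; i < len; i++) {
                    unsigned char c = s[i];
                    if (c == '"' || c < 0x21 || c > 0x7f) {
                            for (size_t j = 0; j < len; j++)
                                    printf("%02X", (unsigned char)s[j]);
                            putchar('\n');
                            return;         /* hex-encoded */
                    }
            }
            printf("\"%.*s\"\n", (int)len, s);      /* quoted verbatim */
    }

    int main(void)
    {
            log_untrusted("vmlinux", strlen("vmlinux"));    /* "vmlinux" */
            log_untrusted("a b", strlen("a b"));            /* hex: space forces escape */
            return 0;
    }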
diff --git a/kernel/audit.h b/kernel/audit.h
index 6f733920fd32..8323e4132a33 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -19,9 +19,9 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
-#include <linux/mutex.h>
 #include <linux/fs.h>
 #include <linux/audit.h>
+#include <linux/skbuff.h>
 
 /* 0 = no checking
    1 = put_count checking
@@ -53,6 +53,18 @@ enum audit_state {
 };
 
 /* Rule lists */
+struct audit_parent;
+
+struct audit_watch {
+	atomic_t		count;	/* reference count */
+	char			*path;	/* insertion path */
+	dev_t			dev;	/* associated superblock device */
+	unsigned long		ino;	/* associated inode number */
+	struct audit_parent	*parent; /* associated parent */
+	struct list_head	wlist;	/* entry in parent->watches list */
+	struct list_head	rules;	/* associated rules */
+};
+
 struct audit_field {
 	u32			type;
 	u32			val;
@@ -70,6 +82,9 @@ struct audit_krule {
 	u32			buflen;	/* for data alloc on list rules */
 	u32			field_count;
 	struct audit_field	*fields;
+	struct audit_field	*inode_f; /* quick access to an inode field */
+	struct audit_watch	*watch;	/* associated watch */
+	struct list_head	rlist;	/* entry in audit_watch.rules list */
 };
 
 struct audit_entry {
@@ -78,15 +93,53 @@ struct audit_entry {
 	struct audit_krule	rule;
 };
 
-
 extern int audit_pid;
-extern int audit_comparator(const u32 left, const u32 op, const u32 right);
 
+#define AUDIT_INODE_BUCKETS	32
+extern struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
+
+static inline int audit_hash_ino(u32 ino)
+{
+	return (ino & (AUDIT_INODE_BUCKETS-1));
+}
+
+extern int audit_comparator(const u32 left, const u32 op, const u32 right);
+extern int audit_compare_dname_path(const char *dname, const char *path,
+				    int *dirlen);
+extern struct sk_buff *	    audit_make_reply(int pid, int seq, int type,
+					     int done, int multi,
+					     void *payload, int size);
 extern void		    audit_send_reply(int pid, int seq, int type,
 					     int done, int multi,
 					     void *payload, int size);
 extern void		    audit_log_lost(const char *message);
 extern void		    audit_panic(const char *message);
-extern struct mutex audit_netlink_mutex;
 
+struct audit_netlink_list {
+	int pid;
+	struct sk_buff_head q;
+};
+
+int audit_send_list(void *);
+
+struct inotify_watch;
+extern void audit_free_parent(struct inotify_watch *);
+extern void audit_handle_ievent(struct inotify_watch *, u32, u32, u32,
+				const char *, struct inode *);
 extern int selinux_audit_rule_update(void);
+
+#ifdef CONFIG_AUDITSYSCALL
+extern void __audit_signal_info(int sig, struct task_struct *t);
+static inline void audit_signal_info(int sig, struct task_struct *t)
+{
+	if (unlikely(audit_pid && t->tgid == audit_pid))
+		__audit_signal_info(sig, t);
+}
+extern enum audit_state audit_filter_inodes(struct task_struct *,
+					    struct audit_context *);
+extern void audit_set_auditable(struct audit_context *);
+#else
+#define audit_signal_info(s,t)
+#define audit_filter_inodes(t,c) AUDIT_DISABLED
+#define audit_set_auditable(c)
+#endif
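audit_hash_ino() depends on AUDIT_INODE_BUCKETS being a power of two: masking with (N - 1) keeps the low bits of the inode number, which equals ino % N without a division. A compilable illustration:

    /* Power-of-two bucket selection, as used by the inode hash above. */
    #include <stdio.h>

    #define AUDIT_INODE_BUCKETS 32

    static int audit_hash_ino(unsigned int ino)
    {
            return ino & (AUDIT_INODE_BUCKETS - 1);     /* same as ino % 32 */
    }

    int main(void)
    {
            printf("%d %d\n", audit_hash_ino(33), audit_hash_ino(65));  /* 1 1 */
            return 0;
    }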
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 7c134906d689..4c99d2c586ed 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -22,13 +22,59 @@
 #include <linux/kernel.h>
 #include <linux/audit.h>
 #include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
 #include <linux/netlink.h>
+#include <linux/sched.h>
+#include <linux/inotify.h>
 #include <linux/selinux.h>
 #include "audit.h"
 
-/* There are three lists of rules -- one to search at task creation
- * time, one to search at syscall entry time, and another to search at
- * syscall exit time. */
+/*
+ * Locking model:
+ *
+ * audit_filter_mutex:
+ *	Synchronizes writes and blocking reads of audit's filterlist
+ *	data.  Rcu is used to traverse the filterlist and access
+ *	contents of structs audit_entry, audit_watch and opaque
+ *	selinux rules during filtering.  If modified, these structures
+ *	must be copied and replace their counterparts in the filterlist.
+ *	An audit_parent struct is not accessed during filtering, so may
+ *	be written directly provided audit_filter_mutex is held.
+ */
+
+/*
+ * Reference counting:
+ *
+ * audit_parent: lifetime is from audit_init_parent() to receipt of an IN_IGNORED
+ *	event.  Each audit_watch holds a reference to its associated parent.
+ *
+ * audit_watch: if added to lists, lifetime is from audit_init_watch() to
+ *	audit_remove_watch().  Additionally, an audit_watch may exist
+ *	temporarily to assist in searching existing filter data.  Each
+ *	audit_krule holds a reference to its associated watch.
+ */
+
+struct audit_parent {
+	struct list_head	ilist;	/* entry in inotify registration list */
+	struct list_head	watches; /* associated watches */
+	struct inotify_watch	wdata;	/* inotify watch data */
+	unsigned		flags;	/* status flags */
+};
+
+/*
+ * audit_parent status flags:
+ *
+ * AUDIT_PARENT_INVALID - set anytime rules/watches are auto-removed due to
+ * a filesystem event to ensure we're adding audit watches to a valid parent.
+ * Technically not needed for IN_DELETE_SELF or IN_UNMOUNT events, as we cannot
+ * receive them while we have nameidata, but must be used for IN_MOVE_SELF which
+ * we can receive while holding nameidata.
+ */
+#define AUDIT_PARENT_INVALID	0x001
+
+/* Audit filter lists, defined in <linux/audit.h> */
 struct list_head audit_filter_list[AUDIT_NR_FILTERS] = {
 	LIST_HEAD_INIT(audit_filter_list[0]),
 	LIST_HEAD_INIT(audit_filter_list[1]),
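The locking model documented above is the usual mutex-for-writers, RCU-for-readers split: filtering traverses the lists locklessly, so writers never modify a published entry in place; they deep-copy it, swap the copy in under audit_filter_mutex, and defer the free past an RCU grace period. A kernel-style fragment of that update step, as audit_update_watch() later in this file performs it (oentry, nentry, nwatch and h are stand-ins):

    /* Sketch of the copy/replace/defer pattern used by this patch. */
    mutex_lock(&audit_filter_mutex);
    nentry = audit_dupe_rule(&oentry->rule, nwatch);        /* deep copy */
    list_del_rcu(&oentry->list);                            /* unpublish old */
    list_add_rcu(&nentry->list, &audit_inode_hash[h]);      /* publish new */
    mutex_unlock(&audit_filter_mutex);
    call_rcu(&oentry->rcu, audit_free_rule_rcu);    /* free after readers drain */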
@@ -41,9 +87,53 @@ struct list_head audit_filter_list[AUDIT_NR_FILTERS] = {
 #endif
 };
 
+static DEFINE_MUTEX(audit_filter_mutex);
+
+/* Inotify handle */
+extern struct inotify_handle *audit_ih;
+
+/* Inotify events we care about. */
+#define AUDIT_IN_WATCH	IN_MOVE|IN_CREATE|IN_DELETE|IN_DELETE_SELF|IN_MOVE_SELF
+
+void audit_free_parent(struct inotify_watch *i_watch)
+{
+	struct audit_parent *parent;
+
+	parent = container_of(i_watch, struct audit_parent, wdata);
+	WARN_ON(!list_empty(&parent->watches));
+	kfree(parent);
+}
+
+static inline void audit_get_watch(struct audit_watch *watch)
+{
+	atomic_inc(&watch->count);
+}
+
+static void audit_put_watch(struct audit_watch *watch)
+{
+	if (atomic_dec_and_test(&watch->count)) {
+		WARN_ON(watch->parent);
+		WARN_ON(!list_empty(&watch->rules));
+		kfree(watch->path);
+		kfree(watch);
+	}
+}
+
+static void audit_remove_watch(struct audit_watch *watch)
+{
+	list_del(&watch->wlist);
+	put_inotify_watch(&watch->parent->wdata);
+	watch->parent = NULL;
+	audit_put_watch(watch); /* match initial get */
+}
+
 static inline void audit_free_rule(struct audit_entry *e)
 {
 	int i;
+
+	/* some rules don't have associated watches */
+	if (e->rule.watch)
+		audit_put_watch(e->rule.watch);
 	if (e->rule.fields)
 		for (i = 0; i < e->rule.field_count; i++) {
 			struct audit_field *f = &e->rule.fields[i];
@@ -60,6 +150,50 @@ static inline void audit_free_rule_rcu(struct rcu_head *head)
 	audit_free_rule(e);
 }
 
+/* Initialize a parent watch entry. */
+static struct audit_parent *audit_init_parent(struct nameidata *ndp)
+{
+	struct audit_parent *parent;
+	s32 wd;
+
+	parent = kzalloc(sizeof(*parent), GFP_KERNEL);
+	if (unlikely(!parent))
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&parent->watches);
+	parent->flags = 0;
+
+	inotify_init_watch(&parent->wdata);
+	/* grab a ref so inotify watch hangs around until we take audit_filter_mutex */
+	get_inotify_watch(&parent->wdata);
+	wd = inotify_add_watch(audit_ih, &parent->wdata, ndp->dentry->d_inode,
+			       AUDIT_IN_WATCH);
+	if (wd < 0) {
+		audit_free_parent(&parent->wdata);
+		return ERR_PTR(wd);
+	}
+
+	return parent;
+}
+
+/* Initialize a watch entry. */
+static struct audit_watch *audit_init_watch(char *path)
+{
+	struct audit_watch *watch;
+
+	watch = kzalloc(sizeof(*watch), GFP_KERNEL);
+	if (unlikely(!watch))
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&watch->rules);
+	atomic_set(&watch->count, 1);
+	watch->path = path;
+	watch->dev = (dev_t)-1;
+	watch->ino = (unsigned long)-1;
+
+	return watch;
+}
+
 /* Initialize an audit filterlist entry. */
 static inline struct audit_entry *audit_init_entry(u32 field_count)
 {
@@ -107,6 +241,43 @@ static char *audit_unpack_string(void **bufp, size_t *remain, size_t len)
 	return str;
 }
 
+/* Translate an inode field to kernel representation. */
+static inline int audit_to_inode(struct audit_krule *krule,
+				 struct audit_field *f)
+{
+	if (krule->listnr != AUDIT_FILTER_EXIT ||
+	    krule->watch || krule->inode_f)
+		return -EINVAL;
+
+	krule->inode_f = f;
+	return 0;
+}
+
+/* Translate a watch string to kernel representation. */
+static int audit_to_watch(struct audit_krule *krule, char *path, int len,
+			  u32 op)
+{
+	struct audit_watch *watch;
+
+	if (!audit_ih)
+		return -EOPNOTSUPP;
+
+	if (path[0] != '/' || path[len-1] == '/' ||
+	    krule->listnr != AUDIT_FILTER_EXIT ||
+	    op & ~AUDIT_EQUAL ||
+	    krule->inode_f || krule->watch) /* 1 inode # per rule, for hash */
+		return -EINVAL;
+
+	watch = audit_init_watch(path);
+	if (unlikely(IS_ERR(watch)))
+		return PTR_ERR(watch);
+
+	audit_get_watch(watch);
+	krule->watch = watch;
+
+	return 0;
+}
+
 /* Common user-space to kernel rule translation. */
 static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
 {
@@ -128,8 +299,11 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
 #endif
 		;
 	}
-	if (rule->action != AUDIT_NEVER && rule->action != AUDIT_POSSIBLE &&
-	    rule->action != AUDIT_ALWAYS)
+	if (unlikely(rule->action == AUDIT_POSSIBLE)) {
+		printk(KERN_ERR "AUDIT_POSSIBLE is deprecated\n");
+		goto exit_err;
+	}
+	if (rule->action != AUDIT_NEVER && rule->action != AUDIT_ALWAYS)
 		goto exit_err;
 	if (rule->field_count > AUDIT_MAX_FIELDS)
 		goto exit_err;
@@ -158,6 +332,7 @@ exit_err:
 static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 {
 	struct audit_entry *entry;
+	struct audit_field *f;
 	int err = 0;
 	int i;
 
@@ -172,14 +347,37 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 		f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS);
 		f->val = rule->values[i];
 
-		if (f->type & AUDIT_UNUSED_BITS ||
-		    f->type == AUDIT_SE_USER ||
-		    f->type == AUDIT_SE_ROLE ||
-		    f->type == AUDIT_SE_TYPE ||
-		    f->type == AUDIT_SE_SEN ||
-		    f->type == AUDIT_SE_CLR) {
-			err = -EINVAL;
+		err = -EINVAL;
+		switch(f->type) {
+		default:
 			goto exit_free;
+		case AUDIT_PID:
+		case AUDIT_UID:
+		case AUDIT_EUID:
+		case AUDIT_SUID:
+		case AUDIT_FSUID:
+		case AUDIT_GID:
+		case AUDIT_EGID:
+		case AUDIT_SGID:
+		case AUDIT_FSGID:
+		case AUDIT_LOGINUID:
+		case AUDIT_PERS:
+		case AUDIT_ARCH:
+		case AUDIT_MSGTYPE:
+		case AUDIT_DEVMAJOR:
+		case AUDIT_DEVMINOR:
+		case AUDIT_EXIT:
+		case AUDIT_SUCCESS:
+		case AUDIT_ARG0:
+		case AUDIT_ARG1:
+		case AUDIT_ARG2:
+		case AUDIT_ARG3:
+			break;
+		case AUDIT_INODE:
+			err = audit_to_inode(&entry->rule, f);
+			if (err)
+				goto exit_free;
+			break;
 		}
 
 		entry->rule.vers_ops = (f->op & AUDIT_OPERATORS) ? 2 : 1;
@@ -196,6 +394,18 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 		}
 	}
 
+	f = entry->rule.inode_f;
+	if (f) {
+		switch(f->op) {
+		case AUDIT_NOT_EQUAL:
+			entry->rule.inode_f = NULL;
+		case AUDIT_EQUAL:
+			break;
+		default:
+			goto exit_free;
+		}
+	}
+
 exit_nofree:
 	return entry;
 
@@ -210,6 +420,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 {
 	int err = 0;
 	struct audit_entry *entry;
+	struct audit_field *f;
 	void *bufp;
 	size_t remain = datasz - sizeof(struct audit_rule_data);
 	int i;
@@ -235,6 +446,29 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		f->se_str = NULL;
 		f->se_rule = NULL;
 		switch(f->type) {
+		case AUDIT_PID:
+		case AUDIT_UID:
+		case AUDIT_EUID:
+		case AUDIT_SUID:
+		case AUDIT_FSUID:
+		case AUDIT_GID:
+		case AUDIT_EGID:
+		case AUDIT_SGID:
+		case AUDIT_FSGID:
+		case AUDIT_LOGINUID:
+		case AUDIT_PERS:
+		case AUDIT_ARCH:
+		case AUDIT_MSGTYPE:
+		case AUDIT_PPID:
+		case AUDIT_DEVMAJOR:
+		case AUDIT_DEVMINOR:
+		case AUDIT_EXIT:
+		case AUDIT_SUCCESS:
+		case AUDIT_ARG0:
+		case AUDIT_ARG1:
+		case AUDIT_ARG2:
+		case AUDIT_ARG3:
+			break;
 		case AUDIT_SE_USER:
 		case AUDIT_SE_ROLE:
 		case AUDIT_SE_TYPE:
@@ -260,6 +494,37 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 			} else
 				f->se_str = str;
 			break;
+		case AUDIT_WATCH:
+			str = audit_unpack_string(&bufp, &remain, f->val);
+			if (IS_ERR(str))
+				goto exit_free;
+			entry->rule.buflen += f->val;
+
+			err = audit_to_watch(&entry->rule, str, f->val, f->op);
+			if (err) {
+				kfree(str);
+				goto exit_free;
+			}
+			break;
+		case AUDIT_INODE:
+			err = audit_to_inode(&entry->rule, f);
+			if (err)
+				goto exit_free;
+			break;
+		default:
+			goto exit_free;
+		}
+	}
+
+	f = entry->rule.inode_f;
+	if (f) {
+		switch(f->op) {
+		case AUDIT_NOT_EQUAL:
+			entry->rule.inode_f = NULL;
+		case AUDIT_EQUAL:
+			break;
+		default:
+			goto exit_free;
 		}
 	}
 
@@ -291,7 +556,7 @@ static struct audit_rule *audit_krule_to_rule(struct audit_krule *krule)
 
 	rule = kmalloc(sizeof(*rule), GFP_KERNEL);
 	if (unlikely(!rule))
-		return ERR_PTR(-ENOMEM);
+		return NULL;
 	memset(rule, 0, sizeof(*rule));
 
 	rule->flags = krule->flags | krule->listnr;
@@ -322,7 +587,7 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
 
 	data = kmalloc(sizeof(*data) + krule->buflen, GFP_KERNEL);
 	if (unlikely(!data))
-		return ERR_PTR(-ENOMEM);
+		return NULL;
 	memset(data, 0, sizeof(*data));
 
 	data->flags = krule->flags | krule->listnr;
@@ -343,6 +608,10 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
 			data->buflen += data->values[i] =
 				audit_pack_string(&bufp, f->se_str);
 			break;
+		case AUDIT_WATCH:
+			data->buflen += data->values[i] =
+				audit_pack_string(&bufp, krule->watch->path);
+			break;
 		default:
 			data->values[i] = f->val;
 		}
@@ -378,6 +647,10 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
 			if (strcmp(a->fields[i].se_str, b->fields[i].se_str))
 				return 1;
 			break;
+		case AUDIT_WATCH:
+			if (strcmp(a->watch->path, b->watch->path))
+				return 1;
+			break;
 		default:
 			if (a->fields[i].val != b->fields[i].val)
 				return 1;
@@ -391,6 +664,32 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
 	return 0;
 }
 
+/* Duplicate the given audit watch.  The new watch's rules list is initialized
+ * to an empty list and wlist is undefined. */
+static struct audit_watch *audit_dupe_watch(struct audit_watch *old)
+{
+	char *path;
+	struct audit_watch *new;
+
+	path = kstrdup(old->path, GFP_KERNEL);
+	if (unlikely(!path))
+		return ERR_PTR(-ENOMEM);
+
+	new = audit_init_watch(path);
+	if (unlikely(IS_ERR(new))) {
+		kfree(path);
+		goto out;
+	}
+
+	new->dev = old->dev;
+	new->ino = old->ino;
+	get_inotify_watch(&old->parent->wdata);
+	new->parent = old->parent;
+
+out:
+	return new;
+}
+
 /* Duplicate selinux field information.  The se_rule is opaque, so must be
  * re-initialized. */
 static inline int audit_dupe_selinux_field(struct audit_field *df,
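audit_dupe_watch() and the get/put helpers earlier implement plain reference counting: a watch is born with a count of one, every holder (each attached krule, plus transient lookups) takes a get, and the final put frees the watch together with its path. A self-contained userspace model of the same discipline using C11 atomics (illustrative only, not kernel code):

    /* Model of the audit_watch get/put lifecycle. */
    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct watch {
            atomic_int count;
            char *path;
    };

    static struct watch *watch_new(const char *path)
    {
            struct watch *w = malloc(sizeof(*w));
            atomic_init(&w->count, 1);              /* initial reference */
            w->path = strdup(path);
            return w;
    }

    static void watch_get(struct watch *w) { atomic_fetch_add(&w->count, 1); }

    static void watch_put(struct watch *w)
    {
            if (atomic_fetch_sub(&w->count, 1) == 1) {      /* last put frees */
                    free(w->path);
                    free(w);
            }
    }

    int main(void)
    {
            struct watch *w = watch_new("/etc/passwd");
            watch_get(w);   /* a rule takes a reference */
            watch_put(w);   /* rule drops it */
            watch_put(w);   /* initial reference; storage released */
            puts("ok");
            return 0;
    }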
@@ -422,8 +721,11 @@ static inline int audit_dupe_selinux_field(struct audit_field *df,
 /* Duplicate an audit rule.  This will be a deep copy with the exception
  * of the watch - that pointer is carried over.  The selinux specific fields
  * will be updated in the copy.  The point is to be able to replace the old
- * rule with the new rule in the filterlist, then free the old rule. */
-static struct audit_entry *audit_dupe_rule(struct audit_krule *old)
+ * rule with the new rule in the filterlist, then free the old rule.
+ * The rlist element is undefined; list manipulations are handled apart from
+ * the initial copy. */
+static struct audit_entry *audit_dupe_rule(struct audit_krule *old,
+					   struct audit_watch *watch)
 {
 	u32 fcount = old->field_count;
 	struct audit_entry *entry;
@@ -442,6 +744,8 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old,
 	for (i = 0; i < AUDIT_BITMASK_SIZE; i++)
 		new->mask[i] = old->mask[i];
 	new->buflen = old->buflen;
+	new->inode_f = old->inode_f;
+	new->watch = NULL;
 	new->field_count = old->field_count;
 	memcpy(new->fields, old->fields, sizeof(struct audit_field) * fcount);
 
@@ -463,68 +767,409 @@
 		}
 	}
 
+	if (watch) {
+		audit_get_watch(watch);
+		new->watch = watch;
+	}
+
 	return entry;
 }
 
-/* Add rule to given filterlist if not a duplicate.  Protected by
- * audit_netlink_mutex. */
+/* Update inode info in audit rules based on filesystem event. */
+static void audit_update_watch(struct audit_parent *parent,
+			       const char *dname, dev_t dev,
+			       unsigned long ino, unsigned invalidating)
+{
+	struct audit_watch *owatch, *nwatch, *nextw;
+	struct audit_krule *r, *nextr;
+	struct audit_entry *oentry, *nentry;
+	struct audit_buffer *ab;
+
+	mutex_lock(&audit_filter_mutex);
+	list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) {
+		if (audit_compare_dname_path(dname, owatch->path, NULL))
+			continue;
+
+		/* If the update involves invalidating rules, do the inode-based
+		 * filtering now, so we don't omit records. */
+		if (invalidating &&
+		    audit_filter_inodes(current, current->audit_context) == AUDIT_RECORD_CONTEXT)
+			audit_set_auditable(current->audit_context);
+
+		nwatch = audit_dupe_watch(owatch);
+		if (unlikely(IS_ERR(nwatch))) {
+			mutex_unlock(&audit_filter_mutex);
+			audit_panic("error updating watch, skipping");
+			return;
+		}
+		nwatch->dev = dev;
+		nwatch->ino = ino;
+
+		list_for_each_entry_safe(r, nextr, &owatch->rules, rlist) {
+
+			oentry = container_of(r, struct audit_entry, rule);
+			list_del(&oentry->rule.rlist);
+			list_del_rcu(&oentry->list);
+
+			nentry = audit_dupe_rule(&oentry->rule, nwatch);
+			if (unlikely(IS_ERR(nentry)))
+				audit_panic("error updating watch, removing");
+			else {
+				int h = audit_hash_ino((u32)ino);
+				list_add(&nentry->rule.rlist, &nwatch->rules);
+				list_add_rcu(&nentry->list, &audit_inode_hash[h]);
+			}
+
+			call_rcu(&oentry->rcu, audit_free_rule_rcu);
+		}
+
+		ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+		audit_log_format(ab, "audit updated rules specifying watch=");
+		audit_log_untrustedstring(ab, owatch->path);
+		audit_log_format(ab, " with dev=%u ino=%lu\n", dev, ino);
+		audit_log_end(ab);
+
+		audit_remove_watch(owatch);
+		goto add_watch_to_parent; /* event applies to a single watch */
+	}
+	mutex_unlock(&audit_filter_mutex);
+	return;
+
+add_watch_to_parent:
+	list_add(&nwatch->wlist, &parent->watches);
+	mutex_unlock(&audit_filter_mutex);
+	return;
+}
+
+/* Remove all watches & rules associated with a parent that is going away. */
+static void audit_remove_parent_watches(struct audit_parent *parent)
+{
+	struct audit_watch *w, *nextw;
+	struct audit_krule *r, *nextr;
+	struct audit_entry *e;
+
+	mutex_lock(&audit_filter_mutex);
+	parent->flags |= AUDIT_PARENT_INVALID;
+	list_for_each_entry_safe(w, nextw, &parent->watches, wlist) {
+		list_for_each_entry_safe(r, nextr, &w->rules, rlist) {
+			e = container_of(r, struct audit_entry, rule);
+			list_del(&r->rlist);
+			list_del_rcu(&e->list);
+			call_rcu(&e->rcu, audit_free_rule_rcu);
+
+			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+				  "audit implicitly removed rule from list=%d\n",
+				  AUDIT_FILTER_EXIT);
+		}
+		audit_remove_watch(w);
+	}
+	mutex_unlock(&audit_filter_mutex);
+}
+
+/* Unregister inotify watches for parents on in_list.
+ * Generates an IN_IGNORED event. */
+static void audit_inotify_unregister(struct list_head *in_list)
+{
+	struct audit_parent *p, *n;
+
+	list_for_each_entry_safe(p, n, in_list, ilist) {
+		list_del(&p->ilist);
+		inotify_rm_watch(audit_ih, &p->wdata);
+		/* the put matching the get in audit_do_del_rule() */
+		put_inotify_watch(&p->wdata);
+	}
+}
+
+/* Find an existing audit rule.
+ * Caller must hold audit_filter_mutex to prevent stale rule data. */
+static struct audit_entry *audit_find_rule(struct audit_entry *entry,
+					   struct list_head *list)
+{
+	struct audit_entry *e, *found = NULL;
+	int h;
+
+	if (entry->rule.watch) {
+		/* we don't know the inode number, so must walk entire hash */
+		for (h = 0; h < AUDIT_INODE_BUCKETS; h++) {
+			list = &audit_inode_hash[h];
+			list_for_each_entry(e, list, list)
+				if (!audit_compare_rule(&entry->rule, &e->rule)) {
+					found = e;
+					goto out;
+				}
+		}
+		goto out;
+	}
+
+	list_for_each_entry(e, list, list)
905 | if (!audit_compare_rule(&entry->rule, &e->rule)) { | ||
906 | found = e; | ||
907 | goto out; | ||
908 | } | ||
909 | |||
910 | out: | ||
911 | return found; | ||
912 | } | ||
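audit_find_rule() only knows which bucket to search when the rule carries an inode filter; a watch-based rule has no inode yet, so every bucket must be walked. A small sketch of that two-mode lookup, compiled outside the kernel (the hash below is a placeholder; audit_hash_ino() may differ):

	#include <stdio.h>
	#include <string.h>

	#define BUCKETS 32

	struct rule {
		unsigned long ino;	/* known for inode rules */
		const char *path;	/* known for watch rules */
		struct rule *next;
	};

	static struct rule *hash[BUCKETS];

	/* Placeholder hash: mask the low bits of the inode number. */
	static unsigned bucket(unsigned long ino) { return (unsigned)(ino & (BUCKETS - 1)); }

	static void insert(struct rule *r)
	{
		unsigned h = bucket(r->ino);
		r->next = hash[h];
		hash[h] = r;
	}

	/* Inode rule: the bucket is known, scan only it. */
	static struct rule *find_by_ino(unsigned long ino)
	{
		for (struct rule *r = hash[bucket(ino)]; r; r = r->next)
			if (r->ino == ino)
				return r;
		return NULL;
	}

	/* Watch rule: no inode yet, so every bucket must be walked. */
	static struct rule *find_by_path(const char *path)
	{
		for (unsigned h = 0; h < BUCKETS; h++)
			for (struct rule *r = hash[h]; r; r = r->next)
				if (r->path && strcmp(r->path, path) == 0)
					return r;
		return NULL;
	}

	int main(void)
	{
		struct rule a = { .ino = 1234, .path = "/etc/passwd" };

		insert(&a);
		printf("by ino: %s  by path: found=%d\n",
		       find_by_ino(1234)->path, find_by_path("/etc/passwd") != NULL);
		return 0;
	}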
913 | |||
914 | /* Get path information necessary for adding watches. */ | ||
915 | static int audit_get_nd(char *path, struct nameidata **ndp, | ||
916 | struct nameidata **ndw) | ||
917 | { | ||
918 | struct nameidata *ndparent, *ndwatch; | ||
919 | int err; | ||
920 | |||
921 | ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL); | ||
922 | if (unlikely(!ndparent)) | ||
923 | return -ENOMEM; | ||
924 | |||
925 | ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL); | ||
926 | if (unlikely(!ndwatch)) { | ||
927 | kfree(ndparent); | ||
928 | return -ENOMEM; | ||
929 | } | ||
930 | |||
931 | err = path_lookup(path, LOOKUP_PARENT, ndparent); | ||
932 | if (err) { | ||
933 | kfree(ndparent); | ||
934 | kfree(ndwatch); | ||
935 | return err; | ||
936 | } | ||
937 | |||
938 | err = path_lookup(path, 0, ndwatch); | ||
939 | if (err) { | ||
940 | kfree(ndwatch); | ||
941 | ndwatch = NULL; | ||
942 | } | ||
943 | |||
944 | *ndp = ndparent; | ||
945 | *ndw = ndwatch; | ||
946 | |||
947 | return 0; | ||
948 | } | ||
949 | |||
950 | /* Release resources used for watch path information. */ | ||
951 | static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw) | ||
952 | { | ||
953 | if (ndp) { | ||
954 | path_release(ndp); | ||
955 | kfree(ndp); | ||
956 | } | ||
957 | if (ndw) { | ||
958 | path_release(ndw); | ||
959 | kfree(ndw); | ||
960 | } | ||
961 | } | ||
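audit_get_nd() resolves the same path twice: once with LOOKUP_PARENT for the directory that will carry the inotify watch, and once for the watched object itself, which is allowed to be absent (ndw simply comes back NULL). A userspace analogue of that "parent must exist, leaf may not" rule, using stat(2) purely for illustration:

	#include <errno.h>
	#include <limits.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/stat.h>

	static int get_parent_and_target(const char *path,
					 struct stat *parent, struct stat *target,
					 int *have_target)
	{
		char buf[PATH_MAX];
		char *slash;

		if (strlen(path) >= sizeof(buf))
			return -ENAMETOOLONG;
		strcpy(buf, path);

		slash = strrchr(buf, '/');
		if (!slash)
			strcpy(buf, ".");	/* relative name: parent is cwd */
		else if (slash == buf)
			buf[1] = '\0';		/* parent is "/" */
		else
			*slash = '\0';

		if (stat(buf, parent))		/* a missing parent is a hard error */
			return -errno;

		*have_target = (stat(path, target) == 0);	/* leaf may not exist yet */
		return 0;
	}

	int main(void)
	{
		struct stat p, t;
		int have;

		if (get_parent_and_target("/etc/no-such-file", &p, &t, &have) == 0)
			printf("parent dev=%lu ino=%lu, target %s\n",
			       (unsigned long)p.st_dev, (unsigned long)p.st_ino,
			       have ? "exists" : "absent");
		return 0;
	}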
962 | |||
963 | /* Associate the given rule with an existing parent inotify_watch. | ||
964 | * Caller must hold audit_filter_mutex. */ | ||
965 | static void audit_add_to_parent(struct audit_krule *krule, | ||
966 | struct audit_parent *parent) | ||
967 | { | ||
968 | struct audit_watch *w, *watch = krule->watch; | ||
969 | int watch_found = 0; | ||
970 | |||
971 | list_for_each_entry(w, &parent->watches, wlist) { | ||
972 | if (strcmp(watch->path, w->path)) | ||
973 | continue; | ||
974 | |||
975 | watch_found = 1; | ||
976 | |||
977 | /* put krule's and initial refs to temporary watch */ | ||
978 | audit_put_watch(watch); | ||
979 | audit_put_watch(watch); | ||
980 | |||
981 | audit_get_watch(w); | ||
982 | krule->watch = watch = w; | ||
983 | break; | ||
984 | } | ||
985 | |||
986 | if (!watch_found) { | ||
987 | get_inotify_watch(&parent->wdata); | ||
988 | watch->parent = parent; | ||
989 | |||
990 | list_add(&watch->wlist, &parent->watches); | ||
991 | } | ||
992 | list_add(&krule->rlist, &watch->rules); | ||
993 | } | ||
994 | |||
995 | /* Find a matching watch entry, or add this one. | ||
996 | * Caller must hold audit_filter_mutex. */ | ||
997 | static int audit_add_watch(struct audit_krule *krule, struct nameidata *ndp, | ||
998 | struct nameidata *ndw) | ||
999 | { | ||
1000 | struct audit_watch *watch = krule->watch; | ||
1001 | struct inotify_watch *i_watch; | ||
1002 | struct audit_parent *parent; | ||
1003 | int ret = 0; | ||
1004 | |||
1005 | /* update watch filter fields */ | ||
1006 | if (ndw) { | ||
1007 | watch->dev = ndw->dentry->d_inode->i_sb->s_dev; | ||
1008 | watch->ino = ndw->dentry->d_inode->i_ino; | ||
1009 | } | ||
1010 | |||
1011 | /* The audit_filter_mutex must not be held during inotify calls because | ||
1012 | * we hold it during inotify event callback processing. If an existing | ||
1013 | * inotify watch is found, inotify_find_watch() grabs a reference before | ||
1014 | * returning. | ||
1015 | */ | ||
1016 | mutex_unlock(&audit_filter_mutex); | ||
1017 | |||
1018 | if (inotify_find_watch(audit_ih, ndp->dentry->d_inode, &i_watch) < 0) { | ||
1019 | parent = audit_init_parent(ndp); | ||
1020 | if (IS_ERR(parent)) { | ||
1021 | /* caller expects mutex locked */ | ||
1022 | mutex_lock(&audit_filter_mutex); | ||
1023 | return PTR_ERR(parent); | ||
1024 | } | ||
1025 | } else | ||
1026 | parent = container_of(i_watch, struct audit_parent, wdata); | ||
1027 | |||
1028 | mutex_lock(&audit_filter_mutex); | ||
1029 | |||
1030 | /* parent was moved before we took audit_filter_mutex */ | ||
1031 | if (parent->flags & AUDIT_PARENT_INVALID) | ||
1032 | ret = -ENOENT; | ||
1033 | else | ||
1034 | audit_add_to_parent(krule, parent); | ||
1035 | |||
1036 | /* match get in audit_init_parent or inotify_find_watch */ | ||
1037 | put_inotify_watch(&parent->wdata); | ||
1038 | return ret; | ||
1039 | } | ||
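The comment above is the crux: audit_filter_mutex is taken inside the inotify event callback, so calling into inotify while holding it would deadlock. The registration path therefore drops the mutex, calls out, re-takes it, and revalidates (the AUDIT_PARENT_INVALID check), because the world may have changed in between. A pthread sketch of the shape of that pattern (all names here are illustrative):

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t filter_mutex = PTHREAD_MUTEX_INITIALIZER;

	struct parent {
		int invalid;	/* set by the event callback while we were unlocked */
	};

	/* Stand-in for inotify_find_watch(): must not be called under filter_mutex,
	 * because the subsystem may invoke our event callback, which takes it. */
	static void subsystem_call(struct parent *p)
	{
		(void)p;
	}

	static int add_watch(struct parent *p)
	{
		/* caller holds filter_mutex on entry and expects it held on return */
		pthread_mutex_unlock(&filter_mutex);
		subsystem_call(p);
		pthread_mutex_lock(&filter_mutex);

		if (p->invalid)		/* revalidate: parent may have gone away */
			return -1;
		return 0;
	}

	int main(void)
	{
		struct parent p = { .invalid = 0 };

		pthread_mutex_lock(&filter_mutex);
		printf("add_watch: %d\n", add_watch(&p));
		pthread_mutex_unlock(&filter_mutex);
		return 0;
	}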
1040 | |||
1041 | /* Add rule to given filterlist if not a duplicate. */ | ||
471 | static inline int audit_add_rule(struct audit_entry *entry, | 1042 | static inline int audit_add_rule(struct audit_entry *entry, |
472 | struct list_head *list) | 1043 | struct list_head *list) |
473 | { | 1044 | { |
474 | struct audit_entry *e; | 1045 | struct audit_entry *e; |
1046 | struct audit_field *inode_f = entry->rule.inode_f; | ||
1047 | struct audit_watch *watch = entry->rule.watch; | ||
1048 | struct nameidata *ndp, *ndw; | ||
1049 | int h, err, putnd_needed = 0; | ||
1050 | |||
1051 | if (inode_f) { | ||
1052 | h = audit_hash_ino(inode_f->val); | ||
1053 | list = &audit_inode_hash[h]; | ||
1054 | } | ||
475 | 1055 | ||
476 | /* Do not use the _rcu iterator here, since this is the only | 1056 | mutex_lock(&audit_filter_mutex); |
477 | * addition routine. */ | 1057 | e = audit_find_rule(entry, list); |
478 | list_for_each_entry(e, list, list) { | 1058 | mutex_unlock(&audit_filter_mutex); |
479 | if (!audit_compare_rule(&entry->rule, &e->rule)) | 1059 | if (e) { |
480 | return -EEXIST; | 1060 | err = -EEXIST; |
1061 | goto error; | ||
1062 | } | ||
1063 | |||
1064 | /* Avoid calling path_lookup under audit_filter_mutex. */ | ||
1065 | if (watch) { | ||
1066 | err = audit_get_nd(watch->path, &ndp, &ndw); | ||
1067 | if (err) | ||
1068 | goto error; | ||
1069 | putnd_needed = 1; | ||
1070 | } | ||
1071 | |||
1072 | mutex_lock(&audit_filter_mutex); | ||
1073 | if (watch) { | ||
1074 | /* audit_filter_mutex is dropped and re-taken during this call */ | ||
1075 | err = audit_add_watch(&entry->rule, ndp, ndw); | ||
1076 | if (err) { | ||
1077 | mutex_unlock(&audit_filter_mutex); | ||
1078 | goto error; | ||
1079 | } | ||
1080 | h = audit_hash_ino((u32)watch->ino); | ||
1081 | list = &audit_inode_hash[h]; | ||
481 | } | 1082 | } |
482 | 1083 | ||
483 | if (entry->rule.flags & AUDIT_FILTER_PREPEND) { | 1084 | if (entry->rule.flags & AUDIT_FILTER_PREPEND) { |
484 | list_add_rcu(&entry->list, list); | 1085 | list_add_rcu(&entry->list, list); |
1086 | entry->rule.flags &= ~AUDIT_FILTER_PREPEND; | ||
485 | } else { | 1087 | } else { |
486 | list_add_tail_rcu(&entry->list, list); | 1088 | list_add_tail_rcu(&entry->list, list); |
487 | } | 1089 | } |
1090 | mutex_unlock(&audit_filter_mutex); | ||
488 | 1091 | ||
489 | return 0; | 1092 | if (putnd_needed) |
1093 | audit_put_nd(ndp, ndw); | ||
1094 | |||
1095 | return 0; | ||
1096 | |||
1097 | error: | ||
1098 | if (putnd_needed) | ||
1099 | audit_put_nd(ndp, ndw); | ||
1100 | if (watch) | ||
1101 | audit_put_watch(watch); /* tmp watch, matches initial get */ | ||
1102 | return err; | ||
490 | } | 1103 | } |
491 | 1104 | ||
492 | /* Remove an existing rule from filterlist. Protected by | 1105 | /* Remove an existing rule from filterlist. */ |
493 | * audit_netlink_mutex. */ | ||
494 | static inline int audit_del_rule(struct audit_entry *entry, | 1106 | static inline int audit_del_rule(struct audit_entry *entry, |
495 | struct list_head *list) | 1107 | struct list_head *list) |
496 | { | 1108 | { |
497 | struct audit_entry *e; | 1109 | struct audit_entry *e; |
1110 | struct audit_field *inode_f = entry->rule.inode_f; | ||
1111 | struct audit_watch *watch, *tmp_watch = entry->rule.watch; | ||
1112 | LIST_HEAD(inotify_list); | ||
1113 | int h, ret = 0; | ||
1114 | |||
1115 | if (inode_f) { | ||
1116 | h = audit_hash_ino(inode_f->val); | ||
1117 | list = &audit_inode_hash[h]; | ||
1118 | } | ||
498 | 1119 | ||
499 | /* Do not use the _rcu iterator here, since this is the only | 1120 | mutex_lock(&audit_filter_mutex); |
500 | * deletion routine. */ | 1121 | e = audit_find_rule(entry, list); |
501 | list_for_each_entry(e, list, list) { | 1122 | if (!e) { |
502 | if (!audit_compare_rule(&entry->rule, &e->rule)) { | 1123 | mutex_unlock(&audit_filter_mutex); |
503 | list_del_rcu(&e->list); | 1124 | ret = -ENOENT; |
504 | call_rcu(&e->rcu, audit_free_rule_rcu); | 1125 | goto out; |
505 | return 0; | 1126 | } |
1127 | |||
1128 | watch = e->rule.watch; | ||
1129 | if (watch) { | ||
1130 | struct audit_parent *parent = watch->parent; | ||
1131 | |||
1132 | list_del(&e->rule.rlist); | ||
1133 | |||
1134 | if (list_empty(&watch->rules)) { | ||
1135 | audit_remove_watch(watch); | ||
1136 | |||
1137 | if (list_empty(&parent->watches)) { | ||
1138 | /* Put parent on the inotify un-registration | ||
1139 | * list. Grab a reference before releasing | ||
1140 | * audit_filter_mutex, to be released in | ||
1141 | * audit_inotify_unregister(). */ | ||
1142 | list_add(&parent->ilist, &inotify_list); | ||
1143 | get_inotify_watch(&parent->wdata); | ||
1144 | } | ||
506 | } | 1145 | } |
507 | } | 1146 | } |
508 | return -ENOENT; /* No matching rule */ | 1147 | |
1148 | list_del_rcu(&e->list); | ||
1149 | call_rcu(&e->rcu, audit_free_rule_rcu); | ||
1150 | |||
1151 | mutex_unlock(&audit_filter_mutex); | ||
1152 | |||
1153 | if (!list_empty(&inotify_list)) | ||
1154 | audit_inotify_unregister(&inotify_list); | ||
1155 | |||
1156 | out: | ||
1157 | if (tmp_watch) | ||
1158 | audit_put_watch(tmp_watch); /* match initial get */ | ||
1159 | |||
1160 | return ret; | ||
509 | } | 1161 | } |
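Deleting a rule may leave a parent with no watches, but inotify_rm_watch() cannot be called while audit_filter_mutex is held, so the parent is pinned with an extra reference and parked on a stack-local list; the actual unregistration happens after the unlock. A compact sketch of that defer-until-unlocked idiom (simplified: the real code uses atomic reference puts):

	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

	struct parent {
		int refs;
		struct parent *next;	/* link for the local deferral list */
	};

	static void put(struct parent *p)
	{
		if (--p->refs == 0)
			free(p);
	}

	static void unregister_all(struct parent *head)
	{
		while (head) {
			struct parent *next = head->next;
			/* the inotify_rm_watch() equivalent runs here, unlocked */
			put(head);	/* drop the reference taken under the lock */
			head = next;
		}
	}

	int main(void)
	{
		struct parent *p = calloc(1, sizeof(*p));
		struct parent *defer = NULL;

		p->refs = 1;
		pthread_mutex_lock(&lock);
		/* rule deletion decided this parent is now unused: pin and park it */
		p->refs++;
		p->next = defer;
		defer = p;
		put(p);			/* the list's original reference goes away */
		pthread_mutex_unlock(&lock);

		unregister_all(defer);	/* safe: no locks held */
		printf("done\n");
		return 0;
	}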
510 | 1162 | ||
511 | /* List rules using struct audit_rule. Exists for backward | 1163 | /* List rules using struct audit_rule. Exists for backward |
512 | * compatibility with userspace. */ | 1164 | * compatibility with userspace. */ |
513 | static int audit_list(void *_dest) | 1165 | static void audit_list(int pid, int seq, struct sk_buff_head *q) |
514 | { | 1166 | { |
515 | int pid, seq; | 1167 | struct sk_buff *skb; |
516 | int *dest = _dest; | ||
517 | struct audit_entry *entry; | 1168 | struct audit_entry *entry; |
518 | int i; | 1169 | int i; |
519 | 1170 | ||
520 | pid = dest[0]; | 1171 | /* This is a blocking read, so use audit_filter_mutex instead of rcu |
521 | seq = dest[1]; | 1172 | * iterator to sync with list writers. */ |
522 | kfree(dest); | ||
523 | |||
524 | mutex_lock(&audit_netlink_mutex); | ||
525 | |||
526 | /* The *_rcu iterators not needed here because we are | ||
527 | always called with audit_netlink_mutex held. */ | ||
528 | for (i=0; i<AUDIT_NR_FILTERS; i++) { | 1173 | for (i=0; i<AUDIT_NR_FILTERS; i++) { |
529 | list_for_each_entry(entry, &audit_filter_list[i], list) { | 1174 | list_for_each_entry(entry, &audit_filter_list[i], list) { |
530 | struct audit_rule *rule; | 1175 | struct audit_rule *rule; |
@@ -532,33 +1177,41 @@ static int audit_list(void *_dest) | |||
532 | rule = audit_krule_to_rule(&entry->rule); | 1177 | rule = audit_krule_to_rule(&entry->rule); |
533 | if (unlikely(!rule)) | 1178 | if (unlikely(!rule)) |
534 | break; | 1179 | break; |
535 | audit_send_reply(pid, seq, AUDIT_LIST, 0, 1, | 1180 | skb = audit_make_reply(pid, seq, AUDIT_LIST, 0, 1, |
536 | rule, sizeof(*rule)); | 1181 | rule, sizeof(*rule)); |
1182 | if (skb) | ||
1183 | skb_queue_tail(q, skb); | ||
537 | kfree(rule); | 1184 | kfree(rule); |
538 | } | 1185 | } |
539 | } | 1186 | } |
540 | audit_send_reply(pid, seq, AUDIT_LIST, 1, 1, NULL, 0); | 1187 | for (i = 0; i < AUDIT_INODE_BUCKETS; i++) { |
541 | 1188 | list_for_each_entry(entry, &audit_inode_hash[i], list) { | |
542 | mutex_unlock(&audit_netlink_mutex); | 1189 | struct audit_rule *rule; |
543 | return 0; | 1190 | |
1191 | rule = audit_krule_to_rule(&entry->rule); | ||
1192 | if (unlikely(!rule)) | ||
1193 | break; | ||
1194 | skb = audit_make_reply(pid, seq, AUDIT_LIST, 0, 1, | ||
1195 | rule, sizeof(*rule)); | ||
1196 | if (skb) | ||
1197 | skb_queue_tail(q, skb); | ||
1198 | kfree(rule); | ||
1199 | } | ||
1200 | } | ||
1201 | skb = audit_make_reply(pid, seq, AUDIT_LIST, 1, 1, NULL, 0); | ||
1202 | if (skb) | ||
1203 | skb_queue_tail(q, skb); | ||
544 | } | 1204 | } |
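Instead of walking the filter lists from a kernel thread (which raced with rule changes), the lists are now snapshotted into a queue of ready-made replies while audit_filter_mutex is held; only the sending is handed off to a thread. A userspace sketch of the idea with pthreads, standing in plain strings for skbs:

	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	struct reply {
		char text[32];
		struct reply *next;
	};

	struct queue {
		struct reply *head, **tail;
	};

	static pthread_mutex_t filter_mutex = PTHREAD_MUTEX_INITIALIZER;
	static const char *rules[] = { "rule-a", "rule-b" };

	static void queue_tail(struct queue *q, const char *text)
	{
		struct reply *r = calloc(1, sizeof(*r));
		snprintf(r->text, sizeof(r->text), "%s", text);
		*q->tail = r;
		q->tail = &r->next;
	}

	/* Sender thread: everything it touches was built under the mutex already. */
	static void *send_list(void *arg)
	{
		struct queue *q = arg;
		for (struct reply *r = q->head; r; ) {
			struct reply *next = r->next;
			printf("sent %s\n", r->text);
			free(r);
			r = next;
		}
		free(q);
		return NULL;
	}

	int main(void)
	{
		struct queue *q = calloc(1, sizeof(*q));
		pthread_t tid;

		q->tail = &q->head;
		pthread_mutex_lock(&filter_mutex);	/* snapshot under the lock */
		for (size_t i = 0; i < sizeof(rules) / sizeof(rules[0]); i++)
			queue_tail(q, rules[i]);
		queue_tail(q, "done");			/* the terminating reply */
		pthread_mutex_unlock(&filter_mutex);

		pthread_create(&tid, NULL, send_list, q);	/* send unlocked */
		pthread_join(tid, NULL);
		return 0;
	}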
545 | 1205 | ||
546 | /* List rules using struct audit_rule_data. */ | 1206 | /* List rules using struct audit_rule_data. */ |
547 | static int audit_list_rules(void *_dest) | 1207 | static void audit_list_rules(int pid, int seq, struct sk_buff_head *q) |
548 | { | 1208 | { |
549 | int pid, seq; | 1209 | struct sk_buff *skb; |
550 | int *dest = _dest; | ||
551 | struct audit_entry *e; | 1210 | struct audit_entry *e; |
552 | int i; | 1211 | int i; |
553 | 1212 | ||
554 | pid = dest[0]; | 1213 | /* This is a blocking read, so use audit_filter_mutex instead of rcu |
555 | seq = dest[1]; | 1214 | * iterator to sync with list writers. */ |
556 | kfree(dest); | ||
557 | |||
558 | mutex_lock(&audit_netlink_mutex); | ||
559 | |||
560 | /* The *_rcu iterators not needed here because we are | ||
561 | always called with audit_netlink_mutex held. */ | ||
562 | for (i=0; i<AUDIT_NR_FILTERS; i++) { | 1215 | for (i=0; i<AUDIT_NR_FILTERS; i++) { |
563 | list_for_each_entry(e, &audit_filter_list[i], list) { | 1216 | list_for_each_entry(e, &audit_filter_list[i], list) { |
564 | struct audit_rule_data *data; | 1217 | struct audit_rule_data *data; |
@@ -566,15 +1219,30 @@ static int audit_list_rules(void *_dest) | |||
566 | data = audit_krule_to_data(&e->rule); | 1219 | data = audit_krule_to_data(&e->rule); |
567 | if (unlikely(!data)) | 1220 | if (unlikely(!data)) |
568 | break; | 1221 | break; |
569 | audit_send_reply(pid, seq, AUDIT_LIST_RULES, 0, 1, | 1222 | skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 0, 1, |
570 | data, sizeof(*data)); | 1223 | data, sizeof(*data) + data->buflen); |
1224 | if (skb) | ||
1225 | skb_queue_tail(q, skb); | ||
571 | kfree(data); | 1226 | kfree(data); |
572 | } | 1227 | } |
573 | } | 1228 | } |
574 | audit_send_reply(pid, seq, AUDIT_LIST_RULES, 1, 1, NULL, 0); | 1229 | for (i = 0; i < AUDIT_INODE_BUCKETS; i++) { |
1230 | list_for_each_entry(e, &audit_inode_hash[i], list) { | ||
1231 | struct audit_rule_data *data; | ||
575 | 1232 | ||
576 | mutex_unlock(&audit_netlink_mutex); | 1233 | data = audit_krule_to_data(&e->rule); |
577 | return 0; | 1234 | if (unlikely(!data)) |
1235 | break; | ||
1236 | skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 0, 1, | ||
1237 | data, sizeof(*data) + data->buflen); | ||
1238 | if (skb) | ||
1239 | skb_queue_tail(q, skb); | ||
1240 | kfree(data); | ||
1241 | } | ||
1242 | } | ||
1243 | skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 1, 1, NULL, 0); | ||
1244 | if (skb) | ||
1245 | skb_queue_tail(q, skb); | ||
578 | } | 1246 | } |
579 | 1247 | ||
580 | /** | 1248 | /** |
@@ -592,7 +1260,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, | |||
592 | size_t datasz, uid_t loginuid, u32 sid) | 1260 | size_t datasz, uid_t loginuid, u32 sid) |
593 | { | 1261 | { |
594 | struct task_struct *tsk; | 1262 | struct task_struct *tsk; |
595 | int *dest; | 1263 | struct audit_netlink_list *dest; |
596 | int err = 0; | 1264 | int err = 0; |
597 | struct audit_entry *entry; | 1265 | struct audit_entry *entry; |
598 | 1266 | ||
@@ -605,18 +1273,22 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, | |||
605 | * happen if we're actually running in the context of auditctl | 1273 | * happen if we're actually running in the context of auditctl |
606 | * trying to _send_ the stuff */ | 1274 | * trying to _send_ the stuff */ |
607 | 1275 | ||
608 | dest = kmalloc(2 * sizeof(int), GFP_KERNEL); | 1276 | dest = kmalloc(sizeof(struct audit_netlink_list), GFP_KERNEL); |
609 | if (!dest) | 1277 | if (!dest) |
610 | return -ENOMEM; | 1278 | return -ENOMEM; |
611 | dest[0] = pid; | 1279 | dest->pid = pid; |
612 | dest[1] = seq; | 1280 | skb_queue_head_init(&dest->q); |
613 | 1281 | ||
1282 | mutex_lock(&audit_filter_mutex); | ||
614 | if (type == AUDIT_LIST) | 1283 | if (type == AUDIT_LIST) |
615 | tsk = kthread_run(audit_list, dest, "audit_list"); | 1284 | audit_list(pid, seq, &dest->q); |
616 | else | 1285 | else |
617 | tsk = kthread_run(audit_list_rules, dest, | 1286 | audit_list_rules(pid, seq, &dest->q); |
618 | "audit_list_rules"); | 1287 | mutex_unlock(&audit_filter_mutex); |
1288 | |||
1289 | tsk = kthread_run(audit_send_list, dest, "audit_send_list"); | ||
619 | if (IS_ERR(tsk)) { | 1290 | if (IS_ERR(tsk)) { |
1291 | skb_queue_purge(&dest->q); | ||
620 | kfree(dest); | 1292 | kfree(dest); |
621 | err = PTR_ERR(tsk); | 1293 | err = PTR_ERR(tsk); |
622 | } | 1294 | } |
@@ -632,6 +1304,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, | |||
632 | 1304 | ||
633 | err = audit_add_rule(entry, | 1305 | err = audit_add_rule(entry, |
634 | &audit_filter_list[entry->rule.listnr]); | 1306 | &audit_filter_list[entry->rule.listnr]); |
1307 | |||
635 | if (sid) { | 1308 | if (sid) { |
636 | char *ctx = NULL; | 1309 | char *ctx = NULL; |
637 | u32 len; | 1310 | u32 len; |
@@ -712,7 +1385,43 @@ int audit_comparator(const u32 left, const u32 op, const u32 right) | |||
712 | return 0; | 1385 | return 0; |
713 | } | 1386 | } |
714 | 1387 | ||
1388 | /* Compare given dentry name with last component in given path, | ||
1389 | * return of 0 indicates a match. */ | ||
1390 | int audit_compare_dname_path(const char *dname, const char *path, | ||
1391 | int *dirlen) | ||
1392 | { | ||
1393 | int dlen, plen; | ||
1394 | const char *p; | ||
715 | 1395 | ||
1396 | if (!dname || !path) | ||
1397 | return 1; | ||
1398 | |||
1399 | dlen = strlen(dname); | ||
1400 | plen = strlen(path); | ||
1401 | if (plen < dlen) | ||
1402 | return 1; | ||
1403 | |||
1404 | /* disregard trailing slashes */ | ||
1405 | p = path + plen - 1; | ||
1406 | while ((*p == '/') && (p > path)) | ||
1407 | p--; | ||
1408 | |||
1409 | /* find last path component */ | ||
1410 | p = p - dlen + 1; | ||
1411 | if (p < path) | ||
1412 | return 1; | ||
1413 | else if (p > path) { | ||
1414 | if (*--p != '/') | ||
1415 | return 1; | ||
1416 | else | ||
1417 | p++; | ||
1418 | } | ||
1419 | |||
1420 | /* return length of path's directory component */ | ||
1421 | if (dirlen) | ||
1422 | *dirlen = p - path; | ||
1423 | return strncmp(p, dname, dlen); | ||
1424 | } | ||
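The helper above is subtle enough to deserve a worked example. Its logic is pure C and compiles as-is outside the kernel; the main() below checks the interesting cases: an exact last component, trailing slashes, a bare name with no directory part, and a near-miss where dname is only a suffix of the component.

	#include <assert.h>
	#include <stdio.h>
	#include <string.h>

	/* Same logic as audit_compare_dname_path(); 0 means "dname is the last
	 * component of path", *dirlen gets the directory prefix length. */
	static int compare_dname_path(const char *dname, const char *path, int *dirlen)
	{
		int dlen, plen;
		const char *p;

		if (!dname || !path)
			return 1;

		dlen = strlen(dname);
		plen = strlen(path);
		if (plen < dlen)
			return 1;

		/* disregard trailing slashes */
		p = path + plen - 1;
		while ((*p == '/') && (p > path))
			p--;

		/* find last path component */
		p = p - dlen + 1;
		if (p < path)
			return 1;
		else if (p > path) {
			if (*--p != '/')
				return 1;
			else
				p++;
		}

		if (dirlen)
			*dirlen = p - path;
		return strncmp(p, dname, dlen);
	}

	int main(void)
	{
		int dirlen = 0;

		assert(compare_dname_path("passwd", "/etc/passwd", &dirlen) == 0);
		assert(dirlen == 5);				/* "/etc/" */
		assert(compare_dname_path("passwd", "/etc/passwd//", NULL) == 0);
		assert(compare_dname_path("passwd", "passwd", NULL) == 0);
		assert(compare_dname_path("sswd", "/etc/passwd", NULL) != 0);
		puts("all cases behave as expected");
		return 0;
	}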
716 | 1425 | ||
717 | static int audit_filter_user_rules(struct netlink_skb_parms *cb, | 1426 | static int audit_filter_user_rules(struct netlink_skb_parms *cb, |
718 | struct audit_krule *rule, | 1427 | struct audit_krule *rule, |
@@ -744,7 +1453,6 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb, | |||
744 | } | 1453 | } |
745 | switch (rule->action) { | 1454 | switch (rule->action) { |
746 | case AUDIT_NEVER: *state = AUDIT_DISABLED; break; | 1455 | case AUDIT_NEVER: *state = AUDIT_DISABLED; break; |
747 | case AUDIT_POSSIBLE: *state = AUDIT_BUILD_CONTEXT; break; | ||
748 | case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; | 1456 | case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; |
749 | } | 1457 | } |
750 | return 1; | 1458 | return 1; |
@@ -826,32 +1534,65 @@ static inline int audit_rule_has_selinux(struct audit_krule *rule) | |||
826 | int selinux_audit_rule_update(void) | 1534 | int selinux_audit_rule_update(void) |
827 | { | 1535 | { |
828 | struct audit_entry *entry, *n, *nentry; | 1536 | struct audit_entry *entry, *n, *nentry; |
1537 | struct audit_watch *watch; | ||
829 | int i, err = 0; | 1538 | int i, err = 0; |
830 | 1539 | ||
831 | /* audit_netlink_mutex synchronizes the writers */ | 1540 | /* audit_filter_mutex synchronizes the writers */ |
832 | mutex_lock(&audit_netlink_mutex); | 1541 | mutex_lock(&audit_filter_mutex); |
833 | 1542 | ||
834 | for (i = 0; i < AUDIT_NR_FILTERS; i++) { | 1543 | for (i = 0; i < AUDIT_NR_FILTERS; i++) { |
835 | list_for_each_entry_safe(entry, n, &audit_filter_list[i], list) { | 1544 | list_for_each_entry_safe(entry, n, &audit_filter_list[i], list) { |
836 | if (!audit_rule_has_selinux(&entry->rule)) | 1545 | if (!audit_rule_has_selinux(&entry->rule)) |
837 | continue; | 1546 | continue; |
838 | 1547 | ||
839 | nentry = audit_dupe_rule(&entry->rule); | 1548 | watch = entry->rule.watch; |
1549 | nentry = audit_dupe_rule(&entry->rule, watch); | ||
840 | if (unlikely(IS_ERR(nentry))) { | 1550 | if (unlikely(IS_ERR(nentry))) { |
841 | /* save the first error encountered for the | 1551 | /* save the first error encountered for the |
842 | * return value */ | 1552 | * return value */ |
843 | if (!err) | 1553 | if (!err) |
844 | err = PTR_ERR(nentry); | 1554 | err = PTR_ERR(nentry); |
845 | audit_panic("error updating selinux filters"); | 1555 | audit_panic("error updating selinux filters"); |
1556 | if (watch) | ||
1557 | list_del(&entry->rule.rlist); | ||
846 | list_del_rcu(&entry->list); | 1558 | list_del_rcu(&entry->list); |
847 | } else { | 1559 | } else { |
1560 | if (watch) { | ||
1561 | list_add(&nentry->rule.rlist, | ||
1562 | &watch->rules); | ||
1563 | list_del(&entry->rule.rlist); | ||
1564 | } | ||
848 | list_replace_rcu(&entry->list, &nentry->list); | 1565 | list_replace_rcu(&entry->list, &nentry->list); |
849 | } | 1566 | } |
850 | call_rcu(&entry->rcu, audit_free_rule_rcu); | 1567 | call_rcu(&entry->rcu, audit_free_rule_rcu); |
851 | } | 1568 | } |
852 | } | 1569 | } |
853 | 1570 | ||
854 | mutex_unlock(&audit_netlink_mutex); | 1571 | mutex_unlock(&audit_filter_mutex); |
855 | 1572 | ||
856 | return err; | 1573 | return err; |
857 | } | 1574 | } |
1575 | |||
1576 | /* Update watch data in audit rules based on inotify events. */ | ||
1577 | void audit_handle_ievent(struct inotify_watch *i_watch, u32 wd, u32 mask, | ||
1578 | u32 cookie, const char *dname, struct inode *inode) | ||
1579 | { | ||
1580 | struct audit_parent *parent; | ||
1581 | |||
1582 | parent = container_of(i_watch, struct audit_parent, wdata); | ||
1583 | |||
1584 | if (mask & (IN_CREATE|IN_MOVED_TO) && inode) | ||
1585 | audit_update_watch(parent, dname, inode->i_sb->s_dev, | ||
1586 | inode->i_ino, 0); | ||
1587 | else if (mask & (IN_DELETE|IN_MOVED_FROM)) | ||
1588 | audit_update_watch(parent, dname, (dev_t)-1, (unsigned long)-1, 1); | ||
1589 | /* inotify automatically removes the watch and sends IN_IGNORED */ | ||
1590 | else if (mask & (IN_DELETE_SELF|IN_UNMOUNT)) | ||
1591 | audit_remove_parent_watches(parent); | ||
1592 | /* inotify does not remove the watch, so remove it manually */ | ||
1593 | else if (mask & IN_MOVE_SELF) { | ||
1594 | audit_remove_parent_watches(parent); | ||
1595 | inotify_remove_watch_locked(audit_ih, i_watch); | ||
1596 | } else if (mask & IN_IGNORED) | ||
1597 | put_inotify_watch(i_watch); | ||
1598 | } | ||
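The event handler reduces to a dispatch on the inotify mask bits, and those same bits are visible from userspace. A minimal reader performing the analogous dispatch (it blocks for one batch of events on /tmp):

	#include <stdio.h>
	#include <sys/inotify.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[4096];
		int fd = inotify_init();

		if (fd < 0 || inotify_add_watch(fd, "/tmp", IN_ALL_EVENTS) < 0) {
			perror("inotify");
			return 1;
		}

		ssize_t len = read(fd, buf, sizeof(buf));
		for (char *p = buf; p < buf + len; ) {
			struct inotify_event *e = (struct inotify_event *)p;

			/* same dispatch structure as audit_handle_ievent() */
			if (e->mask & (IN_CREATE | IN_MOVED_TO))
				printf("update watch: %s appeared\n", e->len ? e->name : "?");
			else if (e->mask & (IN_DELETE | IN_MOVED_FROM))
				printf("invalidate: %s went away\n", e->len ? e->name : "?");
			else if (e->mask & (IN_DELETE_SELF | IN_UNMOUNT | IN_MOVE_SELF))
				printf("drop all watches under this parent\n");
			else if (e->mask & IN_IGNORED)
				printf("watch gone; release our reference\n");

			p += sizeof(*e) + e->len;
		}
		close(fd);
		return 0;
	}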
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 1c03a4ed1b27..9ebd96fda295 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c | |||
@@ -3,7 +3,7 @@ | |||
3 | * | 3 | * |
4 | * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. | 4 | * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. |
5 | * Copyright 2005 Hewlett-Packard Development Company, L.P. | 5 | * Copyright 2005 Hewlett-Packard Development Company, L.P. |
6 | * Copyright (C) 2005 IBM Corporation | 6 | * Copyright (C) 2005, 2006 IBM Corporation |
7 | * All Rights Reserved. | 7 | * All Rights Reserved. |
8 | * | 8 | * |
9 | * This program is free software; you can redistribute it and/or modify | 9 | * This program is free software; you can redistribute it and/or modify |
@@ -29,6 +29,9 @@ | |||
29 | * this file -- see entry.S) is based on a GPL'd patch written by | 29 | * this file -- see entry.S) is based on a GPL'd patch written by |
30 | * okir@suse.de and Copyright 2003 SuSE Linux AG. | 30 | * okir@suse.de and Copyright 2003 SuSE Linux AG. |
31 | * | 31 | * |
32 | * POSIX message queue support added by George Wilson <ltcgcw@us.ibm.com>, | ||
33 | * 2006. | ||
34 | * | ||
32 | * The support of additional filter rules compares (>, <, >=, <=) was | 35 | * The support of additional filter rules compares (>, <, >=, <=) was |
33 | * added by Dustin Kirkland <dustin.kirkland@us.ibm.com>, 2005. | 36 | * added by Dustin Kirkland <dustin.kirkland@us.ibm.com>, 2005. |
34 | * | 37 | * |
@@ -49,6 +52,7 @@ | |||
49 | #include <linux/module.h> | 52 | #include <linux/module.h> |
50 | #include <linux/mount.h> | 53 | #include <linux/mount.h> |
51 | #include <linux/socket.h> | 54 | #include <linux/socket.h> |
55 | #include <linux/mqueue.h> | ||
52 | #include <linux/audit.h> | 56 | #include <linux/audit.h> |
53 | #include <linux/personality.h> | 57 | #include <linux/personality.h> |
54 | #include <linux/time.h> | 58 | #include <linux/time.h> |
@@ -59,6 +63,8 @@ | |||
59 | #include <linux/list.h> | 63 | #include <linux/list.h> |
60 | #include <linux/tty.h> | 64 | #include <linux/tty.h> |
61 | #include <linux/selinux.h> | 65 | #include <linux/selinux.h> |
66 | #include <linux/binfmts.h> | ||
67 | #include <linux/syscalls.h> | ||
62 | 68 | ||
63 | #include "audit.h" | 69 | #include "audit.h" |
64 | 70 | ||
@@ -76,6 +82,9 @@ extern int audit_enabled; | |||
76 | * path_lookup. */ | 82 | * path_lookup. */ |
77 | #define AUDIT_NAMES_RESERVED 7 | 83 | #define AUDIT_NAMES_RESERVED 7 |
78 | 84 | ||
85 | /* Indicates that audit should log the full pathname. */ | ||
86 | #define AUDIT_NAME_FULL -1 | ||
87 | |||
79 | /* When fs/namei.c:getname() is called, we store the pointer in name and | 88 | /* When fs/namei.c:getname() is called, we store the pointer in name and |
80 | * we don't let putname() free it (instead we free all of the saved | 89 | * we don't let putname() free it (instead we free all of the saved |
81 | * pointers at syscall exit time). | 90 | * pointers at syscall exit time). |
@@ -83,8 +92,9 @@ extern int audit_enabled; | |||
83 | * Further, in fs/namei.c:path_lookup() we store the inode and device. */ | 92 | * Further, in fs/namei.c:path_lookup() we store the inode and device. */ |
84 | struct audit_names { | 93 | struct audit_names { |
85 | const char *name; | 94 | const char *name; |
95 | int name_len; /* number of name's characters to log */ | ||
96 | unsigned name_put; /* call __putname() for this name */ | ||
86 | unsigned long ino; | 97 | unsigned long ino; |
87 | unsigned long pino; | ||
88 | dev_t dev; | 98 | dev_t dev; |
89 | umode_t mode; | 99 | umode_t mode; |
90 | uid_t uid; | 100 | uid_t uid; |
@@ -100,6 +110,33 @@ struct audit_aux_data { | |||
100 | 110 | ||
101 | #define AUDIT_AUX_IPCPERM 0 | 111 | #define AUDIT_AUX_IPCPERM 0 |
102 | 112 | ||
113 | struct audit_aux_data_mq_open { | ||
114 | struct audit_aux_data d; | ||
115 | int oflag; | ||
116 | mode_t mode; | ||
117 | struct mq_attr attr; | ||
118 | }; | ||
119 | |||
120 | struct audit_aux_data_mq_sendrecv { | ||
121 | struct audit_aux_data d; | ||
122 | mqd_t mqdes; | ||
123 | size_t msg_len; | ||
124 | unsigned int msg_prio; | ||
125 | struct timespec abs_timeout; | ||
126 | }; | ||
127 | |||
128 | struct audit_aux_data_mq_notify { | ||
129 | struct audit_aux_data d; | ||
130 | mqd_t mqdes; | ||
131 | struct sigevent notification; | ||
132 | }; | ||
133 | |||
134 | struct audit_aux_data_mq_getsetattr { | ||
135 | struct audit_aux_data d; | ||
136 | mqd_t mqdes; | ||
137 | struct mq_attr mqstat; | ||
138 | }; | ||
139 | |||
103 | struct audit_aux_data_ipcctl { | 140 | struct audit_aux_data_ipcctl { |
104 | struct audit_aux_data d; | 141 | struct audit_aux_data d; |
105 | struct ipc_perm p; | 142 | struct ipc_perm p; |
@@ -110,6 +147,13 @@ struct audit_aux_data_ipcctl { | |||
110 | u32 osid; | 147 | u32 osid; |
111 | }; | 148 | }; |
112 | 149 | ||
150 | struct audit_aux_data_execve { | ||
151 | struct audit_aux_data d; | ||
152 | int argc; | ||
153 | int envc; | ||
154 | char mem[0]; | ||
155 | }; | ||
156 | |||
113 | struct audit_aux_data_socketcall { | 157 | struct audit_aux_data_socketcall { |
114 | struct audit_aux_data d; | 158 | struct audit_aux_data d; |
115 | int nargs; | 159 | int nargs; |
@@ -148,7 +192,7 @@ struct audit_context { | |||
148 | struct audit_aux_data *aux; | 192 | struct audit_aux_data *aux; |
149 | 193 | ||
150 | /* Save things to print about task_struct */ | 194 | /* Save things to print about task_struct */ |
151 | pid_t pid; | 195 | pid_t pid, ppid; |
152 | uid_t uid, euid, suid, fsuid; | 196 | uid_t uid, euid, suid, fsuid; |
153 | gid_t gid, egid, sgid, fsgid; | 197 | gid_t gid, egid, sgid, fsgid; |
154 | unsigned long personality; | 198 | unsigned long personality; |
@@ -160,12 +204,13 @@ struct audit_context { | |||
160 | #endif | 204 | #endif |
161 | }; | 205 | }; |
162 | 206 | ||
163 | 207 | /* Determine if any context name data matches a rule's watch data */ | |
164 | /* Compare a task_struct with an audit_rule. Return 1 on match, 0 | 208 | /* Compare a task_struct with an audit_rule. Return 1 on match, 0 |
165 | * otherwise. */ | 209 | * otherwise. */ |
166 | static int audit_filter_rules(struct task_struct *tsk, | 210 | static int audit_filter_rules(struct task_struct *tsk, |
167 | struct audit_krule *rule, | 211 | struct audit_krule *rule, |
168 | struct audit_context *ctx, | 212 | struct audit_context *ctx, |
213 | struct audit_names *name, | ||
169 | enum audit_state *state) | 214 | enum audit_state *state) |
170 | { | 215 | { |
171 | int i, j, need_sid = 1; | 216 | int i, j, need_sid = 1; |
@@ -179,6 +224,10 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
179 | case AUDIT_PID: | 224 | case AUDIT_PID: |
180 | result = audit_comparator(tsk->pid, f->op, f->val); | 225 | result = audit_comparator(tsk->pid, f->op, f->val); |
181 | break; | 226 | break; |
227 | case AUDIT_PPID: | ||
228 | if (ctx) | ||
229 | result = audit_comparator(ctx->ppid, f->op, f->val); | ||
230 | break; | ||
182 | case AUDIT_UID: | 231 | case AUDIT_UID: |
183 | result = audit_comparator(tsk->uid, f->op, f->val); | 232 | result = audit_comparator(tsk->uid, f->op, f->val); |
184 | break; | 233 | break; |
@@ -224,7 +273,10 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
224 | } | 273 | } |
225 | break; | 274 | break; |
226 | case AUDIT_DEVMAJOR: | 275 | case AUDIT_DEVMAJOR: |
227 | if (ctx) { | 276 | if (name) |
277 | result = audit_comparator(MAJOR(name->dev), | ||
278 | f->op, f->val); | ||
279 | else if (ctx) { | ||
228 | for (j = 0; j < ctx->name_count; j++) { | 280 | for (j = 0; j < ctx->name_count; j++) { |
229 | if (audit_comparator(MAJOR(ctx->names[j].dev), f->op, f->val)) { | 281 | if (audit_comparator(MAJOR(ctx->names[j].dev), f->op, f->val)) { |
230 | ++result; | 282 | ++result; |
@@ -234,7 +286,10 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
234 | } | 286 | } |
235 | break; | 287 | break; |
236 | case AUDIT_DEVMINOR: | 288 | case AUDIT_DEVMINOR: |
237 | if (ctx) { | 289 | if (name) |
290 | result = audit_comparator(MINOR(name->dev), | ||
291 | f->op, f->val); | ||
292 | else if (ctx) { | ||
238 | for (j = 0; j < ctx->name_count; j++) { | 293 | for (j = 0; j < ctx->name_count; j++) { |
239 | if (audit_comparator(MINOR(ctx->names[j].dev), f->op, f->val)) { | 294 | if (audit_comparator(MINOR(ctx->names[j].dev), f->op, f->val)) { |
240 | ++result; | 295 | ++result; |
@@ -244,16 +299,22 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
244 | } | 299 | } |
245 | break; | 300 | break; |
246 | case AUDIT_INODE: | 301 | case AUDIT_INODE: |
247 | if (ctx) { | 302 | if (name) |
303 | result = (name->ino == f->val); | ||
304 | else if (ctx) { | ||
248 | for (j = 0; j < ctx->name_count; j++) { | 305 | for (j = 0; j < ctx->name_count; j++) { |
249 | if (audit_comparator(ctx->names[j].ino, f->op, f->val) || | 306 | if (audit_comparator(ctx->names[j].ino, f->op, f->val)) { |
250 | audit_comparator(ctx->names[j].pino, f->op, f->val)) { | ||
251 | ++result; | 307 | ++result; |
252 | break; | 308 | break; |
253 | } | 309 | } |
254 | } | 310 | } |
255 | } | 311 | } |
256 | break; | 312 | break; |
313 | case AUDIT_WATCH: | ||
314 | if (name && rule->watch->ino != (unsigned long)-1) | ||
315 | result = (name->dev == rule->watch->dev && | ||
316 | name->ino == rule->watch->ino); | ||
317 | break; | ||
257 | case AUDIT_LOGINUID: | 318 | case AUDIT_LOGINUID: |
258 | result = 0; | 319 | result = 0; |
259 | if (ctx) | 320 | if (ctx) |
@@ -294,7 +355,6 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
294 | } | 355 | } |
295 | switch (rule->action) { | 356 | switch (rule->action) { |
296 | case AUDIT_NEVER: *state = AUDIT_DISABLED; break; | 357 | case AUDIT_NEVER: *state = AUDIT_DISABLED; break; |
297 | case AUDIT_POSSIBLE: *state = AUDIT_BUILD_CONTEXT; break; | ||
298 | case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; | 358 | case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; |
299 | } | 359 | } |
300 | return 1; | 360 | return 1; |
@@ -311,7 +371,7 @@ static enum audit_state audit_filter_task(struct task_struct *tsk) | |||
311 | 371 | ||
312 | rcu_read_lock(); | 372 | rcu_read_lock(); |
313 | list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_TASK], list) { | 373 | list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_TASK], list) { |
314 | if (audit_filter_rules(tsk, &e->rule, NULL, &state)) { | 374 | if (audit_filter_rules(tsk, &e->rule, NULL, NULL, &state)) { |
315 | rcu_read_unlock(); | 375 | rcu_read_unlock(); |
316 | return state; | 376 | return state; |
317 | } | 377 | } |
@@ -341,8 +401,47 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, | |||
341 | int bit = AUDIT_BIT(ctx->major); | 401 | int bit = AUDIT_BIT(ctx->major); |
342 | 402 | ||
343 | list_for_each_entry_rcu(e, list, list) { | 403 | list_for_each_entry_rcu(e, list, list) { |
344 | if ((e->rule.mask[word] & bit) == bit | 404 | if ((e->rule.mask[word] & bit) == bit && |
345 | && audit_filter_rules(tsk, &e->rule, ctx, &state)) { | 405 | audit_filter_rules(tsk, &e->rule, ctx, NULL, |
406 | &state)) { | ||
407 | rcu_read_unlock(); | ||
408 | return state; | ||
409 | } | ||
410 | } | ||
411 | } | ||
412 | rcu_read_unlock(); | ||
413 | return AUDIT_BUILD_CONTEXT; | ||
414 | } | ||
415 | |||
416 | /* At syscall exit time, this filter is called if any audit_names[] have been | ||
417 | * collected during syscall processing. We only check rules in sublists at hash | ||
418 | * buckets applicable to the inode numbers in audit_names[]. | ||
419 | * Regarding audit_state, same rules apply as for audit_filter_syscall(). | ||
420 | */ | ||
421 | enum audit_state audit_filter_inodes(struct task_struct *tsk, | ||
422 | struct audit_context *ctx) | ||
423 | { | ||
424 | int i; | ||
425 | struct audit_entry *e; | ||
426 | enum audit_state state; | ||
427 | |||
428 | if (audit_pid && tsk->tgid == audit_pid) | ||
429 | return AUDIT_DISABLED; | ||
430 | |||
431 | rcu_read_lock(); | ||
432 | for (i = 0; i < ctx->name_count; i++) { | ||
433 | int word = AUDIT_WORD(ctx->major); | ||
434 | int bit = AUDIT_BIT(ctx->major); | ||
435 | struct audit_names *n = &ctx->names[i]; | ||
436 | int h = audit_hash_ino((u32)n->ino); | ||
437 | struct list_head *list = &audit_inode_hash[h]; | ||
438 | |||
439 | if (list_empty(list)) | ||
440 | continue; | ||
441 | |||
442 | list_for_each_entry_rcu(e, list, list) { | ||
443 | if ((e->rule.mask[word] & bit) == bit && | ||
444 | audit_filter_rules(tsk, &e->rule, ctx, n, &state)) { | ||
346 | rcu_read_unlock(); | 445 | rcu_read_unlock(); |
347 | return state; | 446 | return state; |
348 | } | 447 | } |
@@ -352,6 +451,11 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, | |||
352 | return AUDIT_BUILD_CONTEXT; | 451 | return AUDIT_BUILD_CONTEXT; |
353 | } | 452 | } |
354 | 453 | ||
454 | void audit_set_auditable(struct audit_context *ctx) | ||
455 | { | ||
456 | ctx->auditable = 1; | ||
457 | } | ||
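audit_filter_inodes() avoids a full exit-filter walk by hashing each collected inode and scanning only the matching bucket, skipping empty buckets outright via list_empty(). A toy model of that loop (bucket count and hash are placeholders):

	#include <stdio.h>

	#define BUCKETS 32

	struct name { unsigned long ino; };

	/* one flag per bucket stands in for the per-bucket rule lists */
	static int bucket_has_rules[BUCKETS];

	static unsigned hash_ino(unsigned long ino) { return (unsigned)(ino & (BUCKETS - 1)); }

	static int filter_inodes(const struct name *names, int count)
	{
		for (int i = 0; i < count; i++) {
			unsigned h = hash_ino(names[i].ino);

			if (!bucket_has_rules[h])	/* the list_empty() fast path */
				continue;
			/* the real code walks this bucket's rules with
			 * audit_filter_rules(), passing &names[i] so per-name
			 * fields (dev, ino) are matched directly */
			return 1;
		}
		return 0;
	}

	int main(void)
	{
		struct name collected[] = { { 100 }, { 4097 } };

		bucket_has_rules[hash_ino(4097)] = 1;
		printf("record context: %d\n", filter_inodes(collected, 2));
		return 0;
	}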
458 | |||
355 | static inline struct audit_context *audit_get_context(struct task_struct *tsk, | 459 | static inline struct audit_context *audit_get_context(struct task_struct *tsk, |
356 | int return_valid, | 460 | int return_valid, |
357 | int return_code) | 461 | int return_code) |
@@ -365,12 +469,22 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk, | |||
365 | 469 | ||
366 | if (context->in_syscall && !context->auditable) { | 470 | if (context->in_syscall && !context->auditable) { |
367 | enum audit_state state; | 471 | enum audit_state state; |
472 | |||
368 | state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]); | 473 | state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]); |
474 | if (state == AUDIT_RECORD_CONTEXT) { | ||
475 | context->auditable = 1; | ||
476 | goto get_context; | ||
477 | } | ||
478 | |||
479 | state = audit_filter_inodes(tsk, context); | ||
369 | if (state == AUDIT_RECORD_CONTEXT) | 480 | if (state == AUDIT_RECORD_CONTEXT) |
370 | context->auditable = 1; | 481 | context->auditable = 1; |
482 | |||
371 | } | 483 | } |
372 | 484 | ||
485 | get_context: | ||
373 | context->pid = tsk->pid; | 486 | context->pid = tsk->pid; |
487 | context->ppid = sys_getppid(); /* sic. tsk == current in all cases */ | ||
374 | context->uid = tsk->uid; | 488 | context->uid = tsk->uid; |
375 | context->gid = tsk->gid; | 489 | context->gid = tsk->gid; |
376 | context->euid = tsk->euid; | 490 | context->euid = tsk->euid; |
@@ -413,7 +527,7 @@ static inline void audit_free_names(struct audit_context *context) | |||
413 | #endif | 527 | #endif |
414 | 528 | ||
415 | for (i = 0; i < context->name_count; i++) { | 529 | for (i = 0; i < context->name_count; i++) { |
416 | if (context->names[i].name) | 530 | if (context->names[i].name && context->names[i].name_put) |
417 | __putname(context->names[i].name); | 531 | __putname(context->names[i].name); |
418 | } | 532 | } |
419 | context->name_count = 0; | 533 | context->name_count = 0; |
@@ -606,7 +720,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts | |||
606 | tty = "(none)"; | 720 | tty = "(none)"; |
607 | audit_log_format(ab, | 721 | audit_log_format(ab, |
608 | " a0=%lx a1=%lx a2=%lx a3=%lx items=%d" | 722 | " a0=%lx a1=%lx a2=%lx a3=%lx items=%d" |
609 | " pid=%d auid=%u uid=%u gid=%u" | 723 | " ppid=%d pid=%d auid=%u uid=%u gid=%u" |
610 | " euid=%u suid=%u fsuid=%u" | 724 | " euid=%u suid=%u fsuid=%u" |
611 | " egid=%u sgid=%u fsgid=%u tty=%s", | 725 | " egid=%u sgid=%u fsgid=%u tty=%s", |
612 | context->argv[0], | 726 | context->argv[0], |
@@ -614,6 +728,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts | |||
614 | context->argv[2], | 728 | context->argv[2], |
615 | context->argv[3], | 729 | context->argv[3], |
616 | context->name_count, | 730 | context->name_count, |
731 | context->ppid, | ||
617 | context->pid, | 732 | context->pid, |
618 | context->loginuid, | 733 | context->loginuid, |
619 | context->uid, | 734 | context->uid, |
@@ -630,11 +745,48 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts | |||
630 | continue; /* audit_panic has been called */ | 745 | continue; /* audit_panic has been called */ |
631 | 746 | ||
632 | switch (aux->type) { | 747 | switch (aux->type) { |
748 | case AUDIT_MQ_OPEN: { | ||
749 | struct audit_aux_data_mq_open *axi = (void *)aux; | ||
750 | audit_log_format(ab, | ||
751 | "oflag=0x%x mode=%#o mq_flags=0x%lx mq_maxmsg=%ld " | ||
752 | "mq_msgsize=%ld mq_curmsgs=%ld", | ||
753 | axi->oflag, axi->mode, axi->attr.mq_flags, | ||
754 | axi->attr.mq_maxmsg, axi->attr.mq_msgsize, | ||
755 | axi->attr.mq_curmsgs); | ||
756 | break; } | ||
757 | |||
758 | case AUDIT_MQ_SENDRECV: { | ||
759 | struct audit_aux_data_mq_sendrecv *axi = (void *)aux; | ||
760 | audit_log_format(ab, | ||
761 | "mqdes=%d msg_len=%zd msg_prio=%u " | ||
762 | "abs_timeout_sec=%ld abs_timeout_nsec=%ld", | ||
763 | axi->mqdes, axi->msg_len, axi->msg_prio, | ||
764 | axi->abs_timeout.tv_sec, axi->abs_timeout.tv_nsec); | ||
765 | break; } | ||
766 | |||
767 | case AUDIT_MQ_NOTIFY: { | ||
768 | struct audit_aux_data_mq_notify *axi = (void *)aux; | ||
769 | audit_log_format(ab, | ||
770 | "mqdes=%d sigev_signo=%d", | ||
771 | axi->mqdes, | ||
772 | axi->notification.sigev_signo); | ||
773 | break; } | ||
774 | |||
775 | case AUDIT_MQ_GETSETATTR: { | ||
776 | struct audit_aux_data_mq_getsetattr *axi = (void *)aux; | ||
777 | audit_log_format(ab, | ||
778 | "mqdes=%d mq_flags=0x%lx mq_maxmsg=%ld mq_msgsize=%ld " | ||
779 | "mq_curmsgs=%ld ", | ||
780 | axi->mqdes, | ||
781 | axi->mqstat.mq_flags, axi->mqstat.mq_maxmsg, | ||
782 | axi->mqstat.mq_msgsize, axi->mqstat.mq_curmsgs); | ||
783 | break; } | ||
784 | |||
633 | case AUDIT_IPC: { | 785 | case AUDIT_IPC: { |
634 | struct audit_aux_data_ipcctl *axi = (void *)aux; | 786 | struct audit_aux_data_ipcctl *axi = (void *)aux; |
635 | audit_log_format(ab, | 787 | audit_log_format(ab, |
636 | " qbytes=%lx iuid=%u igid=%u mode=%x", | 788 | "ouid=%u ogid=%u mode=%x", |
637 | axi->qbytes, axi->uid, axi->gid, axi->mode); | 789 | axi->uid, axi->gid, axi->mode); |
638 | if (axi->osid != 0) { | 790 | if (axi->osid != 0) { |
639 | char *ctx = NULL; | 791 | char *ctx = NULL; |
640 | u32 len; | 792 | u32 len; |
@@ -652,19 +804,18 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts | |||
652 | case AUDIT_IPC_SET_PERM: { | 804 | case AUDIT_IPC_SET_PERM: { |
653 | struct audit_aux_data_ipcctl *axi = (void *)aux; | 805 | struct audit_aux_data_ipcctl *axi = (void *)aux; |
654 | audit_log_format(ab, | 806 | audit_log_format(ab, |
655 | " new qbytes=%lx new iuid=%u new igid=%u new mode=%x", | 807 | "qbytes=%lx ouid=%u ogid=%u mode=%x", |
656 | axi->qbytes, axi->uid, axi->gid, axi->mode); | 808 | axi->qbytes, axi->uid, axi->gid, axi->mode); |
657 | if (axi->osid != 0) { | 809 | break; } |
658 | char *ctx = NULL; | 810 | |
659 | u32 len; | 811 | case AUDIT_EXECVE: { |
660 | if (selinux_ctxid_to_string( | 812 | struct audit_aux_data_execve *axi = (void *)aux; |
661 | axi->osid, &ctx, &len)) { | 813 | int i; |
662 | audit_log_format(ab, " osid=%u", | 814 | const char *p; |
663 | axi->osid); | 815 | for (i = 0, p = axi->mem; i < axi->argc; i++) { |
664 | call_panic = 1; | 816 | audit_log_format(ab, "a%d=", i); |
665 | } else | 817 | p = audit_log_untrustedstring(ab, p); |
666 | audit_log_format(ab, " obj=%s", ctx); | 818 | audit_log_format(ab, "\n"); |
667 | kfree(ctx); | ||
668 | } | 819 | } |
669 | break; } | 820 | break; } |
670 | 821 | ||
@@ -700,8 +851,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts | |||
700 | } | 851 | } |
701 | } | 852 | } |
702 | for (i = 0; i < context->name_count; i++) { | 853 | for (i = 0; i < context->name_count; i++) { |
703 | unsigned long ino = context->names[i].ino; | 854 | struct audit_names *n = &context->names[i]; |
704 | unsigned long pino = context->names[i].pino; | ||
705 | 855 | ||
706 | ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH); | 856 | ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH); |
707 | if (!ab) | 857 | if (!ab) |
@@ -709,33 +859,47 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts | |||
709 | 859 | ||
710 | audit_log_format(ab, "item=%d", i); | 860 | audit_log_format(ab, "item=%d", i); |
711 | 861 | ||
712 | audit_log_format(ab, " name="); | 862 | if (n->name) { |
713 | if (context->names[i].name) | 863 | switch(n->name_len) { |
714 | audit_log_untrustedstring(ab, context->names[i].name); | 864 | case AUDIT_NAME_FULL: |
715 | else | 865 | /* log the full path */ |
716 | audit_log_format(ab, "(null)"); | 866 | audit_log_format(ab, " name="); |
717 | 867 | audit_log_untrustedstring(ab, n->name); | |
718 | if (pino != (unsigned long)-1) | 868 | break; |
719 | audit_log_format(ab, " parent=%lu", pino); | 869 | case 0: |
720 | if (ino != (unsigned long)-1) | 870 | /* name was specified as a relative path and the |
721 | audit_log_format(ab, " inode=%lu", ino); | 871 | * directory component is the cwd */ |
722 | if ((pino != (unsigned long)-1) || (ino != (unsigned long)-1)) | 872 | audit_log_d_path(ab, " name=", context->pwd, |
723 | audit_log_format(ab, " dev=%02x:%02x mode=%#o" | 873 | context->pwdmnt); |
724 | " ouid=%u ogid=%u rdev=%02x:%02x", | 874 | break; |
725 | MAJOR(context->names[i].dev), | 875 | default: |
726 | MINOR(context->names[i].dev), | 876 | /* log the name's directory component */ |
727 | context->names[i].mode, | 877 | audit_log_format(ab, " name="); |
728 | context->names[i].uid, | 878 | audit_log_n_untrustedstring(ab, n->name_len, |
729 | context->names[i].gid, | 879 | n->name); |
730 | MAJOR(context->names[i].rdev), | 880 | } |
731 | MINOR(context->names[i].rdev)); | 881 | } else |
732 | if (context->names[i].osid != 0) { | 882 | audit_log_format(ab, " name=(null)"); |
883 | |||
884 | if (n->ino != (unsigned long)-1) { | ||
885 | audit_log_format(ab, " inode=%lu" | ||
886 | " dev=%02x:%02x mode=%#o" | ||
887 | " ouid=%u ogid=%u rdev=%02x:%02x", | ||
888 | n->ino, | ||
889 | MAJOR(n->dev), | ||
890 | MINOR(n->dev), | ||
891 | n->mode, | ||
892 | n->uid, | ||
893 | n->gid, | ||
894 | MAJOR(n->rdev), | ||
895 | MINOR(n->rdev)); | ||
896 | } | ||
897 | if (n->osid != 0) { | ||
733 | char *ctx = NULL; | 898 | char *ctx = NULL; |
734 | u32 len; | 899 | u32 len; |
735 | if (selinux_ctxid_to_string( | 900 | if (selinux_ctxid_to_string( |
736 | context->names[i].osid, &ctx, &len)) { | 901 | n->osid, &ctx, &len)) { |
737 | audit_log_format(ab, " osid=%u", | 902 | audit_log_format(ab, " osid=%u", n->osid); |
738 | context->names[i].osid); | ||
739 | call_panic = 2; | 903 | call_panic = 2; |
740 | } else | 904 | } else |
741 | audit_log_format(ab, " obj=%s", ctx); | 905 | audit_log_format(ab, " obj=%s", ctx); |
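The three-way switch on name_len decides how much of the stored string to log: the full path (AUDIT_NAME_FULL), none of it because the directory component is the cwd (0), or just the leading name_len characters that form the directory prefix. Sketched as an ordinary function, with AUDIT_NAME_FULL's value of -1 taken from the definition earlier in this patch:

	#include <stdio.h>

	#define AUDIT_NAME_FULL -1

	static void log_name(const char *name, int name_len, const char *cwd)
	{
		if (!name) {
			printf(" name=(null)\n");
			return;
		}
		switch (name_len) {
		case AUDIT_NAME_FULL:
			printf(" name=%s\n", name);		/* whole path */
			break;
		case 0:
			printf(" name=%s\n", cwd);		/* directory is the cwd */
			break;
		default:
			printf(" name=%.*s\n", name_len, name);	/* directory prefix only */
		}
	}

	int main(void)
	{
		log_name("/etc/passwd", AUDIT_NAME_FULL, "/root");
		log_name("passwd", 0, "/etc");
		log_name("/etc/passwd", 5, "/root");	/* dirlen from the compare helper */
		return 0;
	}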
@@ -908,11 +1072,11 @@ void audit_syscall_exit(int valid, long return_code) | |||
908 | * Add a name to the list of audit names for this context. | 1072 | * Add a name to the list of audit names for this context. |
909 | * Called from fs/namei.c:getname(). | 1073 | * Called from fs/namei.c:getname(). |
910 | */ | 1074 | */ |
911 | void audit_getname(const char *name) | 1075 | void __audit_getname(const char *name) |
912 | { | 1076 | { |
913 | struct audit_context *context = current->audit_context; | 1077 | struct audit_context *context = current->audit_context; |
914 | 1078 | ||
915 | if (!context || IS_ERR(name) || !name) | 1079 | if (IS_ERR(name) || !name) |
916 | return; | 1080 | return; |
917 | 1081 | ||
918 | if (!context->in_syscall) { | 1082 | if (!context->in_syscall) { |
@@ -925,6 +1089,8 @@ void audit_getname(const char *name) | |||
925 | } | 1089 | } |
926 | BUG_ON(context->name_count >= AUDIT_NAMES); | 1090 | BUG_ON(context->name_count >= AUDIT_NAMES); |
927 | context->names[context->name_count].name = name; | 1091 | context->names[context->name_count].name = name; |
1092 | context->names[context->name_count].name_len = AUDIT_NAME_FULL; | ||
1093 | context->names[context->name_count].name_put = 1; | ||
928 | context->names[context->name_count].ino = (unsigned long)-1; | 1094 | context->names[context->name_count].ino = (unsigned long)-1; |
929 | ++context->name_count; | 1095 | ++context->name_count; |
930 | if (!context->pwd) { | 1096 | if (!context->pwd) { |
@@ -991,11 +1157,10 @@ static void audit_inode_context(int idx, const struct inode *inode) | |||
991 | * audit_inode - store the inode and device from a lookup | 1157 | * audit_inode - store the inode and device from a lookup |
992 | * @name: name being audited | 1158 | * @name: name being audited |
993 | * @inode: inode being audited | 1159 | * @inode: inode being audited |
994 | * @flags: lookup flags (as used in path_lookup()) | ||
995 | * | 1160 | * |
996 | * Called from fs/namei.c:path_lookup(). | 1161 | * Called from fs/namei.c:path_lookup(). |
997 | */ | 1162 | */ |
998 | void __audit_inode(const char *name, const struct inode *inode, unsigned flags) | 1163 | void __audit_inode(const char *name, const struct inode *inode) |
999 | { | 1164 | { |
1000 | int idx; | 1165 | int idx; |
1001 | struct audit_context *context = current->audit_context; | 1166 | struct audit_context *context = current->audit_context; |
@@ -1021,20 +1186,13 @@ void __audit_inode(const char *name, const struct inode *inode, unsigned flags) | |||
1021 | ++context->ino_count; | 1186 | ++context->ino_count; |
1022 | #endif | 1187 | #endif |
1023 | } | 1188 | } |
1189 | context->names[idx].ino = inode->i_ino; | ||
1024 | context->names[idx].dev = inode->i_sb->s_dev; | 1190 | context->names[idx].dev = inode->i_sb->s_dev; |
1025 | context->names[idx].mode = inode->i_mode; | 1191 | context->names[idx].mode = inode->i_mode; |
1026 | context->names[idx].uid = inode->i_uid; | 1192 | context->names[idx].uid = inode->i_uid; |
1027 | context->names[idx].gid = inode->i_gid; | 1193 | context->names[idx].gid = inode->i_gid; |
1028 | context->names[idx].rdev = inode->i_rdev; | 1194 | context->names[idx].rdev = inode->i_rdev; |
1029 | audit_inode_context(idx, inode); | 1195 | audit_inode_context(idx, inode); |
1030 | if ((flags & LOOKUP_PARENT) && (strcmp(name, "/") != 0) && | ||
1031 | (strcmp(name, ".") != 0)) { | ||
1032 | context->names[idx].ino = (unsigned long)-1; | ||
1033 | context->names[idx].pino = inode->i_ino; | ||
1034 | } else { | ||
1035 | context->names[idx].ino = inode->i_ino; | ||
1036 | context->names[idx].pino = (unsigned long)-1; | ||
1037 | } | ||
1038 | } | 1196 | } |
1039 | 1197 | ||
1040 | /** | 1198 | /** |
@@ -1056,51 +1214,40 @@ void __audit_inode_child(const char *dname, const struct inode *inode, | |||
1056 | { | 1214 | { |
1057 | int idx; | 1215 | int idx; |
1058 | struct audit_context *context = current->audit_context; | 1216 | struct audit_context *context = current->audit_context; |
1217 | const char *found_name = NULL; | ||
1218 | int dirlen = 0; | ||
1059 | 1219 | ||
1060 | if (!context->in_syscall) | 1220 | if (!context->in_syscall) |
1061 | return; | 1221 | return; |
1062 | 1222 | ||
1063 | /* determine matching parent */ | 1223 | /* determine matching parent */ |
1064 | if (dname) | 1224 | if (!dname) |
1065 | for (idx = 0; idx < context->name_count; idx++) | 1225 | goto update_context; |
1066 | if (context->names[idx].pino == pino) { | 1226 | for (idx = 0; idx < context->name_count; idx++) |
1067 | const char *n; | 1227 | if (context->names[idx].ino == pino) { |
1068 | const char *name = context->names[idx].name; | 1228 | const char *name = context->names[idx].name; |
1069 | int dlen = strlen(dname); | 1229 | |
1070 | int nlen = name ? strlen(name) : 0; | 1230 | if (!name) |
1071 | 1231 | continue; | |
1072 | if (nlen < dlen) | 1232 | |
1073 | continue; | 1233 | if (audit_compare_dname_path(dname, name, &dirlen) == 0) { |
1074 | 1234 | context->names[idx].name_len = dirlen; | |
1075 | /* disregard trailing slashes */ | 1235 | found_name = name; |
1076 | n = name + nlen - 1; | 1236 | break; |
1077 | while ((*n == '/') && (n > name)) | ||
1078 | n--; | ||
1079 | |||
1080 | /* find last path component */ | ||
1081 | n = n - dlen + 1; | ||
1082 | if (n < name) | ||
1083 | continue; | ||
1084 | else if (n > name) { | ||
1085 | if (*--n != '/') | ||
1086 | continue; | ||
1087 | else | ||
1088 | n++; | ||
1089 | } | ||
1090 | |||
1091 | if (strncmp(n, dname, dlen) == 0) | ||
1092 | goto update_context; | ||
1093 | } | 1237 | } |
1238 | } | ||
1094 | 1239 | ||
1095 | /* catch-all in case match not found */ | 1240 | update_context: |
1096 | idx = context->name_count++; | 1241 | idx = context->name_count++; |
1097 | context->names[idx].name = NULL; | ||
1098 | context->names[idx].pino = pino; | ||
1099 | #if AUDIT_DEBUG | 1242 | #if AUDIT_DEBUG |
1100 | context->ino_count++; | 1243 | context->ino_count++; |
1101 | #endif | 1244 | #endif |
1245 | /* Re-use the name belonging to the slot for a matching parent directory. | ||
1246 | * All names for this context are relinquished in audit_free_names() */ | ||
1247 | context->names[idx].name = found_name; | ||
1248 | context->names[idx].name_len = AUDIT_NAME_FULL; | ||
1249 | context->names[idx].name_put = 0; /* don't call __putname() */ | ||
1102 | 1250 | ||
1103 | update_context: | ||
1104 | if (inode) { | 1251 | if (inode) { |
1105 | context->names[idx].ino = inode->i_ino; | 1252 | context->names[idx].ino = inode->i_ino; |
1106 | context->names[idx].dev = inode->i_sb->s_dev; | 1253 | context->names[idx].dev = inode->i_sb->s_dev; |
@@ -1109,7 +1256,8 @@ update_context: | |||
1109 | context->names[idx].gid = inode->i_gid; | 1256 | context->names[idx].gid = inode->i_gid; |
1110 | context->names[idx].rdev = inode->i_rdev; | 1257 | context->names[idx].rdev = inode->i_rdev; |
1111 | audit_inode_context(idx, inode); | 1258 | audit_inode_context(idx, inode); |
1112 | } | 1259 | } else |
1260 | context->names[idx].ino = (unsigned long)-1; | ||
1113 | } | 1261 | } |
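Reusing the parent's name string means two audit_names slots can point at one allocation; name_put records which slot owns it, so audit_free_names() calls __putname() exactly once. The ownership idea in miniature:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	struct name_slot {
		char *name;
		int name_put;	/* this slot frees the string */
	};

	int main(void)
	{
		struct name_slot slots[2];
		int count = 0;

		/* slot 0: the parent lookup, owns its string */
		slots[count].name = strdup("/etc/passwd");
		slots[count].name_put = 1;
		count++;

		/* slot 1: the child entry reuses the same string, does not own it */
		slots[count].name = slots[0].name;
		slots[count].name_put = 0;
		count++;

		/* the audit_free_names() equivalent: free only where name_put is set */
		for (int i = 0; i < count; i++)
			if (slots[i].name && slots[i].name_put)
				free(slots[i].name);

		printf("freed once, no double free\n");
		return 0;
	}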
1114 | 1262 | ||
1115 | /** | 1263 | /** |
@@ -1142,18 +1290,23 @@ void auditsc_get_stamp(struct audit_context *ctx, | |||
1142 | */ | 1290 | */ |
1143 | int audit_set_loginuid(struct task_struct *task, uid_t loginuid) | 1291 | int audit_set_loginuid(struct task_struct *task, uid_t loginuid) |
1144 | { | 1292 | { |
1145 | if (task->audit_context) { | 1293 | struct audit_context *context = task->audit_context; |
1146 | struct audit_buffer *ab; | 1294 | |
1147 | 1295 | if (context) { | |
1148 | ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); | 1296 | /* Only log if audit is enabled */ |
1149 | if (ab) { | 1297 | if (context->in_syscall) { |
1150 | audit_log_format(ab, "login pid=%d uid=%u " | 1298 | struct audit_buffer *ab; |
1151 | "old auid=%u new auid=%u", | 1299 | |
1152 | task->pid, task->uid, | 1300 | ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); |
1153 | task->audit_context->loginuid, loginuid); | 1301 | if (ab) { |
1154 | audit_log_end(ab); | 1302 | audit_log_format(ab, "login pid=%d uid=%u " |
1303 | "old auid=%u new auid=%u", | ||
1304 | task->pid, task->uid, | ||
1305 | context->loginuid, loginuid); | ||
1306 | audit_log_end(ab); | ||
1307 | } | ||
1155 | } | 1308 | } |
1156 | task->audit_context->loginuid = loginuid; | 1309 | context->loginuid = loginuid; |
1157 | } | 1310 | } |
1158 | return 0; | 1311 | return 0; |
1159 | } | 1312 | } |
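With the new in_syscall guard, the AUDIT_LOGIN record is emitted only when the loginuid is changed from inside an audited syscall; the loginuid itself is still updated unconditionally. Substituting illustrative values into the format string above (pid, uid, and auid are invented for the example; 4294967295 is (uid_t)-1, the unset loginuid), the record payload reads:

	login pid=2301 uid=0 old auid=4294967295 new auid=500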
@@ -1170,16 +1323,193 @@ uid_t audit_get_loginuid(struct audit_context *ctx) | |||
1170 | } | 1323 | } |
1171 | 1324 | ||
1172 | /** | 1325 | /** |
1173 | * audit_ipc_obj - record audit data for ipc object | 1326 | * __audit_mq_open - record audit data for a POSIX MQ open |
1174 | * @ipcp: ipc permissions | 1327 | * @oflag: open flag |
1328 | * @mode: mode bits | ||
1329 | * @u_attr: queue attributes | ||
1175 | * | 1330 | * |
1176 | * Returns 0 for success or NULL context or < 0 on error. | 1331 | * Returns 0 for success or NULL context or < 0 on error. |
1177 | */ | 1332 | */ |
1178 | int audit_ipc_obj(struct kern_ipc_perm *ipcp) | 1333 | int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr) |
1179 | { | 1334 | { |
1180 | struct audit_aux_data_ipcctl *ax; | 1335 | struct audit_aux_data_mq_open *ax; |
1336 | struct audit_context *context = current->audit_context; | ||
1337 | |||
1338 | if (!audit_enabled) | ||
1339 | return 0; | ||
1340 | |||
1341 | if (likely(!context)) | ||
1342 | return 0; | ||
1343 | |||
1344 | ax = kmalloc(sizeof(*ax), GFP_ATOMIC); | ||
1345 | if (!ax) | ||
1346 | return -ENOMEM; | ||
1347 | |||
1348 | if (u_attr != NULL) { | ||
1349 | if (copy_from_user(&ax->attr, u_attr, sizeof(ax->attr))) { | ||
1350 | kfree(ax); | ||
1351 | return -EFAULT; | ||
1352 | } | ||
1353 | } else | ||
1354 | memset(&ax->attr, 0, sizeof(ax->attr)); | ||
1355 | |||
1356 | ax->oflag = oflag; | ||
1357 | ax->mode = mode; | ||
1358 | |||
1359 | ax->d.type = AUDIT_MQ_OPEN; | ||
1360 | ax->d.next = context->aux; | ||
1361 | context->aux = (void *)ax; | ||
1362 | return 0; | ||
1363 | } | ||
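By convention these double-underscore entry points are paired with inline wrappers in the header so that unaudited tasks never take the call at all. A sketch of the presumed wrapper (an assumption; the actual inline lives in include/linux/audit.h and may differ in detail):

	static inline int audit_mq_open(int oflag, mode_t mode,
					struct mq_attr __user *u_attr)
	{
		/* fast path: no audit context on this task, record nothing */
		if (unlikely(current->audit_context))
			return __audit_mq_open(oflag, mode, u_attr);
		return 0;
	}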
1364 | |||
1365 | /** | ||
1366 | * __audit_mq_timedsend - record audit data for a POSIX MQ timed send | ||
1367 | * @mqdes: MQ descriptor | ||
1368 | * @msg_len: Message length | ||
1369 | * @msg_prio: Message priority | ||
1370 | * @u_abs_timeout: Message timeout in absolute time | ||
1371 | * | ||
1372 | * Returns 0 for success or NULL context or < 0 on error. | ||
1373 | */ | ||
1374 | int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, | ||
1375 | const struct timespec __user *u_abs_timeout) | ||
1376 | { | ||
1377 | struct audit_aux_data_mq_sendrecv *ax; | ||
1378 | struct audit_context *context = current->audit_context; | ||
1379 | |||
1380 | if (!audit_enabled) | ||
1381 | return 0; | ||
1382 | |||
1383 | if (likely(!context)) | ||
1384 | return 0; | ||
1385 | |||
1386 | ax = kmalloc(sizeof(*ax), GFP_ATOMIC); | ||
1387 | if (!ax) | ||
1388 | return -ENOMEM; | ||
1389 | |||
1390 | if (u_abs_timeout != NULL) { | ||
1391 | if (copy_from_user(&ax->abs_timeout, u_abs_timeout, sizeof(ax->abs_timeout))) { | ||
1392 | kfree(ax); | ||
1393 | return -EFAULT; | ||
1394 | } | ||
1395 | } else | ||
1396 | memset(&ax->abs_timeout, 0, sizeof(ax->abs_timeout)); | ||
1397 | |||
1398 | ax->mqdes = mqdes; | ||
1399 | ax->msg_len = msg_len; | ||
1400 | ax->msg_prio = msg_prio; | ||
1401 | |||
1402 | ax->d.type = AUDIT_MQ_SENDRECV; | ||
1403 | ax->d.next = context->aux; | ||
1404 | context->aux = (void *)ax; | ||
1405 | return 0; | ||
1406 | } | ||
1407 | |||
1408 | /** | ||
1409 | * __audit_mq_timedreceive - record audit data for a POSIX MQ timed receive | ||
1410 | * @mqdes: MQ descriptor | ||
1411 | * @msg_len: Message length | ||
1412 | * @u_msg_prio: Message priority (user pointer) | ||
1413 | * @u_abs_timeout: Message timeout in absolute time | ||
1414 | * | ||
1415 | * Returns 0 for success or NULL context or < 0 on error. | ||
1416 | */ | ||
1417 | int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, | ||
1418 | unsigned int __user *u_msg_prio, | ||
1419 | const struct timespec __user *u_abs_timeout) | ||
1420 | { | ||
1421 | struct audit_aux_data_mq_sendrecv *ax; | ||
1422 | struct audit_context *context = current->audit_context; | ||
1423 | |||
1424 | if (!audit_enabled) | ||
1425 | return 0; | ||
1426 | |||
1427 | if (likely(!context)) | ||
1428 | return 0; | ||
1429 | |||
1430 | ax = kmalloc(sizeof(*ax), GFP_ATOMIC); | ||
1431 | if (!ax) | ||
1432 | return -ENOMEM; | ||
1433 | |||
1434 | if (u_msg_prio != NULL) { | ||
1435 | if (get_user(ax->msg_prio, u_msg_prio)) { | ||
1436 | kfree(ax); | ||
1437 | return -EFAULT; | ||
1438 | } | ||
1439 | } else | ||
1440 | ax->msg_prio = 0; | ||
1441 | |||
1442 | if (u_abs_timeout != NULL) { | ||
1443 | if (copy_from_user(&ax->abs_timeout, u_abs_timeout, sizeof(ax->abs_timeout))) { | ||
1444 | kfree(ax); | ||
1445 | return -EFAULT; | ||
1446 | } | ||
1447 | } else | ||
1448 | memset(&ax->abs_timeout, 0, sizeof(ax->abs_timeout)); | ||
1449 | |||
1450 | ax->mqdes = mqdes; | ||
1451 | ax->msg_len = msg_len; | ||
1452 | |||
1453 | ax->d.type = AUDIT_MQ_SENDRECV; | ||
1454 | ax->d.next = context->aux; | ||
1455 | context->aux = (void *)ax; | ||
1456 | return 0; | ||
1457 | } | ||
1458 | |||
1459 | /** | ||
1460 | * __audit_mq_notify - record audit data for a POSIX MQ notify | ||
1461 | * @mqdes: MQ descriptor | ||
1462 | * @u_notification: Notification event | ||
1463 | * | ||
1464 | * Returns 0 for success or NULL context or < 0 on error. | ||
1465 | */ | ||
1466 | |||
1467 | int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification) | ||
1468 | { | ||
1469 | struct audit_aux_data_mq_notify *ax; | ||
1470 | struct audit_context *context = current->audit_context; | ||
1471 | |||
1472 | if (!audit_enabled) | ||
1473 | return 0; | ||
1474 | |||
1475 | if (likely(!context)) | ||
1476 | return 0; | ||
1477 | |||
1478 | ax = kmalloc(sizeof(*ax), GFP_ATOMIC); | ||
1479 | if (!ax) | ||
1480 | return -ENOMEM; | ||
1481 | |||
1482 | if (u_notification != NULL) { | ||
1483 | if (copy_from_user(&ax->notification, u_notification, sizeof(ax->notification))) { | ||
1484 | kfree(ax); | ||
1485 | return -EFAULT; | ||
1486 | } | ||
1487 | } else | ||
1488 | memset(&ax->notification, 0, sizeof(ax->notification)); | ||
1489 | |||
1490 | ax->mqdes = mqdes; | ||
1491 | |||
1492 | ax->d.type = AUDIT_MQ_NOTIFY; | ||
1493 | ax->d.next = context->aux; | ||
1494 | context->aux = (void *)ax; | ||
1495 | return 0; | ||
1496 | } | ||
1497 | |||
1498 | /** | ||
1499 | * __audit_mq_getsetattr - record audit data for a POSIX MQ get/set attribute | ||
1500 | * @mqdes: MQ descriptor | ||
1501 | * @mqstat: MQ attributes (struct mq_attr) | ||
1502 | * | ||
1503 | * Returns 0 for success or NULL context or < 0 on error. | ||
1504 | */ | ||
1505 | int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) | ||
1506 | { | ||
1507 | struct audit_aux_data_mq_getsetattr *ax; | ||
1181 | struct audit_context *context = current->audit_context; | 1508 | struct audit_context *context = current->audit_context; |
1182 | 1509 | ||
1510 | if (!audit_enabled) | ||
1511 | return 0; | ||
1512 | |||
1183 | if (likely(!context)) | 1513 | if (likely(!context)) |
1184 | return 0; | 1514 | return 0; |
1185 | 1515 | ||
@@ -1187,6 +1517,30 @@ int audit_ipc_obj(struct kern_ipc_perm *ipcp) | |||
1187 | if (!ax) | 1517 | if (!ax) |
1188 | return -ENOMEM; | 1518 | return -ENOMEM; |
1189 | 1519 | ||
1520 | ax->mqdes = mqdes; | ||
1521 | ax->mqstat = *mqstat; | ||
1522 | |||
1523 | ax->d.type = AUDIT_MQ_GETSETATTR; | ||
1524 | ax->d.next = context->aux; | ||
1525 | context->aux = (void *)ax; | ||
1526 | return 0; | ||
1527 | } | ||
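All five MQ helpers added above share one pattern: allocate a type-specific record, fill it, and push it onto the context's singly linked aux list, to be emitted and freed at syscall exit. Reduced to its skeleton (field names follow the assignments above; the surrounding types are simplified for the sketch):

	struct audit_aux_data {
		struct audit_aux_data	*next;	/* chained as d.next above */
		int			type;	/* d.type: AUDIT_MQ_OPEN, ... */
	};

	/* push: newest record at the head of context->aux */
	ax->d.next = context->aux;
	context->aux = (void *)ax;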
1528 | |||
1529 | /** | ||
1530 | * __audit_ipc_obj - record audit data for ipc object | ||
1531 | * @ipcp: ipc permissions | ||
1532 | * | ||
1533 | * Returns 0 for success or NULL context or < 0 on error. | ||
1534 | */ | ||
1535 | int __audit_ipc_obj(struct kern_ipc_perm *ipcp) | ||
1536 | { | ||
1537 | struct audit_aux_data_ipcctl *ax; | ||
1538 | struct audit_context *context = current->audit_context; | ||
1539 | |||
1540 | ax = kmalloc(sizeof(*ax), GFP_ATOMIC); | ||
1541 | if (!ax) | ||
1542 | return -ENOMEM; | ||
1543 | |||
1190 | ax->uid = ipcp->uid; | 1544 | ax->uid = ipcp->uid; |
1191 | ax->gid = ipcp->gid; | 1545 | ax->gid = ipcp->gid; |
1192 | ax->mode = ipcp->mode; | 1546 | ax->mode = ipcp->mode; |
@@ -1204,17 +1558,15 @@ int audit_ipc_obj(struct kern_ipc_perm *ipcp) | |||
1204 | * @uid: msgq user id | 1558 | * @uid: msgq user id |
1205 | * @gid: msgq group id | 1559 | * @gid: msgq group id |
1206 | * @mode: msgq mode (permissions) | 1560 | * @mode: msgq mode (permissions) |
1561 | * @ipcp: in-kernel IPC permissions | ||
1207 | * | 1562 | * |
1208 | * Returns 0 for success or NULL context or < 0 on error. | 1563 | * Returns 0 for success or NULL context or < 0 on error. |
1209 | */ | 1564 | */ |
1210 | int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp) | 1565 | int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode) |
1211 | { | 1566 | { |
1212 | struct audit_aux_data_ipcctl *ax; | 1567 | struct audit_aux_data_ipcctl *ax; |
1213 | struct audit_context *context = current->audit_context; | 1568 | struct audit_context *context = current->audit_context; |
1214 | 1569 | ||
1215 | if (likely(!context)) | ||
1216 | return 0; | ||
1217 | |||
1218 | ax = kmalloc(sizeof(*ax), GFP_ATOMIC); | 1570 | ax = kmalloc(sizeof(*ax), GFP_ATOMIC); |
1219 | if (!ax) | 1571 | if (!ax) |
1220 | return -ENOMEM; | 1572 | return -ENOMEM; |
@@ -1223,7 +1575,6 @@ int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, | |||
1223 | ax->uid = uid; | 1575 | ax->uid = uid; |
1224 | ax->gid = gid; | 1576 | ax->gid = gid; |
1225 | ax->mode = mode; | 1577 | ax->mode = mode; |
1226 | selinux_get_ipc_sid(ipcp, &ax->osid); | ||
1227 | 1578 | ||
1228 | ax->d.type = AUDIT_IPC_SET_PERM; | 1579 | ax->d.type = AUDIT_IPC_SET_PERM; |
1229 | ax->d.next = context->aux; | 1580 | ax->d.next = context->aux; |
@@ -1231,6 +1582,39 @@ int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, | |||
1231 | return 0; | 1582 | return 0; |
1232 | } | 1583 | } |
1233 | 1584 | ||
1585 | int audit_bprm(struct linux_binprm *bprm) | ||
1586 | { | ||
1587 | struct audit_aux_data_execve *ax; | ||
1588 | struct audit_context *context = current->audit_context; | ||
1589 | unsigned long p, next; | ||
1590 | void *to; | ||
1591 | |||
1592 | if (likely(!audit_enabled || !context)) | ||
1593 | return 0; | ||
1594 | |||
1595 | ax = kmalloc(sizeof(*ax) + PAGE_SIZE * MAX_ARG_PAGES - bprm->p, | ||
1596 | GFP_KERNEL); | ||
1597 | if (!ax) | ||
1598 | return -ENOMEM; | ||
1599 | |||
1600 | ax->argc = bprm->argc; | ||
1601 | ax->envc = bprm->envc; | ||
1602 | for (p = bprm->p, to = ax->mem; p < MAX_ARG_PAGES*PAGE_SIZE; p = next) { | ||
1603 | struct page *page = bprm->page[p / PAGE_SIZE]; | ||
1604 | void *kaddr = kmap(page); | ||
1605 | next = (p + PAGE_SIZE) & ~(PAGE_SIZE - 1); | ||
1606 | memcpy(to, kaddr + (p & (PAGE_SIZE - 1)), next - p); | ||
1607 | to += next - p; | ||
1608 | kunmap(page); | ||
1609 | } | ||
1610 | |||
1611 | ax->d.type = AUDIT_EXECVE; | ||
1612 | ax->d.next = context->aux; | ||
1613 | context->aux = (void *)ax; | ||
1614 | return 0; | ||
1615 | } | ||
1616 | |||
1617 | |||
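The kmalloc() size in audit_bprm() follows from bprm->p counting down from the top of the MAX_ARG_PAGES window, so the argv+envp bytes in use are MAX_ARG_PAGES * PAGE_SIZE - bprm->p. A worked example with illustrative values:

	/* Illustrative numbers only (typical x86 values of the era):
	 *   PAGE_SIZE     = 4096
	 *   MAX_ARG_PAGES = 32      -> window = 131072 bytes
	 *   bprm->p       = 128000  -> used   = 131072 - 128000 = 3072
	 * The copy loop rounds 'next' up to the following page boundary,
	 * so each memcpy stays within the single kmapped page.
	 */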
1234 | /** | 1618 | /** |
1235 | * audit_socketcall - record audit data for sys_socketcall | 1619 | * audit_socketcall - record audit data for sys_socketcall |
1236 | * @nargs: number of args | 1620 | * @nargs: number of args |
@@ -1325,19 +1709,20 @@ int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt) | |||
1325 | * If the audit subsystem is being terminated, record the task (pid) | 1709 | * If the audit subsystem is being terminated, record the task (pid) |
1326 | * and uid that is doing that. | 1710 | * and uid that is doing that. |
1327 | */ | 1711 | */ |
1328 | void audit_signal_info(int sig, struct task_struct *t) | 1712 | void __audit_signal_info(int sig, struct task_struct *t) |
1329 | { | 1713 | { |
1330 | extern pid_t audit_sig_pid; | 1714 | extern pid_t audit_sig_pid; |
1331 | extern uid_t audit_sig_uid; | 1715 | extern uid_t audit_sig_uid; |
1332 | 1716 | extern u32 audit_sig_sid; | |
1333 | if (unlikely(audit_pid && t->tgid == audit_pid)) { | 1717 | |
1334 | if (sig == SIGTERM || sig == SIGHUP) { | 1718 | if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1) { |
1335 | struct audit_context *ctx = current->audit_context; | 1719 | struct task_struct *tsk = current; |
1336 | audit_sig_pid = current->pid; | 1720 | struct audit_context *ctx = tsk->audit_context; |
1337 | if (ctx) | 1721 | audit_sig_pid = tsk->pid; |
1338 | audit_sig_uid = ctx->loginuid; | 1722 | if (ctx) |
1339 | else | 1723 | audit_sig_uid = ctx->loginuid; |
1340 | audit_sig_uid = current->uid; | 1724 | else |
1341 | } | 1725 | audit_sig_uid = tsk->uid; |
1726 | selinux_get_task_sid(tsk, &audit_sig_sid); | ||
1342 | } | 1727 | } |
1343 | } | 1728 | } |
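As with the other renamed hooks, the audit-daemon check that used to guard the body (audit_pid && t->tgid == audit_pid, visible on the removed side) now presumably sits in an inline wrapper, so most signals never enter the function. A sketch of the assumed wrapper:

	static inline void audit_signal_info(int sig, struct task_struct *t)
	{
		/* audit_pid is the audit daemon's pid (extern) */
		if (unlikely(audit_pid && t->tgid == audit_pid))
			__audit_signal_info(sig, t);
	}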
diff --git a/kernel/compat.c b/kernel/compat.c index c1601a84f8d8..126dee9530aa 100644 --- a/kernel/compat.c +++ b/kernel/compat.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/unistd.h> | 21 | #include <linux/unistd.h> |
22 | #include <linux/security.h> | 22 | #include <linux/security.h> |
23 | #include <linux/timex.h> | 23 | #include <linux/timex.h> |
24 | #include <linux/migrate.h> | ||
24 | 25 | ||
25 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
26 | 27 | ||
@@ -729,17 +730,10 @@ void | |||
729 | sigset_from_compat (sigset_t *set, compat_sigset_t *compat) | 730 | sigset_from_compat (sigset_t *set, compat_sigset_t *compat) |
730 | { | 731 | { |
731 | switch (_NSIG_WORDS) { | 732 | switch (_NSIG_WORDS) { |
732 | #if defined (__COMPAT_ENDIAN_SWAP__) | ||
733 | case 4: set->sig[3] = compat->sig[7] | (((long)compat->sig[6]) << 32 ); | ||
734 | case 3: set->sig[2] = compat->sig[5] | (((long)compat->sig[4]) << 32 ); | ||
735 | case 2: set->sig[1] = compat->sig[3] | (((long)compat->sig[2]) << 32 ); | ||
736 | case 1: set->sig[0] = compat->sig[1] | (((long)compat->sig[0]) << 32 ); | ||
737 | #else | ||
738 | case 4: set->sig[3] = compat->sig[6] | (((long)compat->sig[7]) << 32 ); | 733 | case 4: set->sig[3] = compat->sig[6] | (((long)compat->sig[7]) << 32 ); |
739 | case 3: set->sig[2] = compat->sig[4] | (((long)compat->sig[5]) << 32 ); | 734 | case 3: set->sig[2] = compat->sig[4] | (((long)compat->sig[5]) << 32 ); |
740 | case 2: set->sig[1] = compat->sig[2] | (((long)compat->sig[3]) << 32 ); | 735 | case 2: set->sig[1] = compat->sig[2] | (((long)compat->sig[3]) << 32 ); |
741 | case 1: set->sig[0] = compat->sig[0] | (((long)compat->sig[1]) << 32 ); | 736 | case 1: set->sig[0] = compat->sig[0] | (((long)compat->sig[1]) << 32 ); |
742 | #endif | ||
743 | } | 737 | } |
744 | } | 738 | } |
745 | 739 | ||
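What the remaining switch computes, for one word: each 64-bit sigset word is assembled from two adjacent 32-bit compat words, low half first, now that the __COMPAT_ENDIAN_SWAP__ variant is gone. A worked example (values invented):

	/* compat->sig[0] = 0x00010002, compat->sig[1] = 0x00000001
	 * set->sig[0] = 0x00010002 | (0x00000001UL << 32)
	 *             = 0x0000000100010002
	 */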
@@ -934,3 +928,25 @@ asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp) | |||
934 | 928 | ||
935 | return ret; | 929 | return ret; |
936 | } | 930 | } |
931 | |||
932 | #ifdef CONFIG_NUMA | ||
933 | asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_pages, | ||
934 | compat_uptr_t __user *pages32, | ||
935 | const int __user *nodes, | ||
936 | int __user *status, | ||
937 | int flags) | ||
938 | { | ||
939 | const void __user * __user *pages; | ||
940 | int i; | ||
941 | |||
942 | pages = compat_alloc_user_space(nr_pages * sizeof(void *)); | ||
943 | for (i = 0; i < nr_pages; i++) { | ||
944 | compat_uptr_t p; | ||
945 | |||
946 | if (get_user(p, pages32 + i) || | ||
947 | put_user(compat_ptr(p), pages + i)) | ||
948 | return -EFAULT; | ||
949 | } | ||
950 | return sys_move_pages(pid, nr_pages, pages, nodes, status, flags); | ||
951 | } | ||
952 | #endif | ||
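compat_alloc_user_space() supplies the scratch buffer here; by convention it is carved out of the calling task's user stack, so the widened array needs no explicit cleanup. The shape of such a thunk, stripped to its essentials (a generic sketch, not this exact syscall; sys_native_call is hypothetical):

	/* widen a 32-bit pointer array, then reuse the native path */
	void __user * __user *buf;

	buf = compat_alloc_user_space(n * sizeof(void __user *));
	for (i = 0; i < n; i++) {
		compat_uptr_t p32;

		if (get_user(p32, arr32 + i) ||
		    put_user(compat_ptr(p32), buf + i))
			return -EFAULT;
	}
	return sys_native_call(buf, n);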
diff --git a/kernel/cpu.c b/kernel/cpu.c index fe2b8d0bfe4c..03dcd981846a 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -13,10 +13,10 @@ | |||
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/kthread.h> | 14 | #include <linux/kthread.h> |
15 | #include <linux/stop_machine.h> | 15 | #include <linux/stop_machine.h> |
16 | #include <asm/semaphore.h> | 16 | #include <linux/mutex.h> |
17 | 17 | ||
18 | /* This protects CPUs going up and down... */ | 18 | /* This protects CPUs going up and down... */ |
19 | static DECLARE_MUTEX(cpucontrol); | 19 | static DEFINE_MUTEX(cpucontrol); |
20 | 20 | ||
21 | static BLOCKING_NOTIFIER_HEAD(cpu_chain); | 21 | static BLOCKING_NOTIFIER_HEAD(cpu_chain); |
22 | 22 | ||
@@ -30,9 +30,9 @@ static int __lock_cpu_hotplug(int interruptible) | |||
30 | 30 | ||
31 | if (lock_cpu_hotplug_owner != current) { | 31 | if (lock_cpu_hotplug_owner != current) { |
32 | if (interruptible) | 32 | if (interruptible) |
33 | ret = down_interruptible(&cpucontrol); | 33 | ret = mutex_lock_interruptible(&cpucontrol); |
34 | else | 34 | else |
35 | down(&cpucontrol); | 35 | mutex_lock(&cpucontrol); |
36 | } | 36 | } |
37 | 37 | ||
38 | /* | 38 | /* |
@@ -56,7 +56,7 @@ void unlock_cpu_hotplug(void) | |||
56 | { | 56 | { |
57 | if (--lock_cpu_hotplug_depth == 0) { | 57 | if (--lock_cpu_hotplug_depth == 0) { |
58 | lock_cpu_hotplug_owner = NULL; | 58 | lock_cpu_hotplug_owner = NULL; |
59 | up(&cpucontrol); | 59 | mutex_unlock(&cpucontrol); |
60 | } | 60 | } |
61 | } | 61 | } |
62 | EXPORT_SYMBOL_GPL(unlock_cpu_hotplug); | 62 | EXPORT_SYMBOL_GPL(unlock_cpu_hotplug); |
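The conversion is mechanical but meaningful: DECLARE_MUTEX defined a counting semaphore initialized to 1, while DEFINE_MUTEX defines a true mutex with owner semantics and debug checking. Before/after contrast (a sketch, the two halves are alternatives, not one compilable unit):

	/* before: a semaphore used as a binary lock */
	static DECLARE_MUTEX(cpucontrol);
	down(&cpucontrol);       /* ... critical section ... */
	up(&cpucontrol);

	/* after: a real mutex -- owner-tracked and debug-checkable */
	static DEFINE_MUTEX(cpucontrol);
	mutex_lock(&cpucontrol); /* ... critical section ... */
	mutex_unlock(&cpucontrol);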
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index ab81fdd4572b..1535af3a912d 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/rcupdate.h> | 41 | #include <linux/rcupdate.h> |
42 | #include <linux/sched.h> | 42 | #include <linux/sched.h> |
43 | #include <linux/seq_file.h> | 43 | #include <linux/seq_file.h> |
44 | #include <linux/security.h> | ||
44 | #include <linux/slab.h> | 45 | #include <linux/slab.h> |
45 | #include <linux/smp_lock.h> | 46 | #include <linux/smp_lock.h> |
46 | #include <linux/spinlock.h> | 47 | #include <linux/spinlock.h> |
@@ -392,11 +393,11 @@ static int cpuset_fill_super(struct super_block *sb, void *unused_data, | |||
392 | return 0; | 393 | return 0; |
393 | } | 394 | } |
394 | 395 | ||
395 | static struct super_block *cpuset_get_sb(struct file_system_type *fs_type, | 396 | static int cpuset_get_sb(struct file_system_type *fs_type, |
396 | int flags, const char *unused_dev_name, | 397 | int flags, const char *unused_dev_name, |
397 | void *data) | 398 | void *data, struct vfsmount *mnt) |
398 | { | 399 | { |
399 | return get_sb_single(fs_type, flags, data, cpuset_fill_super); | 400 | return get_sb_single(fs_type, flags, data, cpuset_fill_super, mnt); |
400 | } | 401 | } |
401 | 402 | ||
402 | static struct file_system_type cpuset_fs_type = { | 403 | static struct file_system_type cpuset_fs_type = { |
@@ -1177,6 +1178,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) | |||
1177 | cpumask_t cpus; | 1178 | cpumask_t cpus; |
1178 | nodemask_t from, to; | 1179 | nodemask_t from, to; |
1179 | struct mm_struct *mm; | 1180 | struct mm_struct *mm; |
1181 | int retval; | ||
1180 | 1182 | ||
1181 | if (sscanf(pidbuf, "%d", &pid) != 1) | 1183 | if (sscanf(pidbuf, "%d", &pid) != 1) |
1182 | return -EIO; | 1184 | return -EIO; |
@@ -1205,6 +1207,12 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) | |||
1205 | get_task_struct(tsk); | 1207 | get_task_struct(tsk); |
1206 | } | 1208 | } |
1207 | 1209 | ||
1210 | retval = security_task_setscheduler(tsk, 0, NULL); | ||
1211 | if (retval) { | ||
1212 | put_task_struct(tsk); | ||
1213 | return retval; | ||
1214 | } | ||
1215 | |||
1208 | mutex_lock(&callback_mutex); | 1216 | mutex_lock(&callback_mutex); |
1209 | 1217 | ||
1210 | task_lock(tsk); | 1218 | task_lock(tsk); |
@@ -2434,31 +2442,43 @@ void __cpuset_memory_pressure_bump(void) | |||
2434 | */ | 2442 | */ |
2435 | static int proc_cpuset_show(struct seq_file *m, void *v) | 2443 | static int proc_cpuset_show(struct seq_file *m, void *v) |
2436 | { | 2444 | { |
2445 | struct pid *pid; | ||
2437 | struct task_struct *tsk; | 2446 | struct task_struct *tsk; |
2438 | char *buf; | 2447 | char *buf; |
2439 | int retval = 0; | 2448 | int retval; |
2440 | 2449 | ||
2450 | retval = -ENOMEM; | ||
2441 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); | 2451 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); |
2442 | if (!buf) | 2452 | if (!buf) |
2443 | return -ENOMEM; | 2453 | goto out; |
2444 | 2454 | ||
2445 | tsk = m->private; | 2455 | retval = -ESRCH; |
2456 | pid = m->private; | ||
2457 | tsk = get_pid_task(pid, PIDTYPE_PID); | ||
2458 | if (!tsk) | ||
2459 | goto out_free; | ||
2460 | |||
2461 | retval = -EINVAL; | ||
2446 | mutex_lock(&manage_mutex); | 2462 | mutex_lock(&manage_mutex); |
2463 | |||
2447 | retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE); | 2464 | retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE); |
2448 | if (retval < 0) | 2465 | if (retval < 0) |
2449 | goto out; | 2466 | goto out_unlock; |
2450 | seq_puts(m, buf); | 2467 | seq_puts(m, buf); |
2451 | seq_putc(m, '\n'); | 2468 | seq_putc(m, '\n'); |
2452 | out: | 2469 | out_unlock: |
2453 | mutex_unlock(&manage_mutex); | 2470 | mutex_unlock(&manage_mutex); |
2471 | put_task_struct(tsk); | ||
2472 | out_free: | ||
2454 | kfree(buf); | 2473 | kfree(buf); |
2474 | out: | ||
2455 | return retval; | 2475 | return retval; |
2456 | } | 2476 | } |
2457 | 2477 | ||
2458 | static int cpuset_open(struct inode *inode, struct file *file) | 2478 | static int cpuset_open(struct inode *inode, struct file *file) |
2459 | { | 2479 | { |
2460 | struct task_struct *tsk = PROC_I(inode)->task; | 2480 | struct pid *pid = PROC_I(inode)->pid; |
2461 | return single_open(file, proc_cpuset_show, tsk); | 2481 | return single_open(file, proc_cpuset_show, pid); |
2462 | } | 2482 | } |
2463 | 2483 | ||
2464 | struct file_operations proc_cpuset_operations = { | 2484 | struct file_operations proc_cpuset_operations = { |
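The cpuset /proc change follows the lifetime rule being applied across proc at this time: a struct pid reference stays valid across task exit, a cached task_struct pointer does not. The resulting access pattern, in isolation (sketch):

	struct task_struct *tsk = get_pid_task(pid, PIDTYPE_PID);

	if (!tsk)
		return -ESRCH;      /* task is already gone */
	/* ... read tsk->cpuset under the usual locks ... */
	put_task_struct(tsk);       /* drop the temporary reference */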
diff --git a/kernel/exit.c b/kernel/exit.c index e95b93282210..304ef637be6c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/compat.h> | 36 | #include <linux/compat.h> |
37 | #include <linux/pipe_fs_i.h> | 37 | #include <linux/pipe_fs_i.h> |
38 | #include <linux/audit.h> /* for audit_free() */ | 38 | #include <linux/audit.h> /* for audit_free() */ |
39 | #include <linux/resource.h> | ||
39 | 40 | ||
40 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
41 | #include <asm/unistd.h> | 42 | #include <asm/unistd.h> |
@@ -45,8 +46,6 @@ | |||
45 | extern void sem_exit (void); | 46 | extern void sem_exit (void); |
46 | extern struct task_struct *child_reaper; | 47 | extern struct task_struct *child_reaper; |
47 | 48 | ||
48 | int getrusage(struct task_struct *, int, struct rusage __user *); | ||
49 | |||
50 | static void exit_mm(struct task_struct * tsk); | 49 | static void exit_mm(struct task_struct * tsk); |
51 | 50 | ||
52 | static void __unhash_process(struct task_struct *p) | 51 | static void __unhash_process(struct task_struct *p) |
@@ -138,12 +137,8 @@ void release_task(struct task_struct * p) | |||
138 | { | 137 | { |
139 | int zap_leader; | 138 | int zap_leader; |
140 | task_t *leader; | 139 | task_t *leader; |
141 | struct dentry *proc_dentry; | ||
142 | |||
143 | repeat: | 140 | repeat: |
144 | atomic_dec(&p->user->processes); | 141 | atomic_dec(&p->user->processes); |
145 | spin_lock(&p->proc_lock); | ||
146 | proc_dentry = proc_pid_unhash(p); | ||
147 | write_lock_irq(&tasklist_lock); | 142 | write_lock_irq(&tasklist_lock); |
148 | ptrace_unlink(p); | 143 | ptrace_unlink(p); |
149 | BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); | 144 | BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); |
@@ -172,8 +167,7 @@ repeat: | |||
172 | 167 | ||
173 | sched_exit(p); | 168 | sched_exit(p); |
174 | write_unlock_irq(&tasklist_lock); | 169 | write_unlock_irq(&tasklist_lock); |
175 | spin_unlock(&p->proc_lock); | 170 | proc_flush_task(p); |
176 | proc_pid_flush(proc_dentry); | ||
177 | release_thread(p); | 171 | release_thread(p); |
178 | call_rcu(&p->rcu, delayed_put_task_struct); | 172 | call_rcu(&p->rcu, delayed_put_task_struct); |
179 | 173 | ||
@@ -579,7 +573,7 @@ static void exit_mm(struct task_struct * tsk) | |||
579 | down_read(&mm->mmap_sem); | 573 | down_read(&mm->mmap_sem); |
580 | } | 574 | } |
581 | atomic_inc(&mm->mm_count); | 575 | atomic_inc(&mm->mm_count); |
582 | if (mm != tsk->active_mm) BUG(); | 576 | BUG_ON(mm != tsk->active_mm); |
583 | /* more a memory barrier than a real lock */ | 577 | /* more a memory barrier than a real lock */ |
584 | task_lock(tsk); | 578 | task_lock(tsk); |
585 | tsk->mm = NULL; | 579 | tsk->mm = NULL; |
@@ -881,14 +875,6 @@ fastcall NORET_TYPE void do_exit(long code) | |||
881 | 875 | ||
882 | tsk->flags |= PF_EXITING; | 876 | tsk->flags |= PF_EXITING; |
883 | 877 | ||
884 | /* | ||
885 | * Make sure we don't try to process any timer firings | ||
886 | * while we are already exiting. | ||
887 | */ | ||
888 | tsk->it_virt_expires = cputime_zero; | ||
889 | tsk->it_prof_expires = cputime_zero; | ||
890 | tsk->it_sched_expires = 0; | ||
891 | |||
892 | if (unlikely(in_atomic())) | 878 | if (unlikely(in_atomic())) |
893 | printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", | 879 | printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", |
894 | current->comm, current->pid, | 880 | current->comm, current->pid, |
@@ -903,11 +889,11 @@ fastcall NORET_TYPE void do_exit(long code) | |||
903 | if (group_dead) { | 889 | if (group_dead) { |
904 | hrtimer_cancel(&tsk->signal->real_timer); | 890 | hrtimer_cancel(&tsk->signal->real_timer); |
905 | exit_itimers(tsk->signal); | 891 | exit_itimers(tsk->signal); |
906 | acct_process(code); | ||
907 | } | 892 | } |
893 | acct_collect(code, group_dead); | ||
908 | if (unlikely(tsk->robust_list)) | 894 | if (unlikely(tsk->robust_list)) |
909 | exit_robust_list(tsk); | 895 | exit_robust_list(tsk); |
910 | #ifdef CONFIG_COMPAT | 896 | #if defined(CONFIG_FUTEX) && defined(CONFIG_COMPAT) |
911 | if (unlikely(tsk->compat_robust_list)) | 897 | if (unlikely(tsk->compat_robust_list)) |
912 | compat_exit_robust_list(tsk); | 898 | compat_exit_robust_list(tsk); |
913 | #endif | 899 | #endif |
@@ -915,6 +901,8 @@ fastcall NORET_TYPE void do_exit(long code) | |||
915 | audit_free(tsk); | 901 | audit_free(tsk); |
916 | exit_mm(tsk); | 902 | exit_mm(tsk); |
917 | 903 | ||
904 | if (group_dead) | ||
905 | acct_process(); | ||
918 | exit_sem(tsk); | 906 | exit_sem(tsk); |
919 | __exit_files(tsk); | 907 | __exit_files(tsk); |
920 | __exit_fs(tsk); | 908 | __exit_fs(tsk); |
@@ -1538,8 +1526,7 @@ check_continued: | |||
1538 | if (options & __WNOTHREAD) | 1526 | if (options & __WNOTHREAD) |
1539 | break; | 1527 | break; |
1540 | tsk = next_thread(tsk); | 1528 | tsk = next_thread(tsk); |
1541 | if (tsk->signal != current->signal) | 1529 | BUG_ON(tsk->signal != current->signal); |
1542 | BUG(); | ||
1543 | } while (tsk != current); | 1530 | } while (tsk != current); |
1544 | 1531 | ||
1545 | read_unlock(&tasklist_lock); | 1532 | read_unlock(&tasklist_lock); |
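The accounting split is the heart of the exit.c change: per-thread data is sampled while the mm is still attached, and the record is written once per thread group after teardown. Condensed ordering (a sketch of the calls as rearranged above):

	acct_collect(code, group_dead);  /* every thread: sample while mm is live */
	/* ... */
	exit_mm(tsk);                    /* mm goes away here */
	if (group_dead)
		acct_process();          /* one pacct record for the whole group */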
diff --git a/kernel/fork.c b/kernel/fork.c index ac8100e3088a..9b4e54ef0225 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -368,6 +368,8 @@ void fastcall __mmdrop(struct mm_struct *mm) | |||
368 | */ | 368 | */ |
369 | void mmput(struct mm_struct *mm) | 369 | void mmput(struct mm_struct *mm) |
370 | { | 370 | { |
371 | might_sleep(); | ||
372 | |||
371 | if (atomic_dec_and_test(&mm->mm_users)) { | 373 | if (atomic_dec_and_test(&mm->mm_users)) { |
372 | exit_aio(mm); | 374 | exit_aio(mm); |
373 | exit_mmap(mm); | 375 | exit_mmap(mm); |
@@ -623,6 +625,7 @@ out: | |||
623 | /* | 625 | /* |
624 | * Allocate a new files structure and copy contents from the | 626 | * Allocate a new files structure and copy contents from the |
625 | * passed in files structure. | 627 | * passed in files structure. |
628 | * errorp will be valid only when the returned files_struct is NULL. | ||
626 | */ | 629 | */ |
627 | static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | 630 | static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) |
628 | { | 631 | { |
@@ -631,6 +634,7 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
631 | int open_files, size, i, expand; | 634 | int open_files, size, i, expand; |
632 | struct fdtable *old_fdt, *new_fdt; | 635 | struct fdtable *old_fdt, *new_fdt; |
633 | 636 | ||
637 | *errorp = -ENOMEM; | ||
634 | newf = alloc_files(); | 638 | newf = alloc_files(); |
635 | if (!newf) | 639 | if (!newf) |
636 | goto out; | 640 | goto out; |
@@ -744,7 +748,6 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) | |||
744 | * break this. | 748 | * break this. |
745 | */ | 749 | */ |
746 | tsk->files = NULL; | 750 | tsk->files = NULL; |
747 | error = -ENOMEM; | ||
748 | newf = dup_fd(oldf, &error); | 751 | newf = dup_fd(oldf, &error); |
749 | if (!newf) | 752 | if (!newf) |
750 | goto out; | 753 | goto out; |
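With *errorp now pre-set to -ENOMEM inside dup_fd(), the caller contract is explicit: the out-parameter is meaningful only on NULL return, which is why the 'error = -ENOMEM' at this call site could be dropped. The pattern (sketch):

	newf = dup_fd(oldf, &error);
	if (!newf)
		goto out;       /* 'error' was set by dup_fd() */
	/* on success, 'error' must not be inspected */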
@@ -871,6 +874,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts | |||
871 | tsk->it_prof_expires = | 874 | tsk->it_prof_expires = |
872 | secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); | 875 | secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); |
873 | } | 876 | } |
877 | acct_init_pacct(&sig->pacct); | ||
874 | 878 | ||
875 | return 0; | 879 | return 0; |
876 | } | 880 | } |
@@ -989,13 +993,10 @@ static task_t *copy_process(unsigned long clone_flags, | |||
989 | if (put_user(p->pid, parent_tidptr)) | 993 | if (put_user(p->pid, parent_tidptr)) |
990 | goto bad_fork_cleanup; | 994 | goto bad_fork_cleanup; |
991 | 995 | ||
992 | p->proc_dentry = NULL; | ||
993 | |||
994 | INIT_LIST_HEAD(&p->children); | 996 | INIT_LIST_HEAD(&p->children); |
995 | INIT_LIST_HEAD(&p->sibling); | 997 | INIT_LIST_HEAD(&p->sibling); |
996 | p->vfork_done = NULL; | 998 | p->vfork_done = NULL; |
997 | spin_lock_init(&p->alloc_lock); | 999 | spin_lock_init(&p->alloc_lock); |
998 | spin_lock_init(&p->proc_lock); | ||
999 | 1000 | ||
1000 | clear_tsk_thread_flag(p, TIF_SIGPENDING); | 1001 | clear_tsk_thread_flag(p, TIF_SIGPENDING); |
1001 | init_sigpending(&p->pending); | 1002 | init_sigpending(&p->pending); |
@@ -1155,18 +1156,6 @@ static task_t *copy_process(unsigned long clone_flags, | |||
1155 | } | 1156 | } |
1156 | 1157 | ||
1157 | if (clone_flags & CLONE_THREAD) { | 1158 | if (clone_flags & CLONE_THREAD) { |
1158 | /* | ||
1159 | * Important: if an exit-all has been started then | ||
1160 | * do not create this new thread - the whole thread | ||
1161 | * group is supposed to exit anyway. | ||
1162 | */ | ||
1163 | if (current->signal->flags & SIGNAL_GROUP_EXIT) { | ||
1164 | spin_unlock(¤t->sighand->siglock); | ||
1165 | write_unlock_irq(&tasklist_lock); | ||
1166 | retval = -EAGAIN; | ||
1167 | goto bad_fork_cleanup_namespace; | ||
1168 | } | ||
1169 | |||
1170 | p->group_leader = current->group_leader; | 1159 | p->group_leader = current->group_leader; |
1171 | list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); | 1160 | list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); |
1172 | 1161 | ||
diff --git a/kernel/futex.c b/kernel/futex.c index 5699c512057b..e1a380c77a5a 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -1056,11 +1056,11 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, int val, | |||
1056 | (unsigned long)uaddr2, val2, val3); | 1056 | (unsigned long)uaddr2, val2, val3); |
1057 | } | 1057 | } |
1058 | 1058 | ||
1059 | static struct super_block * | 1059 | static int futexfs_get_sb(struct file_system_type *fs_type, |
1060 | futexfs_get_sb(struct file_system_type *fs_type, | 1060 | int flags, const char *dev_name, void *data, |
1061 | int flags, const char *dev_name, void *data) | 1061 | struct vfsmount *mnt) |
1062 | { | 1062 | { |
1063 | return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA); | 1063 | return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA, mnt); |
1064 | } | 1064 | } |
1065 | 1065 | ||
1066 | static struct file_system_type futex_fs_type = { | 1066 | static struct file_system_type futex_fs_type = { |
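Both this hunk and the cpuset one above track the same VFS API change: ->get_sb() now returns an int and delivers the superblock through the caller-provided vfsmount. A generic sketch of the new shape (filesystem name and magic number invented for illustration):

	static int example_get_sb(struct file_system_type *fs_type, int flags,
				  const char *dev_name, void *data,
				  struct vfsmount *mnt)
	{
		/* helper attaches the sb to 'mnt' and returns 0 or -errno */
		return get_sb_pseudo(fs_type, "example:", NULL, 0xE4A3, mnt);
	}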
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 01fa2ae98a85..55601b3ce60e 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -98,7 +98,6 @@ static DEFINE_PER_CPU(struct hrtimer_base, hrtimer_bases[MAX_HRTIMER_BASES]) = | |||
98 | 98 | ||
99 | /** | 99 | /** |
100 | * ktime_get_ts - get the monotonic clock in timespec format | 100 | * ktime_get_ts - get the monotonic clock in timespec format |
101 | * | ||
102 | * @ts: pointer to timespec variable | 101 | * @ts: pointer to timespec variable |
103 | * | 102 | * |
104 | * The function calculates the monotonic clock from the realtime | 103 | * The function calculates the monotonic clock from the realtime |
@@ -238,7 +237,6 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) | |||
238 | # ifndef CONFIG_KTIME_SCALAR | 237 | # ifndef CONFIG_KTIME_SCALAR |
239 | /** | 238 | /** |
240 | * ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable | 239 | * ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable |
241 | * | ||
242 | * @kt: addend | 240 | * @kt: addend |
243 | * @nsec: the scalar nsec value to add | 241 | * @nsec: the scalar nsec value to add |
244 | * | 242 | * |
@@ -299,7 +297,6 @@ void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) | |||
299 | 297 | ||
300 | /** | 298 | /** |
301 | * hrtimer_forward - forward the timer expiry | 299 | * hrtimer_forward - forward the timer expiry |
302 | * | ||
303 | * @timer: hrtimer to forward | 300 | * @timer: hrtimer to forward |
304 | * @now: forward past this time | 301 | * @now: forward past this time |
305 | * @interval: the interval to forward | 302 | * @interval: the interval to forward |
@@ -393,7 +390,7 @@ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) | |||
393 | if (base->first == &timer->node) | 390 | if (base->first == &timer->node) |
394 | base->first = rb_next(&timer->node); | 391 | base->first = rb_next(&timer->node); |
395 | rb_erase(&timer->node, &base->active); | 392 | rb_erase(&timer->node, &base->active); |
396 | timer->node.rb_parent = HRTIMER_INACTIVE; | 393 | rb_set_parent(&timer->node, &timer->node); |
397 | } | 394 | } |
398 | 395 | ||
399 | /* | 396 | /* |
@@ -411,7 +408,6 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) | |||
411 | 408 | ||
412 | /** | 409 | /** |
413 | * hrtimer_start - (re)start an relative timer on the current CPU | 410 | * hrtimer_start - (re)start an relative timer on the current CPU |
414 | * | ||
415 | * @timer: the timer to be added | 411 | * @timer: the timer to be added |
416 | * @tim: expiry time | 412 | * @tim: expiry time |
417 | * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) | 413 | * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) |
@@ -460,14 +456,13 @@ EXPORT_SYMBOL_GPL(hrtimer_start); | |||
460 | 456 | ||
461 | /** | 457 | /** |
462 | * hrtimer_try_to_cancel - try to deactivate a timer | 458 | * hrtimer_try_to_cancel - try to deactivate a timer |
463 | * | ||
464 | * @timer: hrtimer to stop | 459 | * @timer: hrtimer to stop |
465 | * | 460 | * |
466 | * Returns: | 461 | * Returns: |
467 | * 0 when the timer was not active | 462 | * 0 when the timer was not active |
468 | * 1 when the timer was active | 463 | * 1 when the timer was active |
469 | * -1 when the timer is currently executing the callback function and | 464 | * -1 when the timer is currently executing the callback function and |
470 | * can not be stopped | 465 | * cannot be stopped |
471 | */ | 466 | */ |
472 | int hrtimer_try_to_cancel(struct hrtimer *timer) | 467 | int hrtimer_try_to_cancel(struct hrtimer *timer) |
473 | { | 468 | { |
@@ -489,7 +484,6 @@ EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel); | |||
489 | 484 | ||
490 | /** | 485 | /** |
491 | * hrtimer_cancel - cancel a timer and wait for the handler to finish. | 486 | * hrtimer_cancel - cancel a timer and wait for the handler to finish. |
492 | * | ||
493 | * @timer: the timer to be cancelled | 487 | * @timer: the timer to be cancelled |
494 | * | 488 | * |
495 | * Returns: | 489 | * Returns: |
@@ -510,7 +504,6 @@ EXPORT_SYMBOL_GPL(hrtimer_cancel); | |||
510 | 504 | ||
511 | /** | 505 | /** |
512 | * hrtimer_get_remaining - get remaining time for the timer | 506 | * hrtimer_get_remaining - get remaining time for the timer |
513 | * | ||
514 | * @timer: the timer to read | 507 | * @timer: the timer to read |
515 | */ | 508 | */ |
516 | ktime_t hrtimer_get_remaining(const struct hrtimer *timer) | 509 | ktime_t hrtimer_get_remaining(const struct hrtimer *timer) |
@@ -564,7 +557,6 @@ ktime_t hrtimer_get_next_event(void) | |||
564 | 557 | ||
565 | /** | 558 | /** |
566 | * hrtimer_init - initialize a timer to the given clock | 559 | * hrtimer_init - initialize a timer to the given clock |
567 | * | ||
568 | * @timer: the timer to be initialized | 560 | * @timer: the timer to be initialized |
569 | * @clock_id: the clock to be used | 561 | * @clock_id: the clock to be used |
570 | * @mode: timer mode abs/rel | 562 | * @mode: timer mode abs/rel |
@@ -576,19 +568,18 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, | |||
576 | 568 | ||
577 | memset(timer, 0, sizeof(struct hrtimer)); | 569 | memset(timer, 0, sizeof(struct hrtimer)); |
578 | 570 | ||
579 | bases = per_cpu(hrtimer_bases, raw_smp_processor_id()); | 571 | bases = __raw_get_cpu_var(hrtimer_bases); |
580 | 572 | ||
581 | if (clock_id == CLOCK_REALTIME && mode != HRTIMER_ABS) | 573 | if (clock_id == CLOCK_REALTIME && mode != HRTIMER_ABS) |
582 | clock_id = CLOCK_MONOTONIC; | 574 | clock_id = CLOCK_MONOTONIC; |
583 | 575 | ||
584 | timer->base = &bases[clock_id]; | 576 | timer->base = &bases[clock_id]; |
585 | timer->node.rb_parent = HRTIMER_INACTIVE; | 577 | rb_set_parent(&timer->node, &timer->node); |
586 | } | 578 | } |
587 | EXPORT_SYMBOL_GPL(hrtimer_init); | 579 | EXPORT_SYMBOL_GPL(hrtimer_init); |
588 | 580 | ||
589 | /** | 581 | /** |
590 | * hrtimer_get_res - get the timer resolution for a clock | 582 | * hrtimer_get_res - get the timer resolution for a clock |
591 | * | ||
592 | * @which_clock: which clock to query | 583 | * @which_clock: which clock to query |
593 | * @tp: pointer to timespec variable to store the resolution | 584 | * @tp: pointer to timespec variable to store the resolution |
594 | * | 585 | * |
@@ -599,7 +590,7 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) | |||
599 | { | 590 | { |
600 | struct hrtimer_base *bases; | 591 | struct hrtimer_base *bases; |
601 | 592 | ||
602 | bases = per_cpu(hrtimer_bases, raw_smp_processor_id()); | 593 | bases = __raw_get_cpu_var(hrtimer_bases); |
603 | *tp = ktime_to_timespec(bases[which_clock].resolution); | 594 | *tp = ktime_to_timespec(bases[which_clock].resolution); |
604 | 595 | ||
605 | return 0; | 596 | return 0; |
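The two rb_set_parent(&timer->node, &timer->node) calls replace the HRTIMER_INACTIVE magic value: a node whose parent pointer refers to itself cannot occur inside a real rbtree, so self-parenting is a safe "not queued" marker. The corresponding test, as it is presumably written elsewhere (sketch):

	static inline int hrtimer_is_queued(const struct hrtimer *timer)
	{
		/* self-parented node == not in the tree */
		return rb_parent(&timer->node) != &timer->node;
	}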
diff --git a/kernel/intermodule.c b/kernel/intermodule.c deleted file mode 100644 index 55b1e5b85db9..000000000000 --- a/kernel/intermodule.c +++ /dev/null | |||
@@ -1,184 +0,0 @@ | |||
1 | /* Deprecated, do not use. Moved from module.c to here. --RR */ | ||
2 | |||
3 | /* Written by Keith Owens <kaos@ocs.com.au> Oct 2000 */ | ||
4 | #include <linux/module.h> | ||
5 | #include <linux/kmod.h> | ||
6 | #include <linux/spinlock.h> | ||
7 | #include <linux/list.h> | ||
8 | #include <linux/slab.h> | ||
9 | |||
10 | /* inter_module functions are always available, even when the kernel is | ||
11 | * compiled without modules. Consumers of inter_module_xxx routines | ||
12 | * will always work, even when both are built into the kernel, this | ||
13 | * approach removes lots of #ifdefs in mainline code. | ||
14 | */ | ||
15 | |||
16 | static struct list_head ime_list = LIST_HEAD_INIT(ime_list); | ||
17 | static DEFINE_SPINLOCK(ime_lock); | ||
18 | static int kmalloc_failed; | ||
19 | |||
20 | struct inter_module_entry { | ||
21 | struct list_head list; | ||
22 | const char *im_name; | ||
23 | struct module *owner; | ||
24 | const void *userdata; | ||
25 | }; | ||
26 | |||
27 | /** | ||
28 | * inter_module_register - register a new set of inter module data. | ||
29 | * @im_name: an arbitrary string to identify the data, must be unique | ||
30 | * @owner: module that is registering the data, always use THIS_MODULE | ||
31 | * @userdata: pointer to arbitrary userdata to be registered | ||
32 | * | ||
33 | * Description: Check that the im_name has not already been registered, | ||
34 | * complain if it has. For new data, add it to the inter_module_entry | ||
35 | * list. | ||
36 | */ | ||
37 | void inter_module_register(const char *im_name, struct module *owner, const void *userdata) | ||
38 | { | ||
39 | struct list_head *tmp; | ||
40 | struct inter_module_entry *ime, *ime_new; | ||
41 | |||
42 | if (!(ime_new = kzalloc(sizeof(*ime), GFP_KERNEL))) { | ||
43 | /* Overloaded kernel, not fatal */ | ||
44 | printk(KERN_ERR | ||
45 | "Aiee, inter_module_register: cannot kmalloc entry for '%s'\n", | ||
46 | im_name); | ||
47 | kmalloc_failed = 1; | ||
48 | return; | ||
49 | } | ||
50 | ime_new->im_name = im_name; | ||
51 | ime_new->owner = owner; | ||
52 | ime_new->userdata = userdata; | ||
53 | |||
54 | spin_lock(&ime_lock); | ||
55 | list_for_each(tmp, &ime_list) { | ||
56 | ime = list_entry(tmp, struct inter_module_entry, list); | ||
57 | if (strcmp(ime->im_name, im_name) == 0) { | ||
58 | spin_unlock(&ime_lock); | ||
59 | kfree(ime_new); | ||
60 | /* Program logic error, fatal */ | ||
61 | printk(KERN_ERR "inter_module_register: duplicate im_name '%s'", im_name); | ||
62 | BUG(); | ||
63 | } | ||
64 | } | ||
65 | list_add(&(ime_new->list), &ime_list); | ||
66 | spin_unlock(&ime_lock); | ||
67 | } | ||
68 | |||
69 | /** | ||
70 | * inter_module_unregister - unregister a set of inter module data. | ||
71 | * @im_name: an arbitrary string to identify the data, must be unique | ||
72 | * | ||
73 | * Description: Check that the im_name has been registered, complain if | ||
74 | * it has not. For existing data, remove it from the | ||
75 | * inter_module_entry list. | ||
76 | */ | ||
77 | void inter_module_unregister(const char *im_name) | ||
78 | { | ||
79 | struct list_head *tmp; | ||
80 | struct inter_module_entry *ime; | ||
81 | |||
82 | spin_lock(&ime_lock); | ||
83 | list_for_each(tmp, &ime_list) { | ||
84 | ime = list_entry(tmp, struct inter_module_entry, list); | ||
85 | if (strcmp(ime->im_name, im_name) == 0) { | ||
86 | list_del(&(ime->list)); | ||
87 | spin_unlock(&ime_lock); | ||
88 | kfree(ime); | ||
89 | return; | ||
90 | } | ||
91 | } | ||
92 | spin_unlock(&ime_lock); | ||
93 | if (kmalloc_failed) { | ||
94 | printk(KERN_ERR | ||
95 | "inter_module_unregister: no entry for '%s', " | ||
96 | "probably caused by previous kmalloc failure\n", | ||
97 | im_name); | ||
98 | return; | ||
99 | } | ||
100 | else { | ||
101 | /* Program logic error, fatal */ | ||
102 | printk(KERN_ERR "inter_module_unregister: no entry for '%s'", im_name); | ||
103 | BUG(); | ||
104 | } | ||
105 | } | ||
106 | |||
107 | /** | ||
108 | * inter_module_get - return arbitrary userdata from another module. | ||
109 | * @im_name: an arbitrary string to identify the data, must be unique | ||
110 | * | ||
111 | * Description: If the im_name has not been registered, return NULL. | ||
112 | * Try to increment the use count on the owning module, if that fails | ||
113 | * then return NULL. Otherwise return the userdata. | ||
114 | */ | ||
115 | static const void *inter_module_get(const char *im_name) | ||
116 | { | ||
117 | struct list_head *tmp; | ||
118 | struct inter_module_entry *ime; | ||
119 | const void *result = NULL; | ||
120 | |||
121 | spin_lock(&ime_lock); | ||
122 | list_for_each(tmp, &ime_list) { | ||
123 | ime = list_entry(tmp, struct inter_module_entry, list); | ||
124 | if (strcmp(ime->im_name, im_name) == 0) { | ||
125 | if (try_module_get(ime->owner)) | ||
126 | result = ime->userdata; | ||
127 | break; | ||
128 | } | ||
129 | } | ||
130 | spin_unlock(&ime_lock); | ||
131 | return(result); | ||
132 | } | ||
133 | |||
134 | /** | ||
135 | * inter_module_get_request - im get with automatic request_module. | ||
136 | * @im_name: an arbitrary string to identify the data, must be unique | ||
137 | * @modname: module that is expected to register im_name | ||
138 | * | ||
139 | * Description: If inter_module_get fails, do request_module then retry. | ||
140 | */ | ||
141 | const void *inter_module_get_request(const char *im_name, const char *modname) | ||
142 | { | ||
143 | const void *result = inter_module_get(im_name); | ||
144 | if (!result) { | ||
145 | request_module("%s", modname); | ||
146 | result = inter_module_get(im_name); | ||
147 | } | ||
148 | return(result); | ||
149 | } | ||
150 | |||
151 | /** | ||
152 | * inter_module_put - release use of data from another module. | ||
153 | * @im_name: an arbitrary string to identify the data, must be unique | ||
154 | * | ||
155 | * Description: If the im_name has not been registered, complain, | ||
156 | * otherwise decrement the use count on the owning module. | ||
157 | */ | ||
158 | void inter_module_put(const char *im_name) | ||
159 | { | ||
160 | struct list_head *tmp; | ||
161 | struct inter_module_entry *ime; | ||
162 | |||
163 | spin_lock(&ime_lock); | ||
164 | list_for_each(tmp, &ime_list) { | ||
165 | ime = list_entry(tmp, struct inter_module_entry, list); | ||
166 | if (strcmp(ime->im_name, im_name) == 0) { | ||
167 | if (ime->owner) | ||
168 | module_put(ime->owner); | ||
169 | spin_unlock(&ime_lock); | ||
170 | return; | ||
171 | } | ||
172 | } | ||
173 | spin_unlock(&ime_lock); | ||
174 | printk(KERN_ERR "inter_module_put: no entry for '%s'", im_name); | ||
175 | BUG(); | ||
176 | } | ||
177 | |||
178 | EXPORT_SYMBOL(inter_module_register); | ||
179 | EXPORT_SYMBOL(inter_module_unregister); | ||
180 | EXPORT_SYMBOL(inter_module_get_request); | ||
181 | EXPORT_SYMBOL(inter_module_put); | ||
182 | |||
183 | MODULE_LICENSE("GPL"); | ||
184 | |||
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 51df337b37db..0f6530117105 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c | |||
@@ -76,10 +76,11 @@ irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs) | |||
76 | /* | 76 | /* |
77 | * We have an event to handle: | 77 | * We have an event to handle: |
78 | */ | 78 | */ |
79 | fastcall int handle_IRQ_event(unsigned int irq, struct pt_regs *regs, | 79 | fastcall irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs, |
80 | struct irqaction *action) | 80 | struct irqaction *action) |
81 | { | 81 | { |
82 | int ret, retval = 0, status = 0; | 82 | irqreturn_t ret, retval = IRQ_NONE; |
83 | unsigned int status = 0; | ||
83 | 84 | ||
84 | if (!(action->flags & SA_INTERRUPT)) | 85 | if (!(action->flags & SA_INTERRUPT)) |
85 | local_irq_enable(); | 86 | local_irq_enable(); |
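The retype from int to irqreturn_t matters for the loop that follows this excerpt: each handler registered on a shared line reports IRQ_NONE or IRQ_HANDLED, and the results are OR-ed together so the caller can detect unhandled interrupts. Roughly (a sketch, not the exact source):

	do {
		ret = action->handler(irq, action->dev_id, regs);
		if (ret == IRQ_HANDLED)
			status |= action->flags;
		retval |= ret;          /* irqreturn_t accumulates cleanly */
		action = action->next;
	} while (action);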
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index 134f9f2e0e39..a12d00eb5e7c 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c | |||
@@ -30,7 +30,7 @@ void move_native_irq(int irq) | |||
30 | 30 | ||
31 | desc->move_irq = 0; | 31 | desc->move_irq = 0; |
32 | 32 | ||
33 | if (likely(cpus_empty(pending_irq_cpumask[irq]))) | 33 | if (unlikely(cpus_empty(pending_irq_cpumask[irq]))) |
34 | return; | 34 | return; |
35 | 35 | ||
36 | if (!desc->handler->set_affinity) | 36 | if (!desc->handler->set_affinity) |
@@ -49,7 +49,7 @@ void move_native_irq(int irq) | |||
49 | * cause some ioapics to malfunction. | 49 | * cause some ioapics to malfunction. |
50 | * Being paranoid, I guess! | 50 | * Being paranoid, I guess! |
51 | */ | 51 | */ |
52 | if (unlikely(!cpus_empty(tmp))) { | 52 | if (likely(!cpus_empty(tmp))) { |
53 | if (likely(!(desc->status & IRQ_DISABLED))) | 53 | if (likely(!(desc->status & IRQ_DISABLED))) |
54 | desc->handler->disable(irq); | 54 | desc->handler->disable(irq); |
55 | 55 | ||
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index d03b5eef8ce0..afacd6f585fa 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
@@ -24,6 +24,8 @@ static struct proc_dir_entry *smp_affinity_entry[NR_IRQS]; | |||
24 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 24 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
25 | void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) | 25 | void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) |
26 | { | 26 | { |
27 | set_balance_irq_affinity(irq, mask_val); | ||
28 | |||
27 | /* | 29 | /* |
28 | * Save these away for later use. Re-progam when the | 30 | * Save these away for later use. Re-progam when the |
29 | * interrupt is pending | 31 | * interrupt is pending |
@@ -33,6 +35,7 @@ void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) | |||
33 | #else | 35 | #else |
34 | void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) | 36 | void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) |
35 | { | 37 | { |
38 | set_balance_irq_affinity(irq, mask_val); | ||
36 | irq_affinity[irq] = mask_val; | 39 | irq_affinity[irq] = mask_val; |
37 | irq_desc[irq].handler->set_affinity(irq, mask_val); | 40 | irq_desc[irq].handler->set_affinity(irq, mask_val); |
38 | } | 41 | } |
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 7df9abd5ec86..b2fb3c18d06b 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c | |||
@@ -11,7 +11,7 @@ | |||
11 | #include <linux/kallsyms.h> | 11 | #include <linux/kallsyms.h> |
12 | #include <linux/interrupt.h> | 12 | #include <linux/interrupt.h> |
13 | 13 | ||
14 | static int irqfixup; | 14 | static int irqfixup __read_mostly; |
15 | 15 | ||
16 | /* | 16 | /* |
17 | * Recovery handler for misrouted interrupts. | 17 | * Recovery handler for misrouted interrupts. |
@@ -136,9 +136,9 @@ static void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t actio | |||
136 | void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret, | 136 | void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret, |
137 | struct pt_regs *regs) | 137 | struct pt_regs *regs) |
138 | { | 138 | { |
139 | if (action_ret != IRQ_HANDLED) { | 139 | if (unlikely(action_ret != IRQ_HANDLED)) { |
140 | desc->irqs_unhandled++; | 140 | desc->irqs_unhandled++; |
141 | if (action_ret != IRQ_NONE) | 141 | if (unlikely(action_ret != IRQ_NONE)) |
142 | report_bad_irq(irq, desc, action_ret); | 142 | report_bad_irq(irq, desc, action_ret); |
143 | } | 143 | } |
144 | 144 | ||
@@ -152,11 +152,11 @@ void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret, | |||
152 | } | 152 | } |
153 | 153 | ||
154 | desc->irq_count++; | 154 | desc->irq_count++; |
155 | if (desc->irq_count < 100000) | 155 | if (likely(desc->irq_count < 100000)) |
156 | return; | 156 | return; |
157 | 157 | ||
158 | desc->irq_count = 0; | 158 | desc->irq_count = 0; |
159 | if (desc->irqs_unhandled > 99900) { | 159 | if (unlikely(desc->irqs_unhandled > 99900)) { |
160 | /* | 160 | /* |
161 | * The interrupt is stuck | 161 | * The interrupt is stuck |
162 | */ | 162 | */ |
@@ -171,7 +171,7 @@ void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret, | |||
171 | desc->irqs_unhandled = 0; | 171 | desc->irqs_unhandled = 0; |
172 | } | 172 | } |
173 | 173 | ||
174 | int noirqdebug; | 174 | int noirqdebug __read_mostly; |
175 | 175 | ||
176 | int __init noirqdebug_setup(char *str) | 176 | int __init noirqdebug_setup(char *str) |
177 | { | 177 | { |
diff --git a/kernel/kexec.c b/kernel/kexec.c index bf39d28e4c0e..58f0f382597c 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
@@ -902,14 +902,14 @@ static int kimage_load_segment(struct kimage *image, | |||
902 | * kexec does not sync, or unmount filesystems so if you need | 902 | * kexec does not sync, or unmount filesystems so if you need |
903 | * that to happen you need to do that yourself. | 903 | * that to happen you need to do that yourself. |
904 | */ | 904 | */ |
905 | struct kimage *kexec_image = NULL; | 905 | struct kimage *kexec_image; |
906 | static struct kimage *kexec_crash_image = NULL; | 906 | struct kimage *kexec_crash_image; |
907 | /* | 907 | /* |
908 | * A home grown binary mutex. | 908 | * A home grown binary mutex. |
909 | * Nothing can wait so this mutex is safe to use | 909 | * Nothing can wait so this mutex is safe to use |
910 | * in interrupt context :) | 910 | * in interrupt context :) |
911 | */ | 911 | */ |
912 | static int kexec_lock = 0; | 912 | static int kexec_lock; |
913 | 913 | ||
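Dropping "= NULL" and "= 0" from the file-scope objects above is a pure cleanup: C guarantees zero initialization for objects with static storage duration, and the explicit initializers only risked moving the objects out of .bss on some toolchains.

	static int kexec_lock;       /* identical to: static int kexec_lock = 0; */
	struct kimage *kexec_image;  /* identical to: ... = NULL; */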
914 | asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, | 914 | asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, |
915 | struct kexec_segment __user *segments, | 915 | struct kexec_segment __user *segments, |
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 1fbf466a29aa..64aab081153b 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -47,11 +47,17 @@ | |||
47 | 47 | ||
48 | static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; | 48 | static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; |
49 | static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; | 49 | static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; |
50 | static atomic_t kprobe_count; | ||
50 | 51 | ||
51 | DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ | 52 | DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ |
52 | DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ | 53 | DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ |
53 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; | 54 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; |
54 | 55 | ||
56 | static struct notifier_block kprobe_page_fault_nb = { | ||
57 | .notifier_call = kprobe_exceptions_notify, | ||
58 | .priority = 0x7fffffff /* we need to be notified first */ | ||
59 | }; | ||
60 | |||
55 | #ifdef __ARCH_WANT_KPROBES_INSN_SLOT | 61 | #ifdef __ARCH_WANT_KPROBES_INSN_SLOT |
56 | /* | 62 | /* |
57 | * kprobe->ainsn.insn points to the copy of the instruction to be | 63 | * kprobe->ainsn.insn points to the copy of the instruction to be |
@@ -368,16 +374,15 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) | |||
368 | */ | 374 | */ |
369 | static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) | 375 | static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) |
370 | { | 376 | { |
371 | struct kprobe *kp; | ||
372 | |||
373 | if (p->break_handler) { | 377 | if (p->break_handler) { |
374 | list_for_each_entry_rcu(kp, &old_p->list, list) { | 378 | if (old_p->break_handler) |
375 | if (kp->break_handler) | 379 | return -EEXIST; |
376 | return -EEXIST; | ||
377 | } | ||
378 | list_add_tail_rcu(&p->list, &old_p->list); | 380 | list_add_tail_rcu(&p->list, &old_p->list); |
381 | old_p->break_handler = aggr_break_handler; | ||
379 | } else | 382 | } else |
380 | list_add_rcu(&p->list, &old_p->list); | 383 | list_add_rcu(&p->list, &old_p->list); |
384 | if (p->post_handler && !old_p->post_handler) | ||
385 | old_p->post_handler = aggr_post_handler; | ||
381 | return 0; | 386 | return 0; |
382 | } | 387 | } |
383 | 388 | ||
@@ -390,9 +395,11 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) | |||
390 | copy_kprobe(p, ap); | 395 | copy_kprobe(p, ap); |
391 | ap->addr = p->addr; | 396 | ap->addr = p->addr; |
392 | ap->pre_handler = aggr_pre_handler; | 397 | ap->pre_handler = aggr_pre_handler; |
393 | ap->post_handler = aggr_post_handler; | ||
394 | ap->fault_handler = aggr_fault_handler; | 398 | ap->fault_handler = aggr_fault_handler; |
395 | ap->break_handler = aggr_break_handler; | 399 | if (p->post_handler) |
400 | ap->post_handler = aggr_post_handler; | ||
401 | if (p->break_handler) | ||
402 | ap->break_handler = aggr_break_handler; | ||
396 | 403 | ||
397 | INIT_LIST_HEAD(&ap->list); | 404 | INIT_LIST_HEAD(&ap->list); |
398 | list_add_rcu(&p->list, &ap->list); | 405 | list_add_rcu(&p->list, &ap->list); |
@@ -464,6 +471,8 @@ static int __kprobes __register_kprobe(struct kprobe *p, | |||
464 | old_p = get_kprobe(p->addr); | 471 | old_p = get_kprobe(p->addr); |
465 | if (old_p) { | 472 | if (old_p) { |
466 | ret = register_aggr_kprobe(old_p, p); | 473 | ret = register_aggr_kprobe(old_p, p); |
474 | if (!ret) | ||
475 | atomic_inc(&kprobe_count); | ||
467 | goto out; | 476 | goto out; |
468 | } | 477 | } |
469 | 478 | ||
@@ -474,6 +483,10 @@ static int __kprobes __register_kprobe(struct kprobe *p, | |||
474 | hlist_add_head_rcu(&p->hlist, | 483 | hlist_add_head_rcu(&p->hlist, |
475 | &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); | 484 | &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); |
476 | 485 | ||
486 | if (atomic_add_return(1, &kprobe_count) == \ | ||
487 | (ARCH_INACTIVE_KPROBE_COUNT + 1)) | ||
488 | register_page_fault_notifier(&kprobe_page_fault_nb); | ||
489 | |||
477 | arch_arm_kprobe(p); | 490 | arch_arm_kprobe(p); |
478 | 491 | ||
479 | out: | 492 | out: |
@@ -536,14 +549,40 @@ valid_p: | |||
536 | kfree(old_p); | 549 | kfree(old_p); |
537 | } | 550 | } |
538 | arch_remove_kprobe(p); | 551 | arch_remove_kprobe(p); |
552 | } else { | ||
553 | mutex_lock(&kprobe_mutex); | ||
554 | if (p->break_handler) | ||
555 | old_p->break_handler = NULL; | ||
556 | if (p->post_handler) { | ||
557 | list_for_each_entry_rcu(list_p, &old_p->list, list) { | ||
558 | if (list_p->post_handler) { | ||
559 | cleanup_p = 2; | ||
560 | break; | ||
561 | } | ||
562 | } | ||
563 | if (cleanup_p == 0) | ||
564 | old_p->post_handler = NULL; | ||
565 | } | ||
566 | mutex_unlock(&kprobe_mutex); | ||
539 | } | 567 | } |
568 | |||
569 | /* Call unregister_page_fault_notifier() | ||
570 | * if no probes are active | ||
571 | */ | ||
572 | mutex_lock(&kprobe_mutex); | ||
573 | if (atomic_add_return(-1, &kprobe_count) == \ | ||
574 | ARCH_INACTIVE_KPROBE_COUNT) | ||
575 | unregister_page_fault_notifier(&kprobe_page_fault_nb); | ||
576 | mutex_unlock(&kprobe_mutex); | ||
577 | return; | ||
540 | } | 578 | } |
541 | 579 | ||
542 | static struct notifier_block kprobe_exceptions_nb = { | 580 | static struct notifier_block kprobe_exceptions_nb = { |
543 | .notifier_call = kprobe_exceptions_notify, | 581 | .notifier_call = kprobe_exceptions_notify, |
544 | .priority = 0x7fffffff /* we need to notified first */ | 582 | .priority = 0x7fffffff /* we need to be notified first */ |
545 | }; | 583 | }; |
546 | 584 | ||
585 | |||
547 | int __kprobes register_jprobe(struct jprobe *jp) | 586 | int __kprobes register_jprobe(struct jprobe *jp) |
548 | { | 587 | { |
549 | /* Todo: Verify probepoint is a function entry point */ | 588 | /* Todo: Verify probepoint is a function entry point */ |
@@ -652,6 +691,7 @@ static int __init init_kprobes(void) | |||
652 | INIT_HLIST_HEAD(&kprobe_table[i]); | 691 | INIT_HLIST_HEAD(&kprobe_table[i]); |
653 | INIT_HLIST_HEAD(&kretprobe_inst_table[i]); | 692 | INIT_HLIST_HEAD(&kretprobe_inst_table[i]); |
654 | } | 693 | } |
694 | atomic_set(&kprobe_count, 0); | ||
655 | 695 | ||
656 | err = arch_init_kprobes(); | 696 | err = arch_init_kprobes(); |
657 | if (!err) | 697 | if (!err) |
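
Taken together, the kprobes hunks above do two things: an aggregated probe now installs aggr_post_handler/aggr_break_handler only when some registered probe actually supplies that handler, and the page-fault notifier is registered lazily, once kprobe_count rises past ARCH_INACTIVE_KPROBE_COUNT (and dropped again when the count falls back). A minimal sketch of a module that exercises this path (not part of the patch; PROBE_ADDR is a hypothetical stand-in for a real kernel text address, e.g. one taken from System.map for this exact build):

    #include <linux/module.h>
    #include <linux/kprobes.h>

    /* Assumption: a valid text address for this kernel build. */
    #define PROBE_ADDR 0xc0100000UL

    static int my_pre(struct kprobe *p, struct pt_regs *regs)
    {
        return 0;                       /* continue normally */
    }

    static void my_post(struct kprobe *p, struct pt_regs *regs,
                        unsigned long flags)
    {
    }

    static struct kprobe kp = {
        .pre_handler  = my_pre,
        /* A post_handler is what makes the aggregator install
         * aggr_post_handler if a second probe lands on this address. */
        .post_handler = my_post,
    };

    static int __init kp_init(void)
    {
        kp.addr = (kprobe_opcode_t *)PROBE_ADDR;
        /* The registration that pushes kprobe_count past
         * ARCH_INACTIVE_KPROBE_COUNT also registers the page-fault
         * notifier; the matching unregister drops it again. */
        return register_kprobe(&kp);
    }

    static void __exit kp_exit(void)
    {
        unregister_kprobe(&kp);
    }

    module_init(kp_init);
    module_exit(kp_exit);
    MODULE_LICENSE("GPL");
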
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index f119e098e67b..9e28478a17a5 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/sysfs.h> | 14 | #include <linux/sysfs.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/init.h> | 16 | #include <linux/init.h> |
17 | #include <linux/kexec.h> | ||
17 | 18 | ||
18 | #define KERNEL_ATTR_RO(_name) \ | 19 | #define KERNEL_ATTR_RO(_name) \ |
19 | static struct subsys_attribute _name##_attr = __ATTR_RO(_name) | 20 | static struct subsys_attribute _name##_attr = __ATTR_RO(_name) |
@@ -48,6 +49,20 @@ static ssize_t uevent_helper_store(struct subsystem *subsys, const char *page, s | |||
48 | KERNEL_ATTR_RW(uevent_helper); | 49 | KERNEL_ATTR_RW(uevent_helper); |
49 | #endif | 50 | #endif |
50 | 51 | ||
52 | #ifdef CONFIG_KEXEC | ||
53 | static ssize_t kexec_loaded_show(struct subsystem *subsys, char *page) | ||
54 | { | ||
55 | return sprintf(page, "%d\n", !!kexec_image); | ||
56 | } | ||
57 | KERNEL_ATTR_RO(kexec_loaded); | ||
58 | |||
59 | static ssize_t kexec_crash_loaded_show(struct subsystem *subsys, char *page) | ||
60 | { | ||
61 | return sprintf(page, "%d\n", !!kexec_crash_image); | ||
62 | } | ||
63 | KERNEL_ATTR_RO(kexec_crash_loaded); | ||
64 | #endif /* CONFIG_KEXEC */ | ||
65 | |||
51 | decl_subsys(kernel, NULL, NULL); | 66 | decl_subsys(kernel, NULL, NULL); |
52 | EXPORT_SYMBOL_GPL(kernel_subsys); | 67 | EXPORT_SYMBOL_GPL(kernel_subsys); |
53 | 68 | ||
@@ -56,6 +71,10 @@ static struct attribute * kernel_attrs[] = { | |||
56 | &uevent_seqnum_attr.attr, | 71 | &uevent_seqnum_attr.attr, |
57 | &uevent_helper_attr.attr, | 72 | &uevent_helper_attr.attr, |
58 | #endif | 73 | #endif |
74 | #ifdef CONFIG_KEXEC | ||
75 | &kexec_loaded_attr.attr, | ||
76 | &kexec_crash_loaded_attr.attr, | ||
77 | #endif | ||
59 | NULL | 78 | NULL |
60 | }; | 79 | }; |
61 | 80 | ||
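
Since decl_subsys(kernel, ...) backs /sys/kernel, the two new attributes should surface as /sys/kernel/kexec_loaded and /sys/kernel/kexec_crash_loaded, each reading back "0" or "1". A small userspace sketch of consuming them (illustrative, not from the patch):

    #include <stdio.h>

    /* Returns 1/0 from /sys/kernel/kexec_loaded, or -1 if the file
     * is absent (CONFIG_KEXEC=n or an older kernel). */
    static int kexec_loaded(void)
    {
        FILE *f = fopen("/sys/kernel/kexec_loaded", "r");
        int v = -1;

        if (!f)
            return -1;
        if (fscanf(f, "%d", &v) != 1)
            v = -1;
        fclose(f);
        return v;
    }

    int main(void)
    {
        printf("kexec image loaded: %d\n", kexec_loaded());
        return 0;
    }
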
diff --git a/kernel/kthread.c b/kernel/kthread.c index c5f3c6613b6d..24be714b04c7 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
@@ -45,6 +45,13 @@ struct kthread_stop_info | |||
45 | static DEFINE_MUTEX(kthread_stop_lock); | 45 | static DEFINE_MUTEX(kthread_stop_lock); |
46 | static struct kthread_stop_info kthread_stop_info; | 46 | static struct kthread_stop_info kthread_stop_info; |
47 | 47 | ||
48 | /** | ||
49 | * kthread_should_stop - should this kthread return now? | ||
50 | * | ||
51 | * When someone calls kthread_stop on your kthread, it will be woken | ||
52 | * and this will return true. You should then return, and your return | ||
53 | * value will be passed through to kthread_stop(). | ||
54 | */ | ||
48 | int kthread_should_stop(void) | 55 | int kthread_should_stop(void) |
49 | { | 56 | { |
50 | return (kthread_stop_info.k == current); | 57 | return (kthread_stop_info.k == current); |
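
A threadfn shape that honors the stop handshake the comment describes might look like this (a minimal sketch, not from the patch):

    #include <linux/kthread.h>
    #include <linux/sched.h>

    static int my_threadfn(void *data)
    {
        while (!kthread_should_stop()) {
            /* one unit of work, then a polite nap */
            set_current_state(TASK_INTERRUPTIBLE);
            schedule_timeout(HZ);
        }
        return 0;               /* handed back to kthread_stop() */
    }
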
@@ -122,6 +129,25 @@ static void keventd_create_kthread(void *_create) | |||
122 | complete(&create->done); | 129 | complete(&create->done); |
123 | } | 130 | } |
124 | 131 | ||
132 | /** | ||
133 | * kthread_create - create a kthread. | ||
134 | * @threadfn: the function to run until signal_pending(current). | ||
135 | * @data: data ptr for @threadfn. | ||
136 | * @namefmt: printf-style name for the thread. | ||
137 | * | ||
138 | * Description: This helper function creates and names a kernel | ||
139 | * thread. The thread will be stopped: use wake_up_process() to start | ||
140 | * it. See also kthread_run(), kthread_create_on_cpu(). | ||
141 | * | ||
142 | * When woken, the thread will run @threadfn() with @data as its | ||
143 | * argument. @threadfn can either call do_exit() directly if it is a | ||
144 | * standalone thread for which no one will call kthread_stop(), or | ||
145 | * return when 'kthread_should_stop()' is true (which means | ||
146 | * kthread_stop() has been called). The return value should be zero | ||
147 | * or a negative error number; it will be passed to kthread_stop(). | ||
148 | * | ||
149 | * Returns a task_struct or ERR_PTR(-ENOMEM). | ||
150 | */ | ||
125 | struct task_struct *kthread_create(int (*threadfn)(void *data), | 151 | struct task_struct *kthread_create(int (*threadfn)(void *data), |
126 | void *data, | 152 | void *data, |
127 | const char namefmt[], | 153 | const char namefmt[], |
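
Putting the comment's contract together, creation, naming, start, and stop look roughly like this, reusing my_threadfn from the sketch above (names are illustrative, not from the patch):

    #include <linux/kthread.h>
    #include <linux/err.h>

    static struct task_struct *worker;

    static int start_worker(void)
    {
        worker = kthread_create(my_threadfn, NULL, "myworker/%d", 0);
        if (IS_ERR(worker))
            return PTR_ERR(worker);     /* -ENOMEM */
        wake_up_process(worker);        /* kthread_create() leaves it stopped */
        return 0;
    }

    static void stop_worker(void)
    {
        /* Wakes the thread, waits for it to exit, and returns
         * threadfn()'s return value (or -EINTR if never woken). */
        kthread_stop(worker);
    }
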
@@ -156,6 +182,15 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), | |||
156 | } | 182 | } |
157 | EXPORT_SYMBOL(kthread_create); | 183 | EXPORT_SYMBOL(kthread_create); |
158 | 184 | ||
185 | /** | ||
186 | * kthread_bind - bind a just-created kthread to a cpu. | ||
187 | * @k: thread created by kthread_create(). | ||
188 | * @cpu: cpu (might not be online, must be possible) for @k to run on. | ||
189 | * | ||
190 | * Description: This function is equivalent to set_cpus_allowed(), | ||
191 | * except that @cpu doesn't need to be online, and the thread must be | ||
192 | * stopped (i.e., just returned from kthread_create()). | ||
193 | */ | ||
159 | void kthread_bind(struct task_struct *k, unsigned int cpu) | 194 | void kthread_bind(struct task_struct *k, unsigned int cpu) |
160 | { | 195 | { |
161 | BUG_ON(k->state != TASK_INTERRUPTIBLE); | 196 | BUG_ON(k->state != TASK_INTERRUPTIBLE); |
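
The contract here is create first, bind while the thread is still stopped, then wake; a per-CPU helper along those lines might be (hedged sketch, not from the patch):

    #include <linux/kthread.h>
    #include <linux/err.h>

    static struct task_struct *start_on_cpu(int (*fn)(void *), void *arg,
                                            unsigned int cpu)
    {
        struct task_struct *k;

        k = kthread_create(fn, arg, "worker/%u", cpu);
        if (!IS_ERR(k)) {
            /* cpu must be possible, but need not be online yet */
            kthread_bind(k, cpu);
            wake_up_process(k);
        }
        return k;
    }
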
@@ -166,12 +201,36 @@ void kthread_bind(struct task_struct *k, unsigned int cpu) | |||
166 | } | 201 | } |
167 | EXPORT_SYMBOL(kthread_bind); | 202 | EXPORT_SYMBOL(kthread_bind); |
168 | 203 | ||
204 | /** | ||
205 | * kthread_stop - stop a thread created by kthread_create(). | ||
206 | * @k: thread created by kthread_create(). | ||
207 | * | ||
208 | * Sets kthread_should_stop() for @k to return true, wakes it, and | ||
209 | * waits for it to exit. Your threadfn() must not call do_exit() | ||
210 | * itself if you use this function! This can also be called after | ||
211 | * kthread_create() instead of calling wake_up_process(): the thread | ||
212 | * will exit without calling threadfn(). | ||
213 | * | ||
214 | * Returns the result of threadfn(), or %-EINTR if wake_up_process() | ||
215 | * was never called. | ||
216 | */ | ||
169 | int kthread_stop(struct task_struct *k) | 217 | int kthread_stop(struct task_struct *k) |
170 | { | 218 | { |
171 | return kthread_stop_sem(k, NULL); | 219 | return kthread_stop_sem(k, NULL); |
172 | } | 220 | } |
173 | EXPORT_SYMBOL(kthread_stop); | 221 | EXPORT_SYMBOL(kthread_stop); |
174 | 222 | ||
223 | /** | ||
224 | * kthread_stop_sem - stop a thread created by kthread_create(). | ||
225 | * @k: thread created by kthread_create(). | ||
226 | * @s: semaphore that @k waits on while idle. | ||
227 | * | ||
228 | * Does essentially the same thing as kthread_stop() above, but wakes | ||
229 | * @k by calling up(@s). | ||
230 | * | ||
231 | * Returns the result of threadfn(), or %-EINTR if wake_up_process() | ||
232 | * was never called. | ||
233 | */ | ||
175 | int kthread_stop_sem(struct task_struct *k, struct semaphore *s) | 234 | int kthread_stop_sem(struct task_struct *k, struct semaphore *s) |
176 | { | 235 | { |
177 | int ret; | 236 | int ret; |
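
kthread_stop_sem() matters when the thread idles in down() on a semaphore rather than in schedule(); a sketch of the pairing, assuming a work semaphore that starts at zero (illustrative, not from the patch):

    #include <linux/kthread.h>
    #include <asm/semaphore.h>

    static DECLARE_MUTEX_LOCKED(work_sem);  /* semaphore, initial count 0 */

    static int sem_threadfn(void *data)
    {
        while (!kthread_should_stop()) {
            down(&work_sem);            /* sleep until work (or stop) */
            if (kthread_should_stop())
                break;
            /* ... handle one queued item ... */
        }
        return 0;
    }

    static void stop_sem_thread(struct task_struct *k)
    {
        /* kthread_stop_sem() does the up(&work_sem) for us. */
        kthread_stop_sem(k, &work_sem);
    }
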
@@ -210,5 +269,5 @@ static __init int helper_init(void) | |||
210 | 269 | ||
211 | return 0; | 270 | return 0; |
212 | } | 271 | } |
213 | core_initcall(helper_init); | ||
214 | 272 | ||
273 | core_initcall(helper_init); | ||
diff --git a/kernel/module.c b/kernel/module.c index 690381508d09..10e5b872adf6 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/string.h> | 40 | #include <linux/string.h> |
41 | #include <linux/sched.h> | 41 | #include <linux/sched.h> |
42 | #include <linux/mutex.h> | 42 | #include <linux/mutex.h> |
43 | #include <linux/unwind.h> | ||
43 | #include <asm/uaccess.h> | 44 | #include <asm/uaccess.h> |
44 | #include <asm/semaphore.h> | 45 | #include <asm/semaphore.h> |
45 | #include <asm/cacheflush.h> | 46 | #include <asm/cacheflush.h> |
@@ -1052,6 +1053,8 @@ static void free_module(struct module *mod) | |||
1052 | remove_sect_attrs(mod); | 1053 | remove_sect_attrs(mod); |
1053 | mod_kobject_remove(mod); | 1054 | mod_kobject_remove(mod); |
1054 | 1055 | ||
1056 | unwind_remove_table(mod->unwind_info, 0); | ||
1057 | |||
1055 | /* Arch-specific cleanup. */ | 1058 | /* Arch-specific cleanup. */ |
1056 | module_arch_cleanup(mod); | 1059 | module_arch_cleanup(mod); |
1057 | 1060 | ||
@@ -1317,7 +1320,7 @@ int is_exported(const char *name, const struct module *mod) | |||
1317 | if (!mod && lookup_symbol(name, __start___ksymtab, __stop___ksymtab)) | 1320 | if (!mod && lookup_symbol(name, __start___ksymtab, __stop___ksymtab)) |
1318 | return 1; | 1321 | return 1; |
1319 | else | 1322 | else |
1320 | if (lookup_symbol(name, mod->syms, mod->syms + mod->num_syms)) | 1323 | if (mod && lookup_symbol(name, mod->syms, mod->syms + mod->num_syms)) |
1321 | return 1; | 1324 | return 1; |
1322 | else | 1325 | else |
1323 | return 0; | 1326 | return 0; |
@@ -1403,7 +1406,7 @@ static struct module *load_module(void __user *umod, | |||
1403 | unsigned int i, symindex = 0, strindex = 0, setupindex, exindex, | 1406 | unsigned int i, symindex = 0, strindex = 0, setupindex, exindex, |
1404 | exportindex, modindex, obsparmindex, infoindex, gplindex, | 1407 | exportindex, modindex, obsparmindex, infoindex, gplindex, |
1405 | crcindex, gplcrcindex, versindex, pcpuindex, gplfutureindex, | 1408 | crcindex, gplcrcindex, versindex, pcpuindex, gplfutureindex, |
1406 | gplfuturecrcindex; | 1409 | gplfuturecrcindex, unwindex = 0; |
1407 | struct module *mod; | 1410 | struct module *mod; |
1408 | long err = 0; | 1411 | long err = 0; |
1409 | void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ | 1412 | void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ |
@@ -1493,6 +1496,9 @@ static struct module *load_module(void __user *umod, | |||
1493 | versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); | 1496 | versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); |
1494 | infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); | 1497 | infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); |
1495 | pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); | 1498 | pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); |
1499 | #ifdef ARCH_UNWIND_SECTION_NAME | ||
1500 | unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME); | ||
1501 | #endif | ||
1496 | 1502 | ||
1497 | /* Don't keep modinfo section */ | 1503 | /* Don't keep modinfo section */ |
1498 | sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; | 1504 | sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; |
@@ -1501,6 +1507,8 @@ static struct module *load_module(void __user *umod, | |||
1501 | sechdrs[symindex].sh_flags |= SHF_ALLOC; | 1507 | sechdrs[symindex].sh_flags |= SHF_ALLOC; |
1502 | sechdrs[strindex].sh_flags |= SHF_ALLOC; | 1508 | sechdrs[strindex].sh_flags |= SHF_ALLOC; |
1503 | #endif | 1509 | #endif |
1510 | if (unwindex) | ||
1511 | sechdrs[unwindex].sh_flags |= SHF_ALLOC; | ||
1504 | 1512 | ||
1505 | /* Check module struct version now, before we try to use module. */ | 1513 | /* Check module struct version now, before we try to use module. */ |
1506 | if (!check_modstruct_version(sechdrs, versindex, mod)) { | 1514 | if (!check_modstruct_version(sechdrs, versindex, mod)) { |
@@ -1729,6 +1737,11 @@ static struct module *load_module(void __user *umod, | |||
1729 | goto arch_cleanup; | 1737 | goto arch_cleanup; |
1730 | add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); | 1738 | add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); |
1731 | 1739 | ||
1740 | /* Size of section 0 is 0, so this works well if no unwind info. */ | ||
1741 | mod->unwind_info = unwind_add_table(mod, | ||
1742 | (void *)sechdrs[unwindex].sh_addr, | ||
1743 | sechdrs[unwindex].sh_size); | ||
1744 | |||
1732 | /* Get rid of temporary copy */ | 1745 | /* Get rid of temporary copy */ |
1733 | vfree(hdr); | 1746 | vfree(hdr); |
1734 | 1747 | ||
@@ -1827,6 +1840,7 @@ sys_init_module(void __user *umod, | |||
1827 | mod->state = MODULE_STATE_LIVE; | 1840 | mod->state = MODULE_STATE_LIVE; |
1828 | /* Drop initial reference. */ | 1841 | /* Drop initial reference. */ |
1829 | module_put(mod); | 1842 | module_put(mod); |
1843 | unwind_remove_table(mod->unwind_info, 1); | ||
1830 | module_free(mod, mod->module_init); | 1844 | module_free(mod, mod->module_init); |
1831 | mod->module_init = NULL; | 1845 | mod->module_init = NULL; |
1832 | mod->init_size = 0; | 1846 | mod->init_size = 0; |
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index f4913c376950..036b6285b15c 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c | |||
@@ -153,13 +153,13 @@ next: | |||
153 | continue; | 153 | continue; |
154 | count++; | 154 | count++; |
155 | cursor = curr->next; | 155 | cursor = curr->next; |
156 | debug_spin_lock_restore(&debug_mutex_lock, flags); | 156 | debug_spin_unlock_restore(&debug_mutex_lock, flags); |
157 | 157 | ||
158 | printk("\n#%03d: ", count); | 158 | printk("\n#%03d: ", count); |
159 | printk_lock(lock, filter ? 0 : 1); | 159 | printk_lock(lock, filter ? 0 : 1); |
160 | goto next; | 160 | goto next; |
161 | } | 161 | } |
162 | debug_spin_lock_restore(&debug_mutex_lock, flags); | 162 | debug_spin_unlock_restore(&debug_mutex_lock, flags); |
163 | printk("\n"); | 163 | printk("\n"); |
164 | } | 164 | } |
165 | 165 | ||
@@ -316,7 +316,7 @@ void mutex_debug_check_no_locks_held(struct task_struct *task) | |||
316 | continue; | 316 | continue; |
317 | list_del_init(curr); | 317 | list_del_init(curr); |
318 | DEBUG_OFF(); | 318 | DEBUG_OFF(); |
319 | debug_spin_lock_restore(&debug_mutex_lock, flags); | 319 | debug_spin_unlock_restore(&debug_mutex_lock, flags); |
320 | 320 | ||
321 | printk("BUG: %s/%d, lock held at task exit time!\n", | 321 | printk("BUG: %s/%d, lock held at task exit time!\n", |
322 | task->comm, task->pid); | 322 | task->comm, task->pid); |
@@ -325,7 +325,7 @@ void mutex_debug_check_no_locks_held(struct task_struct *task) | |||
325 | printk("exiting task is not even the owner??\n"); | 325 | printk("exiting task is not even the owner??\n"); |
326 | return; | 326 | return; |
327 | } | 327 | } |
328 | debug_spin_lock_restore(&debug_mutex_lock, flags); | 328 | debug_spin_unlock_restore(&debug_mutex_lock, flags); |
329 | } | 329 | } |
330 | 330 | ||
331 | /* | 331 | /* |
@@ -352,7 +352,7 @@ void mutex_debug_check_no_locks_freed(const void *from, unsigned long len) | |||
352 | continue; | 352 | continue; |
353 | list_del_init(curr); | 353 | list_del_init(curr); |
354 | DEBUG_OFF(); | 354 | DEBUG_OFF(); |
355 | debug_spin_lock_restore(&debug_mutex_lock, flags); | 355 | debug_spin_unlock_restore(&debug_mutex_lock, flags); |
356 | 356 | ||
357 | printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n", | 357 | printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n", |
358 | current->comm, current->pid, lock, from, to); | 358 | current->comm, current->pid, lock, from, to); |
@@ -362,7 +362,7 @@ void mutex_debug_check_no_locks_freed(const void *from, unsigned long len) | |||
362 | printk("freeing task is not even the owner??\n"); | 362 | printk("freeing task is not even the owner??\n"); |
363 | return; | 363 | return; |
364 | } | 364 | } |
365 | debug_spin_lock_restore(&debug_mutex_lock, flags); | 365 | debug_spin_unlock_restore(&debug_mutex_lock, flags); |
366 | } | 366 | } |
367 | 367 | ||
368 | /* | 368 | /* |
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h index fd384050acb1..a5196c36a5fd 100644 --- a/kernel/mutex-debug.h +++ b/kernel/mutex-debug.h | |||
@@ -46,21 +46,6 @@ extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, | |||
46 | extern void debug_mutex_unlock(struct mutex *lock); | 46 | extern void debug_mutex_unlock(struct mutex *lock); |
47 | extern void debug_mutex_init(struct mutex *lock, const char *name); | 47 | extern void debug_mutex_init(struct mutex *lock, const char *name); |
48 | 48 | ||
49 | #define debug_spin_lock(lock) \ | ||
50 | do { \ | ||
51 | local_irq_disable(); \ | ||
52 | if (debug_mutex_on) \ | ||
53 | spin_lock(lock); \ | ||
54 | } while (0) | ||
55 | |||
56 | #define debug_spin_unlock(lock) \ | ||
57 | do { \ | ||
58 | if (debug_mutex_on) \ | ||
59 | spin_unlock(lock); \ | ||
60 | local_irq_enable(); \ | ||
61 | preempt_check_resched(); \ | ||
62 | } while (0) | ||
63 | |||
64 | #define debug_spin_lock_save(lock, flags) \ | 49 | #define debug_spin_lock_save(lock, flags) \ |
65 | do { \ | 50 | do { \ |
66 | local_irq_save(flags); \ | 51 | local_irq_save(flags); \ |
@@ -68,7 +53,7 @@ extern void debug_mutex_init(struct mutex *lock, const char *name); | |||
68 | spin_lock(lock); \ | 53 | spin_lock(lock); \ |
69 | } while (0) | 54 | } while (0) |
70 | 55 | ||
71 | #define debug_spin_lock_restore(lock, flags) \ | 56 | #define debug_spin_unlock_restore(lock, flags) \ |
72 | do { \ | 57 | do { \ |
73 | if (debug_mutex_on) \ | 58 | if (debug_mutex_on) \ |
74 | spin_unlock(lock); \ | 59 | spin_unlock(lock); \ |
@@ -76,20 +61,20 @@ extern void debug_mutex_init(struct mutex *lock, const char *name); | |||
76 | preempt_check_resched(); \ | 61 | preempt_check_resched(); \ |
77 | } while (0) | 62 | } while (0) |
78 | 63 | ||
79 | #define spin_lock_mutex(lock) \ | 64 | #define spin_lock_mutex(lock, flags) \ |
80 | do { \ | 65 | do { \ |
81 | struct mutex *l = container_of(lock, struct mutex, wait_lock); \ | 66 | struct mutex *l = container_of(lock, struct mutex, wait_lock); \ |
82 | \ | 67 | \ |
83 | DEBUG_WARN_ON(in_interrupt()); \ | 68 | DEBUG_WARN_ON(in_interrupt()); \ |
84 | debug_spin_lock(&debug_mutex_lock); \ | 69 | debug_spin_lock_save(&debug_mutex_lock, flags); \ |
85 | spin_lock(lock); \ | 70 | spin_lock(lock); \ |
86 | DEBUG_WARN_ON(l->magic != l); \ | 71 | DEBUG_WARN_ON(l->magic != l); \ |
87 | } while (0) | 72 | } while (0) |
88 | 73 | ||
89 | #define spin_unlock_mutex(lock) \ | 74 | #define spin_unlock_mutex(lock, flags) \ |
90 | do { \ | 75 | do { \ |
91 | spin_unlock(lock); \ | 76 | spin_unlock(lock); \ |
92 | debug_spin_unlock(&debug_mutex_lock); \ | 77 | debug_spin_unlock_restore(&debug_mutex_lock, flags); \ |
93 | } while (0) | 78 | } while (0) |
94 | 79 | ||
95 | #define DEBUG_OFF() \ | 80 | #define DEBUG_OFF() \ |
diff --git a/kernel/mutex.c b/kernel/mutex.c index 5449b210d9ed..7043db21bbce 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
@@ -125,10 +125,11 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__) | |||
125 | struct task_struct *task = current; | 125 | struct task_struct *task = current; |
126 | struct mutex_waiter waiter; | 126 | struct mutex_waiter waiter; |
127 | unsigned int old_val; | 127 | unsigned int old_val; |
128 | unsigned long flags; | ||
128 | 129 | ||
129 | debug_mutex_init_waiter(&waiter); | 130 | debug_mutex_init_waiter(&waiter); |
130 | 131 | ||
131 | spin_lock_mutex(&lock->wait_lock); | 132 | spin_lock_mutex(&lock->wait_lock, flags); |
132 | 133 | ||
133 | debug_mutex_add_waiter(lock, &waiter, task->thread_info, ip); | 134 | debug_mutex_add_waiter(lock, &waiter, task->thread_info, ip); |
134 | 135 | ||
@@ -157,7 +158,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__) | |||
157 | if (unlikely(state == TASK_INTERRUPTIBLE && | 158 | if (unlikely(state == TASK_INTERRUPTIBLE && |
158 | signal_pending(task))) { | 159 | signal_pending(task))) { |
159 | mutex_remove_waiter(lock, &waiter, task->thread_info); | 160 | mutex_remove_waiter(lock, &waiter, task->thread_info); |
160 | spin_unlock_mutex(&lock->wait_lock); | 161 | spin_unlock_mutex(&lock->wait_lock, flags); |
161 | 162 | ||
162 | debug_mutex_free_waiter(&waiter); | 163 | debug_mutex_free_waiter(&waiter); |
163 | return -EINTR; | 164 | return -EINTR; |
@@ -165,9 +166,9 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__) | |||
165 | __set_task_state(task, state); | 166 | __set_task_state(task, state); |
166 | 167 | ||
167 | /* didn't get the lock, go to sleep: */ | 168 |
168 | spin_unlock_mutex(&lock->wait_lock); | 169 | spin_unlock_mutex(&lock->wait_lock, flags); |
169 | schedule(); | 170 | schedule(); |
170 | spin_lock_mutex(&lock->wait_lock); | 171 | spin_lock_mutex(&lock->wait_lock, flags); |
171 | } | 172 | } |
172 | 173 | ||
173 | /* got the lock - rejoice! */ | 174 | /* got the lock - rejoice! */ |
@@ -178,7 +179,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__) | |||
178 | if (likely(list_empty(&lock->wait_list))) | 179 | if (likely(list_empty(&lock->wait_list))) |
179 | atomic_set(&lock->count, 0); | 180 | atomic_set(&lock->count, 0); |
180 | 181 | ||
181 | spin_unlock_mutex(&lock->wait_lock); | 182 | spin_unlock_mutex(&lock->wait_lock, flags); |
182 | 183 | ||
183 | debug_mutex_free_waiter(&waiter); | 184 | debug_mutex_free_waiter(&waiter); |
184 | 185 | ||
@@ -203,10 +204,11 @@ static fastcall noinline void | |||
203 | __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__) | 204 | __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__) |
204 | { | 205 | { |
205 | struct mutex *lock = container_of(lock_count, struct mutex, count); | 206 | struct mutex *lock = container_of(lock_count, struct mutex, count); |
207 | unsigned long flags; | ||
206 | 208 | ||
207 | DEBUG_WARN_ON(lock->owner != current_thread_info()); | 209 | DEBUG_WARN_ON(lock->owner != current_thread_info()); |
208 | 210 | ||
209 | spin_lock_mutex(&lock->wait_lock); | 211 | spin_lock_mutex(&lock->wait_lock, flags); |
210 | 212 | ||
211 | /* | 213 | /* |
212 | * some architectures leave the lock unlocked in the fastpath failure | 214 | * some architectures leave the lock unlocked in the fastpath failure |
@@ -231,7 +233,7 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__) | |||
231 | 233 | ||
232 | debug_mutex_clear_owner(lock); | 234 | debug_mutex_clear_owner(lock); |
233 | 235 | ||
234 | spin_unlock_mutex(&lock->wait_lock); | 236 | spin_unlock_mutex(&lock->wait_lock, flags); |
235 | } | 237 | } |
236 | 238 | ||
237 | /* | 239 | /* |
@@ -276,9 +278,10 @@ __mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__) | |||
276 | static inline int __mutex_trylock_slowpath(atomic_t *lock_count) | 278 | static inline int __mutex_trylock_slowpath(atomic_t *lock_count) |
277 | { | 279 | { |
278 | struct mutex *lock = container_of(lock_count, struct mutex, count); | 280 | struct mutex *lock = container_of(lock_count, struct mutex, count); |
281 | unsigned long flags; | ||
279 | int prev; | 282 | int prev; |
280 | 283 | ||
281 | spin_lock_mutex(&lock->wait_lock); | 284 | spin_lock_mutex(&lock->wait_lock, flags); |
282 | 285 | ||
283 | prev = atomic_xchg(&lock->count, -1); | 286 | prev = atomic_xchg(&lock->count, -1); |
284 | if (likely(prev == 1)) | 287 | if (likely(prev == 1)) |
@@ -287,7 +290,7 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count) | |||
287 | if (likely(list_empty(&lock->wait_list))) | 290 | if (likely(list_empty(&lock->wait_list))) |
288 | atomic_set(&lock->count, 0); | 291 | atomic_set(&lock->count, 0); |
289 | 292 | ||
290 | spin_unlock_mutex(&lock->wait_lock); | 293 | spin_unlock_mutex(&lock->wait_lock, flags); |
291 | 294 | ||
292 | return prev == 1; | 295 | return prev == 1; |
293 | } | 296 | } |
diff --git a/kernel/mutex.h b/kernel/mutex.h index 00fe84e7b672..069189947257 100644 --- a/kernel/mutex.h +++ b/kernel/mutex.h | |||
@@ -9,8 +9,10 @@ | |||
9 | * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs: | 9 | * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs: |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #define spin_lock_mutex(lock) spin_lock(lock) | 12 | #define spin_lock_mutex(lock, flags) \ |
13 | #define spin_unlock_mutex(lock) spin_unlock(lock) | 13 | do { spin_lock(lock); (void)(flags); } while (0) |
14 | #define spin_unlock_mutex(lock, flags) \ | ||
15 | do { spin_unlock(lock); (void)(flags); } while (0) | ||
14 | #define mutex_remove_waiter(lock, waiter, ti) \ | 16 | #define mutex_remove_waiter(lock, waiter, ti) \ |
15 | __list_del((waiter)->list.prev, (waiter)->list.next) | 17 | __list_del((waiter)->list.prev, (waiter)->list.next) |
16 | 18 | ||
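
The macro changes above thread an irq flags word through spin_lock_mutex()/spin_unlock_mutex() so that the debug build can use the irq-saving lock forms; the non-debug variants just cast flags to void. The flags variable must be declared in the caller (as the mutex.c hunks do) because the macros expand in the caller's scope. The underlying pattern is the standard one:

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(my_lock);

    static void critical_section(void)
    {
        unsigned long flags;    /* must live in the caller's frame */

        spin_lock_irqsave(&my_lock, flags);     /* save + disable irqs */
        /* ... touch data also reached from irq context ... */
        spin_unlock_irqrestore(&my_lock, flags); /* restore prior state */
    }
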
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 520f6c59948d..d38d9ec3276c 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -555,9 +555,6 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) | |||
555 | struct cpu_timer_list *next; | 555 | struct cpu_timer_list *next; |
556 | unsigned long i; | 556 | unsigned long i; |
557 | 557 | ||
558 | if (CPUCLOCK_PERTHREAD(timer->it_clock) && (p->flags & PF_EXITING)) | ||
559 | return; | ||
560 | |||
561 | head = (CPUCLOCK_PERTHREAD(timer->it_clock) ? | 558 | head = (CPUCLOCK_PERTHREAD(timer->it_clock) ? |
562 | p->cpu_timers : p->signal->cpu_timers); | 559 | p->cpu_timers : p->signal->cpu_timers); |
563 | head += CPUCLOCK_WHICH(timer->it_clock); | 560 | head += CPUCLOCK_WHICH(timer->it_clock); |
@@ -1173,6 +1170,9 @@ static void check_process_timers(struct task_struct *tsk, | |||
1173 | } | 1170 | } |
1174 | t = tsk; | 1171 | t = tsk; |
1175 | do { | 1172 | do { |
1173 | if (unlikely(t->flags & PF_EXITING)) | ||
1174 | continue; | ||
1175 | |||
1176 | ticks = cputime_add(cputime_add(t->utime, t->stime), | 1176 | ticks = cputime_add(cputime_add(t->utime, t->stime), |
1177 | prof_left); | 1177 | prof_left); |
1178 | if (!cputime_eq(prof_expires, cputime_zero) && | 1178 | if (!cputime_eq(prof_expires, cputime_zero) && |
@@ -1193,11 +1193,7 @@ static void check_process_timers(struct task_struct *tsk, | |||
1193 | t->it_sched_expires > sched)) { | 1193 | t->it_sched_expires > sched)) { |
1194 | t->it_sched_expires = sched; | 1194 | t->it_sched_expires = sched; |
1195 | } | 1195 | } |
1196 | 1196 | } while ((t = next_thread(t)) != tsk); | |
1197 | do { | ||
1198 | t = next_thread(t); | ||
1199 | } while (unlikely(t->flags & PF_EXITING)); | ||
1200 | } while (t != tsk); | ||
1201 | } | 1197 | } |
1202 | } | 1198 | } |
1203 | 1199 | ||
@@ -1289,30 +1285,30 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |||
1289 | 1285 | ||
1290 | #undef UNEXPIRED | 1286 | #undef UNEXPIRED |
1291 | 1287 | ||
1292 | BUG_ON(tsk->exit_state); | ||
1293 | |||
1294 | /* | 1288 | /* |
1295 | * Double-check with locks held. | 1289 | * Double-check with locks held. |
1296 | */ | 1290 | */ |
1297 | read_lock(&tasklist_lock); | 1291 | read_lock(&tasklist_lock); |
1298 | spin_lock(&tsk->sighand->siglock); | 1292 | if (likely(tsk->signal != NULL)) { |
1293 | spin_lock(&tsk->sighand->siglock); | ||
1299 | 1294 | ||
1300 | /* | 1295 | /* |
1301 | * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N] | 1296 | * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N] |
1302 | * all the timers that are firing, and put them on the firing list. | 1297 | * all the timers that are firing, and put them on the firing list. |
1303 | */ | 1298 | */ |
1304 | check_thread_timers(tsk, &firing); | 1299 | check_thread_timers(tsk, &firing); |
1305 | check_process_timers(tsk, &firing); | 1300 | check_process_timers(tsk, &firing); |
1306 | 1301 | ||
1307 | /* | 1302 | /* |
1308 | * We must release these locks before taking any timer's lock. | 1303 | * We must release these locks before taking any timer's lock. |
1309 | * There is a potential race with timer deletion here, as the | 1304 | * There is a potential race with timer deletion here, as the |
1310 | * siglock now protects our private firing list. We have set | 1305 | * siglock now protects our private firing list. We have set |
1311 | * the firing flag in each timer, so that a deletion attempt | 1306 | * the firing flag in each timer, so that a deletion attempt |
1312 | * that gets the timer lock before we do will give it up and | 1307 | * that gets the timer lock before we do will give it up and |
1313 | * spin until we've taken care of that timer below. | 1308 | * spin until we've taken care of that timer below. |
1314 | */ | 1309 | */ |
1315 | spin_unlock(&tsk->sighand->siglock); | 1310 | spin_unlock(&tsk->sighand->siglock); |
1311 | } | ||
1316 | read_unlock(&tasklist_lock); | 1312 | read_unlock(&tasklist_lock); |
1317 | 1313 | ||
1318 | /* | 1314 | /* |
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index ce0dfb8f4a4e..fc311a4673a2 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
@@ -36,6 +36,15 @@ config PM_DEBUG | |||
36 | code. This is helpful when debugging and reporting various PM bugs, | 36 | code. This is helpful when debugging and reporting various PM bugs, |
37 | like suspend support. | 37 | like suspend support. |
38 | 38 | ||
39 | config PM_TRACE | ||
40 | bool "Suspend/resume event tracing" | ||
41 | depends on PM && PM_DEBUG && X86_32 | ||
42 | default y | ||
43 | ---help--- | ||
44 | This enables some cheesy code to save the last PM event point in the | ||
45 | RTC across reboots, so that you can debug a machine that just hangs | ||
46 | during suspend (or more commonly, during resume). | ||
47 | |||
39 | config SOFTWARE_SUSPEND | 48 | config SOFTWARE_SUSPEND |
40 | bool "Software Suspend" | 49 | bool "Software Suspend" |
41 | depends on PM && SWAP && (X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP) | 50 | depends on PM && SWAP && (X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP) |
diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 81d4d982f3f0..e13e74067845 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c | |||
@@ -231,7 +231,7 @@ static int software_resume(void) | |||
231 | late_initcall(software_resume); | 231 | late_initcall(software_resume); |
232 | 232 | ||
233 | 233 | ||
234 | static char * pm_disk_modes[] = { | 234 | static const char * const pm_disk_modes[] = { |
235 | [PM_DISK_FIRMWARE] = "firmware", | 235 | [PM_DISK_FIRMWARE] = "firmware", |
236 | [PM_DISK_PLATFORM] = "platform", | 236 | [PM_DISK_PLATFORM] = "platform", |
237 | [PM_DISK_SHUTDOWN] = "shutdown", | 237 | [PM_DISK_SHUTDOWN] = "shutdown", |
diff --git a/kernel/power/main.c b/kernel/power/main.c index a6d9ef46009e..6d295c776794 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
@@ -15,7 +15,7 @@ | |||
15 | #include <linux/errno.h> | 15 | #include <linux/errno.h> |
16 | #include <linux/init.h> | 16 | #include <linux/init.h> |
17 | #include <linux/pm.h> | 17 | #include <linux/pm.h> |
18 | 18 | #include <linux/console.h> | |
19 | 19 | ||
20 | #include "power.h" | 20 | #include "power.h" |
21 | 21 | ||
@@ -86,6 +86,7 @@ static int suspend_prepare(suspend_state_t state) | |||
86 | goto Thaw; | 86 | goto Thaw; |
87 | } | 87 | } |
88 | 88 | ||
89 | suspend_console(); | ||
89 | if ((error = device_suspend(PMSG_SUSPEND))) { | 90 | if ((error = device_suspend(PMSG_SUSPEND))) { |
90 | printk(KERN_ERR "Some devices failed to suspend\n"); | 91 | printk(KERN_ERR "Some devices failed to suspend\n"); |
91 | goto Finish; | 92 | goto Finish; |
@@ -133,6 +134,7 @@ int suspend_enter(suspend_state_t state) | |||
133 | static void suspend_finish(suspend_state_t state) | 134 | static void suspend_finish(suspend_state_t state) |
134 | { | 135 | { |
135 | device_resume(); | 136 | device_resume(); |
137 | resume_console(); | ||
136 | thaw_processes(); | 138 | thaw_processes(); |
137 | enable_nonboot_cpus(); | 139 | enable_nonboot_cpus(); |
138 | if (pm_ops && pm_ops->finish) | 140 | if (pm_ops && pm_ops->finish) |
@@ -143,7 +145,7 @@ static void suspend_finish(suspend_state_t state) | |||
143 | 145 | ||
144 | 146 | ||
145 | 147 | ||
146 | static char *pm_states[PM_SUSPEND_MAX] = { | 148 | static const char * const pm_states[PM_SUSPEND_MAX] = { |
147 | [PM_SUSPEND_STANDBY] = "standby", | 149 | [PM_SUSPEND_STANDBY] = "standby", |
148 | [PM_SUSPEND_MEM] = "mem", | 150 | [PM_SUSPEND_MEM] = "mem", |
149 | #ifdef CONFIG_SOFTWARE_SUSPEND | 151 | #ifdef CONFIG_SOFTWARE_SUSPEND |
@@ -260,7 +262,7 @@ static ssize_t state_show(struct subsystem * subsys, char * buf) | |||
260 | static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n) | 262 | static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n) |
261 | { | 263 | { |
262 | suspend_state_t state = PM_SUSPEND_STANDBY; | 264 | suspend_state_t state = PM_SUSPEND_STANDBY; |
263 | char ** s; | 265 | const char * const *s; |
264 | char *p; | 266 | char *p; |
265 | int error; | 267 | int error; |
266 | int len; | 268 | int len; |
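
The pm_disk_modes and pm_states hunks add a second const; each qualifier forbids a different mutation, which is what lets the whole table live in read-only data. A plain C illustration (names are illustrative):

    #include <string.h>

    /* Left const: the characters are read-only.
     * Right const: the pointer slots themselves are read-only. */
    static const char * const modes[] = { "standby", "mem", "disk" };

    static int lookup(const char *p)
    {
        const char * const *s;
        int i = 0;

        /* modes[0][0] = 'S';   would not compile: chars are const   */
        /* modes[0] = "other";  would not compile: pointers are const */
        for (s = modes; s < modes + 3; s++, i++)
            if (strcmp(*s, p) == 0)
                return i;
        return -1;
    }
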
diff --git a/kernel/power/power.h b/kernel/power/power.h index f06f12f21767..57a792982fb9 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h | |||
@@ -55,7 +55,7 @@ struct snapshot_handle { | |||
55 | unsigned int page; | 55 | unsigned int page; |
56 | unsigned int page_offset; | 56 | unsigned int page_offset; |
57 | unsigned int prev; | 57 | unsigned int prev; |
58 | struct pbe *pbe; | 58 | struct pbe *pbe, *last_pbe; |
59 | void *buffer; | 59 | void *buffer; |
60 | unsigned int buf_offset; | 60 | unsigned int buf_offset; |
61 | }; | 61 | }; |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 3eeedbb13b78..24c96f354231 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -150,6 +150,10 @@ int restore_highmem(void) | |||
150 | } | 150 | } |
151 | return 0; | 151 | return 0; |
152 | } | 152 | } |
153 | #else | ||
154 | static inline unsigned int count_highmem_pages(void) { return 0; } | ||
155 | static inline int save_highmem(void) { return 0; } | ||
156 | static inline int restore_highmem(void) { return 0; } | ||
153 | #endif | 157 | #endif |
154 | 158 | ||
155 | static int pfn_is_nosave(unsigned long pfn) | 159 | static int pfn_is_nosave(unsigned long pfn) |
@@ -293,62 +297,29 @@ static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) | |||
293 | } | 297 | } |
294 | } | 298 | } |
295 | 299 | ||
296 | /** | 300 | static unsigned int unsafe_pages; |
297 | * On resume it is necessary to trace and eventually free the unsafe | ||
298 | * pages that have been allocated, because they are needed for I/O | ||
299 | * (on x86-64 we likely will "eat" these pages once again while | ||
300 | * creating the temporary page translation tables) | ||
301 | */ | ||
302 | |||
303 | struct eaten_page { | ||
304 | struct eaten_page *next; | ||
305 | char padding[PAGE_SIZE - sizeof(void *)]; | ||
306 | }; | ||
307 | |||
308 | static struct eaten_page *eaten_pages = NULL; | ||
309 | |||
310 | static void release_eaten_pages(void) | ||
311 | { | ||
312 | struct eaten_page *p, *q; | ||
313 | |||
314 | p = eaten_pages; | ||
315 | while (p) { | ||
316 | q = p->next; | ||
317 | /* We don't want swsusp_free() to free this page again */ | ||
318 | ClearPageNosave(virt_to_page(p)); | ||
319 | free_page((unsigned long)p); | ||
320 | p = q; | ||
321 | } | ||
322 | eaten_pages = NULL; | ||
323 | } | ||
324 | 301 | ||
325 | /** | 302 | /** |
326 | * @safe_needed - on resume, for storing the PBE list and the image, | 303 | * @safe_needed - on resume, for storing the PBE list and the image, |
327 | * we can only use memory pages that do not conflict with the pages | 304 | * we can only use memory pages that do not conflict with the pages |
328 | * which had been used before suspend. | 305 | * used before suspend. |
329 | * | 306 | * |
330 | * The unsafe pages are marked with the PG_nosave_free flag | 307 | * The unsafe pages are marked with the PG_nosave_free flag |
331 | * | 308 | * and we count them using unsafe_pages |
332 | * Allocated but unusable (ie eaten) memory pages should be marked | ||
333 | * so that swsusp_free() can release them | ||
334 | */ | 309 | */ |
335 | 310 | ||
336 | static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed) | 311 | static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed) |
337 | { | 312 | { |
338 | void *res; | 313 | void *res; |
339 | 314 | ||
315 | res = (void *)get_zeroed_page(gfp_mask); | ||
340 | if (safe_needed) | 316 | if (safe_needed) |
341 | do { | 317 | while (res && PageNosaveFree(virt_to_page(res))) { |
318 | /* The page is unsafe, mark it for swsusp_free() */ | ||
319 | SetPageNosave(virt_to_page(res)); | ||
320 | unsafe_pages++; | ||
342 | res = (void *)get_zeroed_page(gfp_mask); | 321 | res = (void *)get_zeroed_page(gfp_mask); |
343 | if (res && PageNosaveFree(virt_to_page(res))) { | 322 | } |
344 | /* This is for swsusp_free() */ | ||
345 | SetPageNosave(virt_to_page(res)); | ||
346 | ((struct eaten_page *)res)->next = eaten_pages; | ||
347 | eaten_pages = res; | ||
348 | } | ||
349 | } while (res && PageNosaveFree(virt_to_page(res))); | ||
350 | else | ||
351 | res = (void *)get_zeroed_page(gfp_mask); | ||
352 | if (res) { | 323 | if (res) { |
353 | SetPageNosave(virt_to_page(res)); | 324 | SetPageNosave(virt_to_page(res)); |
354 | SetPageNosaveFree(virt_to_page(res)); | 325 | SetPageNosaveFree(virt_to_page(res)); |
@@ -374,7 +345,8 @@ unsigned long get_safe_page(gfp_t gfp_mask) | |||
374 | * On each page we set up a list of struct_pbe elements. | 345 | * On each page we set up a list of struct_pbe elements. |
375 | */ | 346 | */ |
376 | 347 | ||
377 | struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed) | 348 | static struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, |
349 | int safe_needed) | ||
378 | { | 350 | { |
379 | unsigned int num; | 351 | unsigned int num; |
380 | struct pbe *pblist, *pbe; | 352 | struct pbe *pblist, *pbe; |
@@ -642,6 +614,8 @@ static int mark_unsafe_pages(struct pbe *pblist) | |||
642 | return -EFAULT; | 614 | return -EFAULT; |
643 | } | 615 | } |
644 | 616 | ||
617 | unsafe_pages = 0; | ||
618 | |||
645 | return 0; | 619 | return 0; |
646 | } | 620 | } |
647 | 621 | ||
@@ -719,42 +693,99 @@ static inline struct pbe *unpack_orig_addresses(unsigned long *buf, | |||
719 | } | 693 | } |
720 | 694 | ||
721 | /** | 695 | /** |
722 | * create_image - use metadata contained in the PBE list | 696 | * prepare_image - use metadata contained in the PBE list |
723 | * pointed to by pagedir_nosave to mark the pages that will | 697 | * pointed to by pagedir_nosave to mark the pages that will |
724 | * be overwritten in the process of restoring the system | 698 | * be overwritten in the process of restoring the system |
725 | * memory state from the image and allocate memory for | 699 | * memory state from the image ("unsafe" pages) and allocate |
726 | * the image avoiding these pages | 700 | * memory for the image |
701 | * | ||
702 | * The idea is to allocate the PBE list first and then | ||
703 | * allocate as many pages as are needed for the image data, | ||
704 | * but not to assign these pages to the PBEs initially. | ||
705 | * Instead, we just mark them as allocated and create a list | ||
706 | * of "safe" pages which will be used later | ||
727 | */ | 707 | */ |
728 | 708 | ||
729 | static int create_image(struct snapshot_handle *handle) | 709 | struct safe_page { |
710 | struct safe_page *next; | ||
711 | char padding[PAGE_SIZE - sizeof(void *)]; | ||
712 | }; | ||
713 | |||
714 | static struct safe_page *safe_pages; | ||
715 | |||
716 | static int prepare_image(struct snapshot_handle *handle) | ||
730 | { | 717 | { |
731 | int error = 0; | 718 | int error = 0; |
732 | struct pbe *p, *pblist; | 719 | unsigned int nr_pages = nr_copy_pages; |
720 | struct pbe *p, *pblist = NULL; | ||
733 | 721 | ||
734 | p = pagedir_nosave; | 722 | p = pagedir_nosave; |
735 | error = mark_unsafe_pages(p); | 723 | error = mark_unsafe_pages(p); |
736 | if (!error) { | 724 | if (!error) { |
737 | pblist = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1); | 725 | pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1); |
738 | if (pblist) | 726 | if (pblist) |
739 | copy_page_backup_list(pblist, p); | 727 | copy_page_backup_list(pblist, p); |
740 | free_pagedir(p, 0); | 728 | free_pagedir(p, 0); |
741 | if (!pblist) | 729 | if (!pblist) |
742 | error = -ENOMEM; | 730 | error = -ENOMEM; |
743 | } | 731 | } |
744 | if (!error) | 732 | safe_pages = NULL; |
745 | error = alloc_data_pages(pblist, GFP_ATOMIC, 1); | 733 | if (!error && nr_pages > unsafe_pages) { |
734 | nr_pages -= unsafe_pages; | ||
735 | while (nr_pages--) { | ||
736 | struct safe_page *ptr; | ||
737 | |||
738 | ptr = (struct safe_page *)get_zeroed_page(GFP_ATOMIC); | ||
739 | if (!ptr) { | ||
740 | error = -ENOMEM; | ||
741 | break; | ||
742 | } | ||
743 | if (!PageNosaveFree(virt_to_page(ptr))) { | ||
744 | /* The page is "safe", add it to the list */ | ||
745 | ptr->next = safe_pages; | ||
746 | safe_pages = ptr; | ||
747 | } | ||
748 | /* Mark the page as allocated */ | ||
749 | SetPageNosave(virt_to_page(ptr)); | ||
750 | SetPageNosaveFree(virt_to_page(ptr)); | ||
751 | } | ||
752 | } | ||
746 | if (!error) { | 753 | if (!error) { |
747 | release_eaten_pages(); | ||
748 | pagedir_nosave = pblist; | 754 | pagedir_nosave = pblist; |
749 | } else { | 755 | } else { |
750 | pagedir_nosave = NULL; | ||
751 | handle->pbe = NULL; | 756 | handle->pbe = NULL; |
752 | nr_copy_pages = 0; | 757 | swsusp_free(); |
753 | nr_meta_pages = 0; | ||
754 | } | 758 | } |
755 | return error; | 759 | return error; |
756 | } | 760 | } |
757 | 761 | ||
762 | static void *get_buffer(struct snapshot_handle *handle) | ||
763 | { | ||
764 | struct pbe *pbe = handle->pbe, *last = handle->last_pbe; | ||
765 | struct page *page = virt_to_page(pbe->orig_address); | ||
766 | |||
767 | if (PageNosave(page) && PageNosaveFree(page)) { | ||
768 | /* | ||
769 | * We have allocated the "original" page frame and we can | ||
770 | * use it directly to store the read page | ||
771 | */ | ||
772 | pbe->address = 0; | ||
773 | if (last && last->next) | ||
774 | last->next = NULL; | ||
775 | return (void *)pbe->orig_address; | ||
776 | } | ||
777 | /* | ||
778 | * The "original" page frame has not been allocated and we have to | ||
779 | * use a "safe" page frame to store the read page | ||
780 | */ | ||
781 | pbe->address = (unsigned long)safe_pages; | ||
782 | safe_pages = safe_pages->next; | ||
783 | if (last) | ||
784 | last->next = pbe; | ||
785 | handle->last_pbe = pbe; | ||
786 | return (void *)pbe->address; | ||
787 | } | ||
788 | |||
758 | /** | 789 | /** |
759 | * snapshot_write_next - used for writing the system memory snapshot. | 790 | * snapshot_write_next - used for writing the system memory snapshot. |
760 | * | 791 | * |
@@ -799,15 +830,16 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) | |||
799 | } else if (handle->prev <= nr_meta_pages) { | 830 | } else if (handle->prev <= nr_meta_pages) { |
800 | handle->pbe = unpack_orig_addresses(buffer, handle->pbe); | 831 | handle->pbe = unpack_orig_addresses(buffer, handle->pbe); |
801 | if (!handle->pbe) { | 832 | if (!handle->pbe) { |
802 | error = create_image(handle); | 833 | error = prepare_image(handle); |
803 | if (error) | 834 | if (error) |
804 | return error; | 835 | return error; |
805 | handle->pbe = pagedir_nosave; | 836 | handle->pbe = pagedir_nosave; |
806 | handle->buffer = (void *)handle->pbe->address; | 837 | handle->last_pbe = NULL; |
838 | handle->buffer = get_buffer(handle); | ||
807 | } | 839 | } |
808 | } else { | 840 | } else { |
809 | handle->pbe = handle->pbe->next; | 841 | handle->pbe = handle->pbe->next; |
810 | handle->buffer = (void *)handle->pbe->address; | 842 | handle->buffer = get_buffer(handle); |
811 | } | 843 | } |
812 | handle->prev = handle->page; | 844 | handle->prev = handle->page; |
813 | } | 845 | } |
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index c4016cbbd3e0..17f669c83012 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c | |||
@@ -67,9 +67,9 @@ unsigned int count_highmem_pages(void); | |||
67 | int save_highmem(void); | 67 | int save_highmem(void); |
68 | int restore_highmem(void); | 68 | int restore_highmem(void); |
69 | #else | 69 | #else |
70 | static int save_highmem(void) { return 0; } | 70 | static inline int save_highmem(void) { return 0; } |
71 | static int restore_highmem(void) { return 0; } | 71 | static inline int restore_highmem(void) { return 0; } |
72 | static unsigned int count_highmem_pages(void) { return 0; } | 72 | static inline unsigned int count_highmem_pages(void) { return 0; } |
73 | #endif | 73 | #endif |
74 | 74 | ||
75 | /** | 75 | /** |
@@ -175,6 +175,12 @@ void free_all_swap_pages(int swap, struct bitmap_page *bitmap) | |||
175 | */ | 175 | */ |
176 | 176 | ||
177 | #define SHRINK_BITE 10000 | 177 | #define SHRINK_BITE 10000 |
178 | static inline unsigned long __shrink_memory(long tmp) | ||
179 | { | ||
180 | if (tmp > SHRINK_BITE) | ||
181 | tmp = SHRINK_BITE; | ||
182 | return shrink_all_memory(tmp); | ||
183 | } | ||
178 | 184 | ||
179 | int swsusp_shrink_memory(void) | 185 | int swsusp_shrink_memory(void) |
180 | { | 186 | { |
@@ -192,15 +198,17 @@ int swsusp_shrink_memory(void) | |||
192 | PAGES_FOR_IO; | 198 | PAGES_FOR_IO; |
193 | tmp = size; | 199 | tmp = size; |
194 | for_each_zone (zone) | 200 | for_each_zone (zone) |
195 | if (!is_highmem(zone)) | 201 | if (!is_highmem(zone) && populated_zone(zone)) { |
196 | tmp -= zone->free_pages; | 202 | tmp -= zone->free_pages; |
203 | tmp += zone->lowmem_reserve[ZONE_NORMAL]; | ||
204 | } | ||
197 | if (tmp > 0) { | 205 | if (tmp > 0) { |
198 | tmp = shrink_all_memory(SHRINK_BITE); | 206 | tmp = __shrink_memory(tmp); |
199 | if (!tmp) | 207 | if (!tmp) |
200 | return -ENOMEM; | 208 | return -ENOMEM; |
201 | pages += tmp; | 209 | pages += tmp; |
202 | } else if (size > image_size / PAGE_SIZE) { | 210 | } else if (size > image_size / PAGE_SIZE) { |
203 | tmp = shrink_all_memory(SHRINK_BITE); | 211 | tmp = __shrink_memory(size - (image_size / PAGE_SIZE)); |
204 | pages += tmp; | 212 | pages += tmp; |
205 | } | 213 | } |
206 | printk("\b%c", p[i++%4]); | 214 | printk("\b%c", p[i++%4]); |
diff --git a/kernel/printk.c b/kernel/printk.c index c056f3324432..95b7fe17f124 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/console.h> | 24 | #include <linux/console.h> |
25 | #include <linux/init.h> | 25 | #include <linux/init.h> |
26 | #include <linux/module.h> | 26 | #include <linux/module.h> |
27 | #include <linux/moduleparam.h> | ||
27 | #include <linux/interrupt.h> /* For in_interrupt() */ | 28 | #include <linux/interrupt.h> /* For in_interrupt() */ |
28 | #include <linux/config.h> | 29 | #include <linux/config.h> |
29 | #include <linux/delay.h> | 30 | #include <linux/delay.h> |
@@ -67,6 +68,7 @@ EXPORT_SYMBOL(oops_in_progress); | |||
67 | * driver system. | 68 | * driver system. |
68 | */ | 69 | */ |
69 | static DECLARE_MUTEX(console_sem); | 70 | static DECLARE_MUTEX(console_sem); |
71 | static DECLARE_MUTEX(secondary_console_sem); | ||
70 | struct console *console_drivers; | 72 | struct console *console_drivers; |
71 | /* | 73 | /* |
72 | * This is used for debugging the mess that is the VT code by | 74 | * This is used for debugging the mess that is the VT code by |
@@ -76,7 +78,7 @@ struct console *console_drivers; | |||
76 | * path in the console code where we end up in places I want | 78 | * path in the console code where we end up in places I want |
77 | * locked without the console semaphore held | 79 |
78 | */ | 80 | */ |
79 | static int console_locked; | 81 | static int console_locked, console_suspended; |
80 | 82 | ||
81 | /* | 83 | /* |
82 | * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars | 84 | * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars |
@@ -326,7 +328,9 @@ static void __call_console_drivers(unsigned long start, unsigned long end) | |||
326 | struct console *con; | 328 | struct console *con; |
327 | 329 | ||
328 | for (con = console_drivers; con; con = con->next) { | 330 | for (con = console_drivers; con; con = con->next) { |
329 | if ((con->flags & CON_ENABLED) && con->write) | 331 | if ((con->flags & CON_ENABLED) && con->write && |
332 | (cpu_online(smp_processor_id()) || | ||
333 | (con->flags & CON_ANYTIME))) | ||
330 | con->write(con, &LOG_BUF(start), end - start); | 334 | con->write(con, &LOG_BUF(start), end - start); |
331 | } | 335 | } |
332 | } | 336 | } |
@@ -436,6 +440,7 @@ static int printk_time = 1; | |||
436 | #else | 440 | #else |
437 | static int printk_time = 0; | 441 | static int printk_time = 0; |
438 | #endif | 442 | #endif |
443 | module_param(printk_time, int, S_IRUGO | S_IWUSR); | ||
439 | 444 | ||
440 | static int __init printk_time_setup(char *str) | 445 | static int __init printk_time_setup(char *str) |
441 | { | 446 | { |
@@ -452,6 +457,18 @@ __attribute__((weak)) unsigned long long printk_clock(void) | |||
452 | return sched_clock(); | 457 | return sched_clock(); |
453 | } | 458 | } |
454 | 459 | ||
460 | /* Check if we have any console registered that can be called early in boot. */ | ||
461 | static int have_callable_console(void) | ||
462 | { | ||
463 | struct console *con; | ||
464 | |||
465 | for (con = console_drivers; con; con = con->next) | ||
466 | if (con->flags & CON_ANYTIME) | ||
467 | return 1; | ||
468 | |||
469 | return 0; | ||
470 | } | ||
471 | |||
455 | /** | 472 | /** |
456 | * printk - print a kernel message | 473 | * printk - print a kernel message |
457 | * @fmt: format string | 474 | * @fmt: format string |
@@ -565,27 +582,29 @@ asmlinkage int vprintk(const char *fmt, va_list args) | |||
565 | log_level_unknown = 1; | 582 | log_level_unknown = 1; |
566 | } | 583 | } |
567 | 584 | ||
568 | if (!cpu_online(smp_processor_id())) { | 585 | if (!down_trylock(&console_sem)) { |
569 | /* | 586 | /* |
570 | * Some console drivers may assume that per-cpu resources have | 587 | * We own the drivers. We can drop the spinlock and |
571 | * been allocated. So don't allow them to be called by this | 588 | * let release_console_sem() print the text, maybe ... |
572 | * CPU until it is officially up. We shouldn't be calling into | ||
573 | * random console drivers on a CPU which doesn't exist yet.. | ||
574 | */ | 589 | */ |
590 | console_locked = 1; | ||
575 | printk_cpu = UINT_MAX; | 591 | printk_cpu = UINT_MAX; |
576 | spin_unlock_irqrestore(&logbuf_lock, flags); | 592 | spin_unlock_irqrestore(&logbuf_lock, flags); |
577 | goto out; | 593 | |
578 | } | ||
579 | if (!down_trylock(&console_sem)) { | ||
580 | console_locked = 1; | ||
581 | /* | 594 | /* |
582 | * We own the drivers. We can drop the spinlock and let | 595 | * Console drivers may assume that per-cpu resources have |
583 | * release_console_sem() print the text | 596 | * been allocated. So unless they're explicitly marked as |
597 | * being able to cope (CON_ANYTIME) don't call them until | ||
598 | * this CPU is officially up. | ||
584 | */ | 599 | */ |
585 | printk_cpu = UINT_MAX; | 600 | if (cpu_online(smp_processor_id()) || have_callable_console()) { |
586 | spin_unlock_irqrestore(&logbuf_lock, flags); | 601 | console_may_schedule = 0; |
587 | console_may_schedule = 0; | 602 | release_console_sem(); |
588 | release_console_sem(); | 603 | } else { |
604 | /* Release by hand to avoid flushing the buffer. */ | ||
605 | console_locked = 0; | ||
606 | up(&console_sem); | ||
607 | } | ||
589 | } else { | 608 | } else { |
590 | /* | 609 | /* |
591 | * Someone else owns the drivers. We drop the spinlock, which | 610 | * Someone else owns the drivers. We drop the spinlock, which |
@@ -595,7 +614,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) | |||
595 | printk_cpu = UINT_MAX; | 614 | printk_cpu = UINT_MAX; |
596 | spin_unlock_irqrestore(&logbuf_lock, flags); | 615 | spin_unlock_irqrestore(&logbuf_lock, flags); |
597 | } | 616 | } |
598 | out: | 617 | |
599 | preempt_enable(); | 618 | preempt_enable(); |
600 | return printed_len; | 619 | return printed_len; |
601 | } | 620 | } |
@@ -698,6 +717,23 @@ int __init add_preferred_console(char *name, int idx, char *options) | |||
698 | } | 717 | } |
699 | 718 | ||
700 | /** | 719 | /** |
720 | * suspend_console - suspend the console subsystem | ||
721 | * | ||
722 | * This disables printk() while we go into suspend states | ||
723 | */ | ||
724 | void suspend_console(void) | ||
725 | { | ||
726 | acquire_console_sem(); | ||
727 | console_suspended = 1; | ||
728 | } | ||
729 | |||
730 | void resume_console(void) | ||
731 | { | ||
732 | console_suspended = 0; | ||
733 | release_console_sem(); | ||
734 | } | ||
735 | |||
736 | /** | ||
701 | * acquire_console_sem - lock the console system for exclusive use. | 737 | * acquire_console_sem - lock the console system for exclusive use. |
702 | * | 738 | * |
703 | * Acquires a semaphore which guarantees that the caller has | 739 | * Acquires a semaphore which guarantees that the caller has |
@@ -708,6 +744,10 @@ int __init add_preferred_console(char *name, int idx, char *options) | |||
708 | void acquire_console_sem(void) | 744 | void acquire_console_sem(void) |
709 | { | 745 | { |
710 | BUG_ON(in_interrupt()); | 746 | BUG_ON(in_interrupt()); |
747 | if (console_suspended) { | ||
748 | down(&secondary_console_sem); | ||
749 | return; | ||
750 | } | ||
711 | down(&console_sem); | 751 | down(&console_sem); |
712 | console_locked = 1; | 752 | console_locked = 1; |
713 | console_may_schedule = 1; | 753 | console_may_schedule = 1; |
@@ -750,6 +790,10 @@ void release_console_sem(void) | |||
750 | unsigned long _con_start, _log_end; | 790 | unsigned long _con_start, _log_end; |
751 | unsigned long wake_klogd = 0; | 791 | unsigned long wake_klogd = 0; |
752 | 792 | ||
793 | if (console_suspended) { | ||
794 | up(&secondary_console_sem); | ||
795 | return; | ||
796 | } | ||
753 | for ( ; ; ) { | 797 | for ( ; ; ) { |
754 | spin_lock_irqsave(&logbuf_lock, flags); | 798 | spin_lock_irqsave(&logbuf_lock, flags); |
755 | wake_klogd |= log_start - log_end; | 799 | wake_klogd |= log_start - log_end; |
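
The power/main.c hunks already show the caller side of suspend_console()/resume_console(); the driver side of the other change is the new CON_ANYTIME flag, which a console sets when its write() is safe to call before its CPU is marked online. A hedged driver-side sketch (names are illustrative, not from the patch):

    #include <linux/console.h>

    /* Hypothetical early-capable console: write() touches no per-cpu
     * state, so vprintk() may call it while the CPU is still coming up. */
    static void early_write(struct console *con, const char *s, unsigned n)
    {
        /* ... poke a fixed MMIO UART, for example ... */
    }

    static struct console early_console = {
        .name  = "earlycon",
        .write = early_write,
        .flags = CON_PRINTBUFFER | CON_ANYTIME,
        .index = -1,
    };

    /* register_console(&early_console) would then make
     * have_callable_console() return 1 during early boot. */
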
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 921c22ad16e4..335c5b932e14 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -120,8 +120,18 @@ int ptrace_check_attach(struct task_struct *child, int kill) | |||
120 | 120 | ||
121 | static int may_attach(struct task_struct *task) | 121 | static int may_attach(struct task_struct *task) |
122 | { | 122 | { |
123 | if (!task->mm) | 123 | /* May we inspect the given task? |
124 | return -EPERM; | 124 | * This check is used both for attaching with ptrace |
125 | * and for allowing access to sensitive information in /proc. | ||
126 | * | ||
127 | * ptrace_attach denies several cases that /proc allows | ||
128 | * because setting up the necessary parent/child relationship | ||
129 | * or halting the specified task is impossible. | ||
130 | */ | ||
131 | int dumpable = 0; | ||
132 | /* Don't let security modules deny introspection */ | ||
133 | if (task == current) | ||
134 | return 0; | ||
125 | if (((current->uid != task->euid) || | 135 | if (((current->uid != task->euid) || |
126 | (current->uid != task->suid) || | 136 | (current->uid != task->suid) || |
127 | (current->uid != task->uid) || | 137 | (current->uid != task->uid) || |
@@ -130,7 +140,9 @@ static int may_attach(struct task_struct *task) | |||
130 | (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) | 140 | (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) |
131 | return -EPERM; | 141 | return -EPERM; |
132 | smp_rmb(); | 142 | smp_rmb(); |
133 | if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) | 143 | if (task->mm) |
144 | dumpable = task->mm->dumpable; | ||
145 | if (!dumpable && !capable(CAP_SYS_PTRACE)) | ||
134 | return -EPERM; | 146 | return -EPERM; |
135 | 147 | ||
136 | return security_ptrace(current, task); | 148 | return security_ptrace(current, task); |
@@ -176,6 +188,8 @@ repeat: | |||
176 | goto repeat; | 188 | goto repeat; |
177 | } | 189 | } |
178 | 190 | ||
191 | if (!task->mm) | ||
192 | goto bad; | ||
179 | /* the same process cannot be attached many times */ | 193 | /* the same process cannot be attached many times */ |
180 | if (task->ptrace & PT_PTRACED) | 194 | if (task->ptrace & PT_PTRACED) |
181 | goto bad; | 195 | goto bad; |
@@ -200,7 +214,7 @@ out: | |||
200 | return retval; | 214 | return retval; |
201 | } | 215 | } |
202 | 216 | ||
203 | void __ptrace_detach(struct task_struct *child, unsigned int data) | 217 | static inline void __ptrace_detach(struct task_struct *child, unsigned int data) |
204 | { | 218 | { |
205 | child->exit_code = data; | 219 | child->exit_code = data; |
206 | /* .. re-parent .. */ | 220 | /* .. re-parent .. */ |
@@ -219,6 +233,7 @@ int ptrace_detach(struct task_struct *child, unsigned int data) | |||
219 | ptrace_disable(child); | 233 | ptrace_disable(child); |
220 | 234 | ||
221 | write_lock_irq(&tasklist_lock); | 235 | write_lock_irq(&tasklist_lock); |
236 | /* protect against de_thread()->release_task() */ | ||
222 | if (child->ptrace) | 237 | if (child->ptrace) |
223 | __ptrace_detach(child, data); | 238 | __ptrace_detach(child, data); |
224 | write_unlock_irq(&tasklist_lock); | 239 | write_unlock_irq(&tasklist_lock); |
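The substantive change in may_attach() is that task->mm may now be NULL (kernel threads), so dumpable defaults to 0 instead of being read through a NULL pointer, and self-inspection returns early before any security check. A compilable sketch of that shape, with plain structs standing in for task_struct (illustrative only, not the kernel types):

	#include <errno.h>
	#include <stdbool.h>
	#include <stddef.h>

	struct mm   { int dumpable; };
	struct task { struct mm *mm; unsigned uid, euid, suid; };

	static int may_attach(const struct task *cur, const struct task *t,
			      bool cap_sys_ptrace)
	{
		int dumpable = 0;

		if (cur == t)			/* introspection always allowed */
			return 0;
		if ((cur->uid != t->uid || cur->uid != t->euid ||
		     cur->uid != t->suid) && !cap_sys_ptrace)
			return -EPERM;
		if (t->mm)			/* kernel threads: mm == NULL   */
			dumpable = t->mm->dumpable;
		if (!dumpable && !cap_sys_ptrace)
			return -EPERM;
		return 0;			/* security_ptrace() hook elided */
	}

	int main(void)
	{
		struct task kthread = { .mm = NULL };
		struct task proc    = { .mm = NULL };

		/* without CAP_SYS_PTRACE, an mm-less task reads as non-dumpable */
		return may_attach(&proc, &kthread, false) == -EPERM ? 0 : 1;
	}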
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 2058f88c7bbb..20e9710fc21c 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
@@ -612,14 +612,6 @@ void synchronize_rcu(void) | |||
612 | wait_for_completion(&rcu.completion); | 612 | wait_for_completion(&rcu.completion); |
613 | } | 613 | } |
614 | 614 | ||
615 | /* | ||
616 | * Deprecated, use synchronize_rcu() or synchronize_sched() instead. | ||
617 | */ | ||
618 | void synchronize_kernel(void) | ||
619 | { | ||
620 | synchronize_rcu(); | ||
621 | } | ||
622 | |||
623 | module_param(blimit, int, 0); | 615 | module_param(blimit, int, 0); |
624 | module_param(qhimark, int, 0); | 616 | module_param(qhimark, int, 0); |
625 | module_param(qlowmark, int, 0); | 617 | module_param(qlowmark, int, 0); |
@@ -627,7 +619,6 @@ module_param(qlowmark, int, 0); | |||
627 | module_param(rsinterval, int, 0); | 619 | module_param(rsinterval, int, 0); |
628 | #endif | 620 | #endif |
629 | EXPORT_SYMBOL_GPL(rcu_batches_completed); | 621 | EXPORT_SYMBOL_GPL(rcu_batches_completed); |
630 | EXPORT_SYMBOL_GPL_FUTURE(call_rcu); /* WARNING: GPL-only in April 2006. */ | 622 | EXPORT_SYMBOL_GPL(call_rcu); |
631 | EXPORT_SYMBOL_GPL_FUTURE(call_rcu_bh); /* WARNING: GPL-only in April 2006. */ | 623 | EXPORT_SYMBOL_GPL(call_rcu_bh); |
632 | EXPORT_SYMBOL_GPL(synchronize_rcu); | 624 | EXPORT_SYMBOL_GPL(synchronize_rcu); |
633 | EXPORT_SYMBOL_GPL_FUTURE(synchronize_kernel); /* WARNING: GPL-only in April 2006. */ | ||
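With the deprecated synchronize_kernel() wrapper removed, callers say synchronize_rcu() (or synchronize_sched()) directly. For reference, the same publish-then-wait idiom in the userspace RCU library (liburcu, built with -lurcu; this is an analogy to the kernel API, not kernel code):

	#include <urcu.h>
	#include <stdio.h>
	#include <stdlib.h>

	static int *shared;

	int main(void)
	{
		rcu_register_thread();		   /* every RCU-using thread */

		int *fresh = malloc(sizeof *fresh);
		*fresh = 42;
		int *old = shared;		   /* NULL on first publish  */
		rcu_assign_pointer(shared, fresh); /* publish new version    */

		synchronize_rcu();		   /* wait out pre-existing readers */
		free(old);			   /* now safe to reclaim    */

		rcu_read_lock();		   /* reader side            */
		printf("%d\n", *rcu_dereference(shared));
		rcu_read_unlock();

		rcu_unregister_thread();
		return 0;
	}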
diff --git a/kernel/sched.c b/kernel/sched.c index c13f1bd2df7d..a856040c200a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -818,6 +818,11 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq) | |||
818 | * the target CPU. | 818 | * the target CPU. |
819 | */ | 819 | */ |
820 | #ifdef CONFIG_SMP | 820 | #ifdef CONFIG_SMP |
821 | |||
822 | #ifndef tsk_is_polling | ||
823 | #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) | ||
824 | #endif | ||
825 | |||
821 | static void resched_task(task_t *p) | 826 | static void resched_task(task_t *p) |
822 | { | 827 | { |
823 | int cpu; | 828 | int cpu; |
@@ -833,9 +838,9 @@ static void resched_task(task_t *p) | |||
833 | if (cpu == smp_processor_id()) | 838 | if (cpu == smp_processor_id()) |
834 | return; | 839 | return; |
835 | 840 | ||
836 | /* NEED_RESCHED must be visible before we test POLLING_NRFLAG */ | 841 | /* NEED_RESCHED must be visible before we test polling */ |
837 | smp_mb(); | 842 | smp_mb(); |
838 | if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG)) | 843 | if (!tsk_is_polling(p)) |
839 | smp_send_reschedule(cpu); | 844 | smp_send_reschedule(cpu); |
840 | } | 845 | } |
841 | #else | 846 | #else |
@@ -3886,6 +3891,10 @@ long sched_setaffinity(pid_t pid, cpumask_t new_mask) | |||
3886 | !capable(CAP_SYS_NICE)) | 3891 | !capable(CAP_SYS_NICE)) |
3887 | goto out_unlock; | 3892 | goto out_unlock; |
3888 | 3893 | ||
3894 | retval = security_task_setscheduler(p, 0, NULL); | ||
3895 | if (retval) | ||
3896 | goto out_unlock; | ||
3897 | |||
3889 | cpus_allowed = cpuset_cpus_allowed(p); | 3898 | cpus_allowed = cpuset_cpus_allowed(p); |
3890 | cpus_and(new_mask, new_mask, cpus_allowed); | 3899 | cpus_and(new_mask, new_mask, cpus_allowed); |
3891 | retval = set_cpus_allowed(p, new_mask); | 3900 | retval = set_cpus_allowed(p, new_mask); |
@@ -3954,7 +3963,10 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask) | |||
3954 | if (!p) | 3963 | if (!p) |
3955 | goto out_unlock; | 3964 | goto out_unlock; |
3956 | 3965 | ||
3957 | retval = 0; | 3966 | retval = security_task_getscheduler(p); |
3967 | if (retval) | ||
3968 | goto out_unlock; | ||
3969 | |||
3958 | cpus_and(*mask, p->cpus_allowed, cpu_online_map); | 3970 | cpus_and(*mask, p->cpus_allowed, cpu_online_map); |
3959 | 3971 | ||
3960 | out_unlock: | 3972 | out_unlock: |
@@ -4046,6 +4058,9 @@ asmlinkage long sys_sched_yield(void) | |||
4046 | 4058 | ||
4047 | static inline void __cond_resched(void) | 4059 | static inline void __cond_resched(void) |
4048 | { | 4060 | { |
4061 | #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP | ||
4062 | __might_sleep(__FILE__, __LINE__); | ||
4063 | #endif | ||
4049 | /* | 4064 | /* |
4050 | * The BKS might be reacquired before we have dropped | 4065 | * The BKS might be reacquired before we have dropped |
4051 | * PREEMPT_ACTIVE, which could trigger a second | 4066 | * PREEMPT_ACTIVE, which could trigger a second |
@@ -4142,7 +4157,7 @@ EXPORT_SYMBOL(yield); | |||
4142 | */ | 4157 | */ |
4143 | void __sched io_schedule(void) | 4158 | void __sched io_schedule(void) |
4144 | { | 4159 | { |
4145 | struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id()); | 4160 | struct runqueue *rq = &__raw_get_cpu_var(runqueues); |
4146 | 4161 | ||
4147 | atomic_inc(&rq->nr_iowait); | 4162 | atomic_inc(&rq->nr_iowait); |
4148 | schedule(); | 4163 | schedule(); |
@@ -4153,7 +4168,7 @@ EXPORT_SYMBOL(io_schedule); | |||
4153 | 4168 | ||
4154 | long __sched io_schedule_timeout(long timeout) | 4169 | long __sched io_schedule_timeout(long timeout) |
4155 | { | 4170 | { |
4156 | struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id()); | 4171 | struct runqueue *rq = &__raw_get_cpu_var(runqueues); |
4157 | long ret; | 4172 | long ret; |
4158 | 4173 | ||
4159 | atomic_inc(&rq->nr_iowait); | 4174 | atomic_inc(&rq->nr_iowait); |
@@ -4237,7 +4252,7 @@ long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) | |||
4237 | if (retval) | 4252 | if (retval) |
4238 | goto out_unlock; | 4253 | goto out_unlock; |
4239 | 4254 | ||
4240 | jiffies_to_timespec(p->policy & SCHED_FIFO ? | 4255 | jiffies_to_timespec(p->policy == SCHED_FIFO ? |
4241 | 0 : task_timeslice(p), &t); | 4256 | 0 : task_timeslice(p), &t); |
4242 | read_unlock(&tasklist_lock); | 4257 | read_unlock(&tasklist_lock); |
4243 | retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; | 4258 | retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; |
@@ -4746,6 +4761,8 @@ static int migration_call(struct notifier_block *nfb, unsigned long action, | |||
4746 | break; | 4761 | break; |
4747 | #ifdef CONFIG_HOTPLUG_CPU | 4762 | #ifdef CONFIG_HOTPLUG_CPU |
4748 | case CPU_UP_CANCELED: | 4763 | case CPU_UP_CANCELED: |
4764 | if (!cpu_rq(cpu)->migration_thread) | ||
4765 | break; | ||
4749 | /* Unbind it from offline cpu so it can run. Fall thru. */ | 4766 | /* Unbind it from offline cpu so it can run. Fall thru. */ |
4750 | kthread_bind(cpu_rq(cpu)->migration_thread, | 4767 | kthread_bind(cpu_rq(cpu)->migration_thread, |
4751 | any_online_cpu(cpu_online_map)); | 4768 | any_online_cpu(cpu_online_map)); |
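tsk_is_polling() above uses the #ifndef-override idiom: an architecture header may supply its own definition, and the generic TIF_POLLING_NRFLAG test is only the fallback. A self-contained illustration of the idiom (the struct and flag are stand-ins, not kernel types):

	#include <stdio.h>

	struct task { int polling; };

	/* an arch header could have defined this first, e.g. as (0) */
	#ifndef tsk_is_polling
	#define tsk_is_polling(t) ((t)->polling)	/* generic fallback */
	#endif

	int main(void)
	{
		struct task t = { .polling = 1 };

		if (!tsk_is_polling(&t))
			puts("send reschedule IPI");
		else
			puts("polling task will notice need_resched; skip the IPI");
		return 0;
	}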
diff --git a/kernel/signal.c b/kernel/signal.c index e5f8aea78ffe..52adf53929f6 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -23,12 +23,12 @@ | |||
23 | #include <linux/syscalls.h> | 23 | #include <linux/syscalls.h> |
24 | #include <linux/ptrace.h> | 24 | #include <linux/ptrace.h> |
25 | #include <linux/signal.h> | 25 | #include <linux/signal.h> |
26 | #include <linux/audit.h> | ||
27 | #include <linux/capability.h> | 26 | #include <linux/capability.h> |
28 | #include <asm/param.h> | 27 | #include <asm/param.h> |
29 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
30 | #include <asm/unistd.h> | 29 | #include <asm/unistd.h> |
31 | #include <asm/siginfo.h> | 30 | #include <asm/siginfo.h> |
31 | #include "audit.h" /* audit_signal_info() */ | ||
32 | 32 | ||
33 | /* | 33 | /* |
34 | * SLAB caches for signal bits. | 34 | * SLAB caches for signal bits. |
@@ -1531,6 +1531,35 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why) | |||
1531 | spin_unlock_irqrestore(&sighand->siglock, flags); | 1531 | spin_unlock_irqrestore(&sighand->siglock, flags); |
1532 | } | 1532 | } |
1533 | 1533 | ||
1534 | static inline int may_ptrace_stop(void) | ||
1535 | { | ||
1536 | if (!likely(current->ptrace & PT_PTRACED)) | ||
1537 | return 0; | ||
1538 | |||
1539 | if (unlikely(current->parent == current->real_parent && | ||
1540 | (current->ptrace & PT_ATTACHED))) | ||
1541 | return 0; | ||
1542 | |||
1543 | if (unlikely(current->signal == current->parent->signal) && | ||
1544 | unlikely(current->signal->flags & SIGNAL_GROUP_EXIT)) | ||
1545 | return 0; | ||
1546 | |||
1547 | /* | ||
1548 | * Are we in the middle of do_coredump? | ||
1549 | * If so, and our tracer is also part of the coredump, stopping | ||
1550 | * is a deadlock situation, and pointless because our tracer | ||
1551 | * is dead, so don't allow us to stop. | ||
1552 | * If SIGKILL was already sent before the caller unlocked | ||
1553 | * ->siglock we must see ->core_waiters != 0. Otherwise it | ||
1554 | * is safe to enter schedule(). | ||
1555 | */ | ||
1556 | if (unlikely(current->mm->core_waiters) && | ||
1557 | unlikely(current->mm == current->parent->mm)) | ||
1558 | return 0; | ||
1559 | |||
1560 | return 1; | ||
1561 | } | ||
1562 | |||
1534 | /* | 1563 | /* |
1535 | * This must be called with current->sighand->siglock held. | 1564 | * This must be called with current->sighand->siglock held. |
1536 | * | 1565 | * |
@@ -1559,11 +1588,7 @@ static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info) | |||
1559 | spin_unlock_irq(¤t->sighand->siglock); | 1588 | spin_unlock_irq(¤t->sighand->siglock); |
1560 | try_to_freeze(); | 1589 | try_to_freeze(); |
1561 | read_lock(&tasklist_lock); | 1590 | read_lock(&tasklist_lock); |
1562 | if (likely(current->ptrace & PT_PTRACED) && | 1591 | if (may_ptrace_stop()) { |
1563 | likely(current->parent != current->real_parent || | ||
1564 | !(current->ptrace & PT_ATTACHED)) && | ||
1565 | (likely(current->parent->signal != current->signal) || | ||
1566 | !unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))) { | ||
1567 | do_notify_parent_cldstop(current, CLD_TRAPPED); | 1592 | do_notify_parent_cldstop(current, CLD_TRAPPED); |
1568 | read_unlock(&tasklist_lock); | 1593 | read_unlock(&tasklist_lock); |
1569 | schedule(); | 1594 | schedule(); |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 336f92d64e2e..9e2f1c6e73d7 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -470,6 +470,8 @@ static int cpu_callback(struct notifier_block *nfb, | |||
470 | break; | 470 | break; |
471 | #ifdef CONFIG_HOTPLUG_CPU | 471 | #ifdef CONFIG_HOTPLUG_CPU |
472 | case CPU_UP_CANCELED: | 472 | case CPU_UP_CANCELED: |
473 | if (!per_cpu(ksoftirqd, hotcpu)) | ||
474 | break; | ||
473 | /* Unbind so it can run. Fall thru. */ | 475 | /* Unbind so it can run. Fall thru. */ |
474 | kthread_bind(per_cpu(ksoftirqd, hotcpu), | 476 | kthread_bind(per_cpu(ksoftirqd, hotcpu), |
475 | any_online_cpu(cpu_online_map)); | 477 | any_online_cpu(cpu_online_map)); |
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 14c7faf02909..b5c3b94e01ce 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
@@ -36,7 +36,7 @@ static struct notifier_block panic_block = { | |||
36 | 36 | ||
37 | void touch_softlockup_watchdog(void) | 37 | void touch_softlockup_watchdog(void) |
38 | { | 38 | { |
39 | per_cpu(touch_timestamp, raw_smp_processor_id()) = jiffies; | 39 | __raw_get_cpu_var(touch_timestamp) = jiffies; |
40 | } | 40 | } |
41 | EXPORT_SYMBOL(touch_softlockup_watchdog); | 41 | EXPORT_SYMBOL(touch_softlockup_watchdog); |
42 | 42 | ||
@@ -127,6 +127,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
127 | break; | 127 | break; |
128 | #ifdef CONFIG_HOTPLUG_CPU | 128 | #ifdef CONFIG_HOTPLUG_CPU |
129 | case CPU_UP_CANCELED: | 129 | case CPU_UP_CANCELED: |
130 | if (!per_cpu(watchdog_task, hotcpu)) | ||
131 | break; | ||
130 | /* Unbind so it can run. Fall thru. */ | 132 | /* Unbind so it can run. Fall thru. */ |
131 | kthread_bind(per_cpu(watchdog_task, hotcpu), | 133 | kthread_bind(per_cpu(watchdog_task, hotcpu), |
132 | any_online_cpu(cpu_online_map)); | 134 | any_online_cpu(cpu_online_map)); |
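The identical guard lands in sched.c, softirq.c and softlockup.c: if CPU_UP_PREPARE failed, the per-CPU thread pointer is still NULL when CPU_UP_CANCELED fires, and kthread_bind() would dereference it. A tiny model of that callback ordering (hypothetical names; the kernel notifier machinery is elided):

	#include <stdio.h>
	#include <stddef.h>

	struct thread { const char *name; };

	static struct thread *per_cpu_task;	/* set by the UP_PREPARE step */

	static void up_canceled(void)
	{
		if (!per_cpu_task) {		/* UP_PREPARE never succeeded */
			puts("nothing to unbind");
			return;
		}
		printf("unbinding %s\n", per_cpu_task->name);
	}

	int main(void)
	{
		up_canceled();			/* prepare failed: no crash */

		static struct thread t = { "watchdog/1" };
		per_cpu_task = &t;
		up_canceled();			/* normal cancel path       */
		return 0;
	}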
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index dcfb5d731466..2c0aacc37c55 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/cpu.h> | 4 | #include <linux/cpu.h> |
5 | #include <linux/err.h> | 5 | #include <linux/err.h> |
6 | #include <linux/syscalls.h> | 6 | #include <linux/syscalls.h> |
7 | #include <linux/kthread.h> | ||
7 | #include <asm/atomic.h> | 8 | #include <asm/atomic.h> |
8 | #include <asm/semaphore.h> | 9 | #include <asm/semaphore.h> |
9 | #include <asm/uaccess.h> | 10 | #include <asm/uaccess.h> |
@@ -25,13 +26,11 @@ static unsigned int stopmachine_num_threads; | |||
25 | static atomic_t stopmachine_thread_ack; | 26 | static atomic_t stopmachine_thread_ack; |
26 | static DECLARE_MUTEX(stopmachine_mutex); | 27 | static DECLARE_MUTEX(stopmachine_mutex); |
27 | 28 | ||
28 | static int stopmachine(void *cpu) | 29 | static int stopmachine(void *unused) |
29 | { | 30 | { |
30 | int irqs_disabled = 0; | 31 | int irqs_disabled = 0; |
31 | int prepared = 0; | 32 | int prepared = 0; |
32 | 33 | ||
33 | set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu)); | ||
34 | |||
35 | /* Ack: we are alive */ | 34 | /* Ack: we are alive */ |
36 | smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */ | 35 | smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */ |
37 | atomic_inc(&stopmachine_thread_ack); | 36 | atomic_inc(&stopmachine_thread_ack); |
@@ -85,7 +84,8 @@ static void stopmachine_set_state(enum stopmachine_state state) | |||
85 | 84 | ||
86 | static int stop_machine(void) | 85 | static int stop_machine(void) |
87 | { | 86 | { |
88 | int i, ret = 0; | 87 | int ret = 0; |
88 | unsigned int i; | ||
89 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | 89 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; |
90 | 90 | ||
91 | /* One high-prio thread per cpu. We'll do this one. */ | 91 | /* One high-prio thread per cpu. We'll do this one. */ |
@@ -96,11 +96,16 @@ static int stop_machine(void) | |||
96 | stopmachine_state = STOPMACHINE_WAIT; | 96 | stopmachine_state = STOPMACHINE_WAIT; |
97 | 97 | ||
98 | for_each_online_cpu(i) { | 98 | for_each_online_cpu(i) { |
99 | struct task_struct *tsk; | ||
99 | if (i == raw_smp_processor_id()) | 100 | if (i == raw_smp_processor_id()) |
100 | continue; | 101 | continue; |
101 | ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL); | 102 | tsk = kthread_create(stopmachine, NULL, "stopmachine"); |
102 | if (ret < 0) | 103 | if (IS_ERR(tsk)) { |
104 | ret = PTR_ERR(tsk); | ||
103 | break; | 105 | break; |
106 | } | ||
107 | kthread_bind(tsk, i); | ||
108 | wake_up_process(tsk); | ||
104 | stopmachine_num_threads++; | 109 | stopmachine_num_threads++; |
105 | } | 110 | } |
106 | 111 | ||
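stop_machine() now uses kthread_create() plus kthread_bind() from the creating context instead of kernel_thread() plus set_cpus_allowed() from inside the new thread, so each stopper is pinned to its CPU before it ever runs. The closest userspace analogy, assuming the GNU-specific pthread_attr_setaffinity_np() so the affinity is fixed before the thread starts:

	#define _GNU_SOURCE
	#include <pthread.h>
	#include <sched.h>
	#include <stdio.h>

	static void *stopper(void *unused)
	{
		(void)unused;
		printf("stopper running on cpu %d\n", sched_getcpu());
		return NULL;
	}

	int main(void)
	{
		pthread_attr_t attr;
		cpu_set_t set;
		pthread_t tid;

		CPU_ZERO(&set);
		CPU_SET(0, &set);			/* pin to CPU 0 */
		pthread_attr_init(&attr);
		pthread_attr_setaffinity_np(&attr, sizeof(set), &set);

		/* like kthread_create() + kthread_bind(): bound before first run */
		if (pthread_create(&tid, &attr, stopper, NULL) != 0)
			return 1;
		pthread_join(tid, NULL);
		pthread_attr_destroy(&attr);
		return 0;
	}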
diff --git a/kernel/sys.c b/kernel/sys.c index 0b6ec0e7936f..2d5179c67cec 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <linux/notifier.h> | 13 | #include <linux/notifier.h> |
14 | #include <linux/reboot.h> | 14 | #include <linux/reboot.h> |
15 | #include <linux/prctl.h> | 15 | #include <linux/prctl.h> |
16 | #include <linux/init.h> | ||
17 | #include <linux/highuid.h> | 16 | #include <linux/highuid.h> |
18 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
19 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
@@ -57,6 +56,12 @@ | |||
57 | #ifndef GET_FPEXC_CTL | 56 | #ifndef GET_FPEXC_CTL |
58 | # define GET_FPEXC_CTL(a,b) (-EINVAL) | 57 | # define GET_FPEXC_CTL(a,b) (-EINVAL) |
59 | #endif | 58 | #endif |
59 | #ifndef GET_ENDIAN | ||
60 | # define GET_ENDIAN(a,b) (-EINVAL) | ||
61 | #endif | ||
62 | #ifndef SET_ENDIAN | ||
63 | # define SET_ENDIAN(a,b) (-EINVAL) | ||
64 | #endif | ||
60 | 65 | ||
61 | /* | 66 | /* |
62 | * this is where the system-wide overflow UID and GID are defined, for | 67 | * this is where the system-wide overflow UID and GID are defined, for |
@@ -132,14 +137,15 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl, | |||
132 | unsigned long val, void *v) | 137 | unsigned long val, void *v) |
133 | { | 138 | { |
134 | int ret = NOTIFY_DONE; | 139 | int ret = NOTIFY_DONE; |
135 | struct notifier_block *nb; | 140 | struct notifier_block *nb, *next_nb; |
136 | 141 | ||
137 | nb = rcu_dereference(*nl); | 142 | nb = rcu_dereference(*nl); |
138 | while (nb) { | 143 | while (nb) { |
144 | next_nb = rcu_dereference(nb->next); | ||
139 | ret = nb->notifier_call(nb, val, v); | 145 | ret = nb->notifier_call(nb, val, v); |
140 | if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK) | 146 | if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK) |
141 | break; | 147 | break; |
142 | nb = rcu_dereference(nb->next); | 148 | nb = next_nb; |
143 | } | 149 | } |
144 | return ret; | 150 | return ret; |
145 | } | 151 | } |
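The notifier_call_chain() change caches nb->next before invoking the callback, because a notifier is allowed to unregister itself (and be freed) from inside its own handler; reading nb->next afterwards would be a use-after-free. A userspace reduction of the bug and the fix (singly linked list, simplified return convention):

	#include <stdio.h>
	#include <stdlib.h>

	struct notifier {
		struct notifier *next;
		int (*call)(struct notifier *nb, int val);
	};

	static int call_chain(struct notifier *nb, int val)
	{
		int ret = 0;

		while (nb) {
			struct notifier *next = nb->next; /* fetch before the callback */
			ret = nb->call(nb, val);
			if (ret < 0)			  /* stands in for NOTIFY_STOP */
				break;
			nb = next;			  /* safe even if nb was freed */
		}
		return ret;
	}

	static int self_unregister(struct notifier *nb, int val)
	{
		printf("notifier %p got %d, unregistering\n", (void *)nb, val);
		free(nb);				  /* models self-removal */
		return 0;
	}

	int main(void)
	{
		struct notifier *b = malloc(sizeof *b);
		struct notifier *a = malloc(sizeof *a);

		b->next = NULL; b->call = self_unregister;
		a->next = b;    a->call = self_unregister;
		return call_chain(a, 7) < 0;
	}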
@@ -583,7 +589,7 @@ void emergency_restart(void) | |||
583 | } | 589 | } |
584 | EXPORT_SYMBOL_GPL(emergency_restart); | 590 | EXPORT_SYMBOL_GPL(emergency_restart); |
585 | 591 | ||
586 | void kernel_restart_prepare(char *cmd) | 592 | static void kernel_restart_prepare(char *cmd) |
587 | { | 593 | { |
588 | blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); | 594 | blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); |
589 | system_state = SYSTEM_RESTART; | 595 | system_state = SYSTEM_RESTART; |
@@ -617,7 +623,7 @@ EXPORT_SYMBOL_GPL(kernel_restart); | |||
617 | * Move into place and start executing a preloaded standalone | 623 | * Move into place and start executing a preloaded standalone |
618 | * executable. If nothing was preloaded return an error. | 624 | * executable. If nothing was preloaded return an error. |
619 | */ | 625 | */ |
620 | void kernel_kexec(void) | 626 | static void kernel_kexec(void) |
621 | { | 627 | { |
622 | #ifdef CONFIG_KEXEC | 628 | #ifdef CONFIG_KEXEC |
623 | struct kimage *image; | 629 | struct kimage *image; |
@@ -631,7 +637,6 @@ void kernel_kexec(void) | |||
631 | machine_kexec(image); | 637 | machine_kexec(image); |
632 | #endif | 638 | #endif |
633 | } | 639 | } |
634 | EXPORT_SYMBOL_GPL(kernel_kexec); | ||
635 | 640 | ||
636 | void kernel_shutdown_prepare(enum system_states state) | 641 | void kernel_shutdown_prepare(enum system_states state) |
637 | { | 642 | { |
@@ -1860,23 +1865,20 @@ out: | |||
1860 | * fields when reaping, so a sample either gets all the additions of a | 1865 | * fields when reaping, so a sample either gets all the additions of a |
1861 | * given child after it's reaped, or none so this sample is before reaping. | 1866 | * given child after it's reaped, or none so this sample is before reaping. |
1862 | * | 1867 | * |
1863 | * tasklist_lock locking optimisation: | 1868 | * Locking: |
1864 | * If we are current and single threaded, we do not need to take the tasklist | 1869 | * We need to take the siglock for CHILDREN, SELF and BOTH |
1865 | * lock or the siglock. No one else can take our signal_struct away, | 1870 | * for the cases current multithreaded, non-current single threaded, |
1866 | * no one else can reap the children to update signal->c* counters, and | 1871 | * non-current multithreaded. Thread traversal is now safe with |
1867 | * no one else can race with the signal-> fields. | 1872 | * the siglock held. |
1868 | * If we do not take the tasklist_lock, the signal-> fields could be read | 1873 | * Strictly speaking, we do not need to take the siglock if we are current and |
1869 | * out of order while another thread was just exiting. So we place a | 1874 | * single threaded, as no one else can take our signal_struct away, no one |
1870 | * read memory barrier when we avoid the lock. On the writer side, | 1875 | * else can reap the children to update signal->c* counters, and no one else |
1871 | * write memory barrier is implied in __exit_signal as __exit_signal releases | 1876 | * can race with the signal-> fields. If we do not take any lock, the |
1872 | * the siglock spinlock after updating the signal-> fields. | 1877 | * signal-> fields could be read out of order while another thread was just |
1873 | * | 1878 | * exiting. So we should place a read memory barrier when we avoid the lock. |
1874 | * We don't really need the siglock when we access the non c* fields | 1879 | * On the writer side, write memory barrier is implied in __exit_signal |
1875 | * of the signal_struct (for RUSAGE_SELF) even in multithreaded | 1880 | * as __exit_signal releases the siglock spinlock after updating the signal-> |
1876 | * case, since we take the tasklist lock for read and the non c* signal-> | 1881 | * fields. But we don't do this yet to keep things simple. |
1877 | * fields are updated only in __exit_signal, which is called with | ||
1878 | * tasklist_lock taken for write, hence these two threads cannot execute | ||
1879 | * concurrently. | ||
1880 | * | 1882 | * |
1881 | */ | 1883 | */ |
1882 | 1884 | ||
@@ -1885,35 +1887,25 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1885 | struct task_struct *t; | 1887 | struct task_struct *t; |
1886 | unsigned long flags; | 1888 | unsigned long flags; |
1887 | cputime_t utime, stime; | 1889 | cputime_t utime, stime; |
1888 | int need_lock = 0; | ||
1889 | 1890 | ||
1890 | memset((char *) r, 0, sizeof *r); | 1891 | memset((char *) r, 0, sizeof *r); |
1891 | utime = stime = cputime_zero; | 1892 | utime = stime = cputime_zero; |
1892 | 1893 | ||
1893 | if (p != current || !thread_group_empty(p)) | 1894 | rcu_read_lock(); |
1894 | need_lock = 1; | 1895 | if (!lock_task_sighand(p, &flags)) { |
1895 | 1896 | rcu_read_unlock(); | |
1896 | if (need_lock) { | 1897 | return; |
1897 | read_lock(&tasklist_lock); | 1898 | } |
1898 | if (unlikely(!p->signal)) { | ||
1899 | read_unlock(&tasklist_lock); | ||
1900 | return; | ||
1901 | } | ||
1902 | } else | ||
1903 | /* See locking comments above */ | ||
1904 | smp_rmb(); | ||
1905 | 1899 | ||
1906 | switch (who) { | 1900 | switch (who) { |
1907 | case RUSAGE_BOTH: | 1901 | case RUSAGE_BOTH: |
1908 | case RUSAGE_CHILDREN: | 1902 | case RUSAGE_CHILDREN: |
1909 | spin_lock_irqsave(&p->sighand->siglock, flags); | ||
1910 | utime = p->signal->cutime; | 1903 | utime = p->signal->cutime; |
1911 | stime = p->signal->cstime; | 1904 | stime = p->signal->cstime; |
1912 | r->ru_nvcsw = p->signal->cnvcsw; | 1905 | r->ru_nvcsw = p->signal->cnvcsw; |
1913 | r->ru_nivcsw = p->signal->cnivcsw; | 1906 | r->ru_nivcsw = p->signal->cnivcsw; |
1914 | r->ru_minflt = p->signal->cmin_flt; | 1907 | r->ru_minflt = p->signal->cmin_flt; |
1915 | r->ru_majflt = p->signal->cmaj_flt; | 1908 | r->ru_majflt = p->signal->cmaj_flt; |
1916 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | ||
1917 | 1909 | ||
1918 | if (who == RUSAGE_CHILDREN) | 1910 | if (who == RUSAGE_CHILDREN) |
1919 | break; | 1911 | break; |
@@ -1941,8 +1933,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1941 | BUG(); | 1933 | BUG(); |
1942 | } | 1934 | } |
1943 | 1935 | ||
1944 | if (need_lock) | 1936 | unlock_task_sighand(p, &flags); |
1945 | read_unlock(&tasklist_lock); | 1937 | rcu_read_unlock(); |
1938 | |||
1946 | cputime_to_timeval(utime, &r->ru_utime); | 1939 | cputime_to_timeval(utime, &r->ru_utime); |
1947 | cputime_to_timeval(stime, &r->ru_stime); | 1940 | cputime_to_timeval(stime, &r->ru_stime); |
1948 | } | 1941 | } |
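k_getrusage() now leans on lock_task_sighand(), which takes the siglock only after confirming under RCU that the signal state is still live, so callers get a clean failure instead of racing with task exit. A stripped-down sketch of the validate-after-locking idea (a pthread mutex stands in for the siglock, a flag for the liveness check):

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct sighand {
		pthread_mutex_t lock;
		bool live;			/* cleared when the task exits */
	};

	static bool lock_sighand(struct sighand *sh)
	{
		pthread_mutex_lock(&sh->lock);
		if (!sh->live) {		/* task exited under us */
			pthread_mutex_unlock(&sh->lock);
			return false;
		}
		return true;			/* caller must unlock */
	}

	int main(void)
	{
		struct sighand sh = { PTHREAD_MUTEX_INITIALIZER, true };

		if (lock_sighand(&sh)) {
			puts("locked: safe to read the signal->c* counters");
			pthread_mutex_unlock(&sh.lock);
		}
		return 0;
	}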
@@ -2057,6 +2050,13 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | |||
2057 | return -EFAULT; | 2050 | return -EFAULT; |
2058 | return 0; | 2051 | return 0; |
2059 | } | 2052 | } |
2053 | case PR_GET_ENDIAN: | ||
2054 | error = GET_ENDIAN(current, arg2); | ||
2055 | break; | ||
2056 | case PR_SET_ENDIAN: | ||
2057 | error = SET_ENDIAN(current, arg2); | ||
2058 | break; | ||
2059 | |||
2060 | default: | 2060 | default: |
2061 | error = -EINVAL; | 2061 | error = -EINVAL; |
2062 | break; | 2062 | break; |
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 5433195040f1..6991bece67e8 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c | |||
@@ -87,6 +87,7 @@ cond_syscall(sys_inotify_init); | |||
87 | cond_syscall(sys_inotify_add_watch); | 87 | cond_syscall(sys_inotify_add_watch); |
88 | cond_syscall(sys_inotify_rm_watch); | 88 | cond_syscall(sys_inotify_rm_watch); |
89 | cond_syscall(sys_migrate_pages); | 89 | cond_syscall(sys_migrate_pages); |
90 | cond_syscall(sys_move_pages); | ||
90 | cond_syscall(sys_chown16); | 91 | cond_syscall(sys_chown16); |
91 | cond_syscall(sys_fchown16); | 92 | cond_syscall(sys_fchown16); |
92 | cond_syscall(sys_getegid16); | 93 | cond_syscall(sys_getegid16); |
@@ -132,3 +133,4 @@ cond_syscall(sys_mincore); | |||
132 | cond_syscall(sys_madvise); | 133 | cond_syscall(sys_madvise); |
133 | cond_syscall(sys_mremap); | 134 | cond_syscall(sys_mremap); |
134 | cond_syscall(sys_remap_file_pages); | 135 | cond_syscall(sys_remap_file_pages); |
136 | cond_syscall(compat_sys_move_pages); | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e82726faeeff..f1a4eb1a655e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -59,6 +59,7 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp, | |||
59 | extern int C_A_D; | 59 | extern int C_A_D; |
60 | extern int sysctl_overcommit_memory; | 60 | extern int sysctl_overcommit_memory; |
61 | extern int sysctl_overcommit_ratio; | 61 | extern int sysctl_overcommit_ratio; |
62 | extern int sysctl_panic_on_oom; | ||
62 | extern int max_threads; | 63 | extern int max_threads; |
63 | extern int sysrq_enabled; | 64 | extern int sysrq_enabled; |
64 | extern int core_uses_pid; | 65 | extern int core_uses_pid; |
@@ -72,6 +73,7 @@ extern int printk_ratelimit_burst; | |||
72 | extern int pid_max_min, pid_max_max; | 73 | extern int pid_max_min, pid_max_max; |
73 | extern int sysctl_drop_caches; | 74 | extern int sysctl_drop_caches; |
74 | extern int percpu_pagelist_fraction; | 75 | extern int percpu_pagelist_fraction; |
76 | extern int compat_log; | ||
75 | 77 | ||
76 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) | 78 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) |
77 | int unknown_nmi_panic; | 79 | int unknown_nmi_panic; |
@@ -142,7 +144,6 @@ static struct ctl_table_header root_table_header = | |||
142 | 144 | ||
143 | static ctl_table kern_table[]; | 145 | static ctl_table kern_table[]; |
144 | static ctl_table vm_table[]; | 146 | static ctl_table vm_table[]; |
145 | static ctl_table proc_table[]; | ||
146 | static ctl_table fs_table[]; | 147 | static ctl_table fs_table[]; |
147 | static ctl_table debug_table[]; | 148 | static ctl_table debug_table[]; |
148 | static ctl_table dev_table[]; | 149 | static ctl_table dev_table[]; |
@@ -150,7 +151,7 @@ extern ctl_table random_table[]; | |||
150 | #ifdef CONFIG_UNIX98_PTYS | 151 | #ifdef CONFIG_UNIX98_PTYS |
151 | extern ctl_table pty_table[]; | 152 | extern ctl_table pty_table[]; |
152 | #endif | 153 | #endif |
153 | #ifdef CONFIG_INOTIFY | 154 | #ifdef CONFIG_INOTIFY_USER |
154 | extern ctl_table inotify_table[]; | 155 | extern ctl_table inotify_table[]; |
155 | #endif | 156 | #endif |
156 | 157 | ||
@@ -202,12 +203,6 @@ static ctl_table root_table[] = { | |||
202 | }, | 203 | }, |
203 | #endif | 204 | #endif |
204 | { | 205 | { |
205 | .ctl_name = CTL_PROC, | ||
206 | .procname = "proc", | ||
207 | .mode = 0555, | ||
208 | .child = proc_table, | ||
209 | }, | ||
210 | { | ||
211 | .ctl_name = CTL_FS, | 206 | .ctl_name = CTL_FS, |
212 | .procname = "fs", | 207 | .procname = "fs", |
213 | .mode = 0555, | 208 | .mode = 0555, |
@@ -398,7 +393,7 @@ static ctl_table kern_table[] = { | |||
398 | .strategy = &sysctl_string, | 393 | .strategy = &sysctl_string, |
399 | }, | 394 | }, |
400 | #endif | 395 | #endif |
401 | #ifdef CONFIG_HOTPLUG | 396 | #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET) |
402 | { | 397 | { |
403 | .ctl_name = KERN_HOTPLUG, | 398 | .ctl_name = KERN_HOTPLUG, |
404 | .procname = "hotplug", | 399 | .procname = "hotplug", |
@@ -683,6 +678,16 @@ static ctl_table kern_table[] = { | |||
683 | .proc_handler = &proc_dointvec, | 678 | .proc_handler = &proc_dointvec, |
684 | }, | 679 | }, |
685 | #endif | 680 | #endif |
681 | #ifdef CONFIG_COMPAT | ||
682 | { | ||
683 | .ctl_name = KERN_COMPAT_LOG, | ||
684 | .procname = "compat-log", | ||
685 | .data = &compat_log, | ||
686 | .maxlen = sizeof (int), | ||
687 | .mode = 0644, | ||
688 | .proc_handler = &proc_dointvec, | ||
689 | }, | ||
690 | #endif | ||
686 | { .ctl_name = 0 } | 691 | { .ctl_name = 0 } |
687 | }; | 692 | }; |
688 | 693 | ||
@@ -702,6 +707,14 @@ static ctl_table vm_table[] = { | |||
702 | .proc_handler = &proc_dointvec, | 707 | .proc_handler = &proc_dointvec, |
703 | }, | 708 | }, |
704 | { | 709 | { |
710 | .ctl_name = VM_PANIC_ON_OOM, | ||
711 | .procname = "panic_on_oom", | ||
712 | .data = &sysctl_panic_on_oom, | ||
713 | .maxlen = sizeof(sysctl_panic_on_oom), | ||
714 | .mode = 0644, | ||
715 | .proc_handler = &proc_dointvec, | ||
716 | }, | ||
717 | { | ||
705 | .ctl_name = VM_OVERCOMMIT_RATIO, | 718 | .ctl_name = VM_OVERCOMMIT_RATIO, |
706 | .procname = "overcommit_ratio", | 719 | .procname = "overcommit_ratio", |
707 | .data = &sysctl_overcommit_ratio, | 720 | .data = &sysctl_overcommit_ratio, |
@@ -918,10 +931,6 @@ static ctl_table vm_table[] = { | |||
918 | { .ctl_name = 0 } | 931 | { .ctl_name = 0 } |
919 | }; | 932 | }; |
920 | 933 | ||
921 | static ctl_table proc_table[] = { | ||
922 | { .ctl_name = 0 } | ||
923 | }; | ||
924 | |||
925 | static ctl_table fs_table[] = { | 934 | static ctl_table fs_table[] = { |
926 | { | 935 | { |
927 | .ctl_name = FS_NRINODE, | 936 | .ctl_name = FS_NRINODE, |
@@ -1028,7 +1037,7 @@ static ctl_table fs_table[] = { | |||
1028 | .mode = 0644, | 1037 | .mode = 0644, |
1029 | .proc_handler = &proc_doulongvec_minmax, | 1038 | .proc_handler = &proc_doulongvec_minmax, |
1030 | }, | 1039 | }, |
1031 | #ifdef CONFIG_INOTIFY | 1040 | #ifdef CONFIG_INOTIFY_USER |
1032 | { | 1041 | { |
1033 | .ctl_name = FS_INOTIFY, | 1042 | .ctl_name = FS_INOTIFY, |
1034 | .procname = "inotify", | 1043 | .procname = "inotify", |
diff --git a/kernel/time.c b/kernel/time.c index b00ddc71cedb..5bd489747643 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
@@ -523,6 +523,7 @@ EXPORT_SYMBOL(do_gettimeofday); | |||
523 | 523 | ||
524 | 524 | ||
525 | #else | 525 | #else |
526 | #ifndef CONFIG_GENERIC_TIME | ||
526 | /* | 527 | /* |
527 | * Simulate gettimeofday using do_gettimeofday which only allows a timeval | 528 | * Simulate gettimeofday using do_gettimeofday which only allows a timeval |
528 | * and therefore only yields usec accuracy | 529 | * and therefore only yields usec accuracy |
@@ -537,6 +538,7 @@ void getnstimeofday(struct timespec *tv) | |||
537 | } | 538 | } |
538 | EXPORT_SYMBOL_GPL(getnstimeofday); | 539 | EXPORT_SYMBOL_GPL(getnstimeofday); |
539 | #endif | 540 | #endif |
541 | #endif | ||
540 | 542 | ||
541 | /* Converts Gregorian date to seconds since 1970-01-01 00:00:00. | 543 | /* Converts Gregorian date to seconds since 1970-01-01 00:00:00. |
542 | * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 | 544 | * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 |
diff --git a/kernel/time/Makefile b/kernel/time/Makefile new file mode 100644 index 000000000000..e1dfd8e86cce --- /dev/null +++ b/kernel/time/Makefile | |||
@@ -0,0 +1 @@ | |||
obj-y += clocksource.o jiffies.o | |||
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c new file mode 100644 index 000000000000..74eca5939bd9 --- /dev/null +++ b/kernel/time/clocksource.c | |||
@@ -0,0 +1,349 @@ | |||
1 | /* | ||
2 | * linux/kernel/time/clocksource.c | ||
3 | * | ||
4 | * This file contains the functions which manage clocksource drivers. | ||
5 | * | ||
6 | * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com) | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
21 | * | ||
22 | * TODO WishList: | ||
23 | * o Allow clocksource drivers to be unregistered | ||
24 | * o get rid of clocksource_jiffies extern | ||
25 | */ | ||
26 | |||
27 | #include <linux/clocksource.h> | ||
28 | #include <linux/sysdev.h> | ||
29 | #include <linux/init.h> | ||
30 | #include <linux/module.h> | ||
31 | |||
32 | /* XXX - Would like a better way for initializing curr_clocksource */ | ||
33 | extern struct clocksource clocksource_jiffies; | ||
34 | |||
35 | /*[Clocksource internal variables]--------- | ||
36 | * curr_clocksource: | ||
37 | * currently selected clocksource. Initialized to clocksource_jiffies. | ||
38 | * next_clocksource: | ||
39 | * pending next selected clocksource. | ||
40 | * clocksource_list: | ||
41 | * linked list with the registered clocksources | ||
42 | * clocksource_lock: | ||
43 | * protects manipulations to curr_clocksource and next_clocksource | ||
44 | * and the clocksource_list | ||
45 | * override_name: | ||
46 | * Name of the user-specified clocksource. | ||
47 | */ | ||
48 | static struct clocksource *curr_clocksource = &clocksource_jiffies; | ||
49 | static struct clocksource *next_clocksource; | ||
50 | static LIST_HEAD(clocksource_list); | ||
51 | static DEFINE_SPINLOCK(clocksource_lock); | ||
52 | static char override_name[32]; | ||
53 | static int finished_booting; | ||
54 | |||
55 | /* clocksource_done_booting - Called near the end of bootup | ||
56 | * | ||
57 | * Hack to avoid lots of clocksource churn at boot time | ||
58 | */ | ||
59 | static int __init clocksource_done_booting(void) | ||
60 | { | ||
61 | finished_booting = 1; | ||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | late_initcall(clocksource_done_booting); | ||
66 | |||
67 | /** | ||
68 | * clocksource_get_next - Returns the selected clocksource | ||
69 | * | ||
70 | */ | ||
71 | struct clocksource *clocksource_get_next(void) | ||
72 | { | ||
73 | unsigned long flags; | ||
74 | |||
75 | spin_lock_irqsave(&clocksource_lock, flags); | ||
76 | if (next_clocksource && finished_booting) { | ||
77 | curr_clocksource = next_clocksource; | ||
78 | next_clocksource = NULL; | ||
79 | } | ||
80 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
81 | |||
82 | return curr_clocksource; | ||
83 | } | ||
84 | |||
85 | /** | ||
86 | * select_clocksource - Finds the best registered clocksource. | ||
87 | * | ||
88 | * Private function. Must hold clocksource_lock when called. | ||
89 | * | ||
90 | * Looks through the list of registered clocksources, returning | ||
91 | * the one with the highest rating value. If there is a clocksource | ||
92 | * name that matches the override string, it returns that clocksource. | ||
93 | */ | ||
94 | static struct clocksource *select_clocksource(void) | ||
95 | { | ||
96 | struct clocksource *best = NULL; | ||
97 | struct list_head *tmp; | ||
98 | |||
99 | list_for_each(tmp, &clocksource_list) { | ||
100 | struct clocksource *src; | ||
101 | |||
102 | src = list_entry(tmp, struct clocksource, list); | ||
103 | if (!best) | ||
104 | best = src; | ||
105 | |||
106 | /* check for override: */ | ||
107 | if (strlen(src->name) == strlen(override_name) && | ||
108 | !strcmp(src->name, override_name)) { | ||
109 | best = src; | ||
110 | break; | ||
111 | } | ||
112 | /* pick the highest rating: */ | ||
113 | if (src->rating > best->rating) | ||
114 | best = src; | ||
115 | } | ||
116 | |||
117 | return best; | ||
118 | } | ||
119 | |||
120 | /** | ||
121 | * is_registered_source - Checks if clocksource is registered | ||
122 | * @c: pointer to a clocksource | ||
123 | * | ||
124 | * Private helper function. Must hold clocksource_lock when called. | ||
125 | * | ||
126 | * Returns one if the clocksource is already registered, zero otherwise. | ||
127 | */ | ||
128 | static int is_registered_source(struct clocksource *c) | ||
129 | { | ||
130 | int len = strlen(c->name); | ||
131 | struct list_head *tmp; | ||
132 | |||
133 | list_for_each(tmp, &clocksource_list) { | ||
134 | struct clocksource *src; | ||
135 | |||
136 | src = list_entry(tmp, struct clocksource, list); | ||
137 | if (strlen(src->name) == len && !strcmp(src->name, c->name)) | ||
138 | return 1; | ||
139 | } | ||
140 | |||
141 | return 0; | ||
142 | } | ||
143 | |||
144 | /** | ||
145 | * clocksource_register - Used to install new clocksources | ||
146 | * @c: clocksource to be registered | ||
147 | * | ||
148 | * Returns -EBUSY if registration fails, zero otherwise. | ||
149 | */ | ||
150 | int clocksource_register(struct clocksource *c) | ||
151 | { | ||
152 | int ret = 0; | ||
153 | unsigned long flags; | ||
154 | |||
155 | spin_lock_irqsave(&clocksource_lock, flags); | ||
156 | /* check if clocksource is already registered */ | ||
157 | if (is_registered_source(c)) { | ||
158 | printk("register_clocksource: Cannot register %s. " | ||
159 | "Already registered!", c->name); | ||
160 | ret = -EBUSY; | ||
161 | } else { | ||
162 | /* register it */ | ||
163 | list_add(&c->list, &clocksource_list); | ||
164 | /* scan the registered clocksources, and pick the best one */ | ||
165 | next_clocksource = select_clocksource(); | ||
166 | } | ||
167 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
168 | return ret; | ||
169 | } | ||
170 | EXPORT_SYMBOL(clocksource_register); | ||
171 | |||
172 | /** | ||
173 | * clocksource_reselect - Rescan list for next clocksource | ||
174 | * | ||
175 | * A quick helper function to be used if a clocksource changes its | ||
176 | * rating. Forces the clocksource list to be re-scanned for the best | ||
177 | * clocksource. | ||
178 | */ | ||
179 | void clocksource_reselect(void) | ||
180 | { | ||
181 | unsigned long flags; | ||
182 | |||
183 | spin_lock_irqsave(&clocksource_lock, flags); | ||
184 | next_clocksource = select_clocksource(); | ||
185 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
186 | } | ||
187 | EXPORT_SYMBOL(clocksource_reselect); | ||
188 | |||
189 | /** | ||
190 | * sysfs_show_current_clocksources - sysfs interface for current clocksource | ||
191 | * @dev: unused | ||
192 | * @buf: char buffer to be filled with clocksource list | ||
193 | * | ||
194 | * Provides sysfs interface for listing current clocksource. | ||
195 | */ | ||
196 | static ssize_t | ||
197 | sysfs_show_current_clocksources(struct sys_device *dev, char *buf) | ||
198 | { | ||
199 | char *curr = buf; | ||
200 | |||
201 | spin_lock_irq(&clocksource_lock); | ||
202 | curr += sprintf(curr, "%s ", curr_clocksource->name); | ||
203 | spin_unlock_irq(&clocksource_lock); | ||
204 | |||
205 | curr += sprintf(curr, "\n"); | ||
206 | |||
207 | return curr - buf; | ||
208 | } | ||
209 | |||
210 | /** | ||
211 | * sysfs_override_clocksource - interface for manually overriding clocksource | ||
212 | * @dev: unused | ||
213 | * @buf: name of override clocksource | ||
214 | * @count: length of buffer | ||
215 | * | ||
216 | * Takes input from sysfs interface for manually overriding the default | ||
217 | * clocksource selection. | ||
218 | */ | ||
219 | static ssize_t sysfs_override_clocksource(struct sys_device *dev, | ||
220 | const char *buf, size_t count) | ||
221 | { | ||
222 | size_t ret = count; | ||
223 | /* strings from sysfs write are not 0 terminated! */ | ||
224 | if (count >= sizeof(override_name)) | ||
225 | return -EINVAL; | ||
226 | |||
227 | /* strip off \n: */ | ||
228 | if (buf[count-1] == '\n') | ||
229 | count--; | ||
230 | if (count < 1) | ||
231 | return -EINVAL; | ||
232 | |||
233 | spin_lock_irq(&clocksource_lock); | ||
234 | |||
235 | /* copy the name given: */ | ||
236 | memcpy(override_name, buf, count); | ||
237 | override_name[count] = 0; | ||
238 | |||
239 | /* try to select it: */ | ||
240 | next_clocksource = select_clocksource(); | ||
241 | |||
242 | spin_unlock_irq(&clocksource_lock); | ||
243 | |||
244 | return ret; | ||
245 | } | ||
246 | |||
247 | /** | ||
248 | * sysfs_show_available_clocksources - sysfs interface for listing clocksource | ||
249 | * @dev: unused | ||
250 | * @buf: char buffer to be filled with clocksource list | ||
251 | * | ||
252 | * Provides sysfs interface for listing registered clocksources | ||
253 | */ | ||
254 | static ssize_t | ||
255 | sysfs_show_available_clocksources(struct sys_device *dev, char *buf) | ||
256 | { | ||
257 | struct list_head *tmp; | ||
258 | char *curr = buf; | ||
259 | |||
260 | spin_lock_irq(&clocksource_lock); | ||
261 | list_for_each(tmp, &clocksource_list) { | ||
262 | struct clocksource *src; | ||
263 | |||
264 | src = list_entry(tmp, struct clocksource, list); | ||
265 | curr += sprintf(curr, "%s ", src->name); | ||
266 | } | ||
267 | spin_unlock_irq(&clocksource_lock); | ||
268 | |||
269 | curr += sprintf(curr, "\n"); | ||
270 | |||
271 | return curr - buf; | ||
272 | } | ||
273 | |||
274 | /* | ||
275 | * Sysfs setup bits: | ||
276 | */ | ||
277 | static SYSDEV_ATTR(current_clocksource, 0600, sysfs_show_current_clocksources, | ||
278 | sysfs_override_clocksource); | ||
279 | |||
280 | static SYSDEV_ATTR(available_clocksource, 0600, | ||
281 | sysfs_show_available_clocksources, NULL); | ||
282 | |||
283 | static struct sysdev_class clocksource_sysclass = { | ||
284 | set_kset_name("clocksource"), | ||
285 | }; | ||
286 | |||
287 | static struct sys_device device_clocksource = { | ||
288 | .id = 0, | ||
289 | .cls = &clocksource_sysclass, | ||
290 | }; | ||
291 | |||
292 | static int __init init_clocksource_sysfs(void) | ||
293 | { | ||
294 | int error = sysdev_class_register(&clocksource_sysclass); | ||
295 | |||
296 | if (!error) | ||
297 | error = sysdev_register(&device_clocksource); | ||
298 | if (!error) | ||
299 | error = sysdev_create_file( | ||
300 | &device_clocksource, | ||
301 | &attr_current_clocksource); | ||
302 | if (!error) | ||
303 | error = sysdev_create_file( | ||
304 | &device_clocksource, | ||
305 | &attr_available_clocksource); | ||
306 | return error; | ||
307 | } | ||
308 | |||
309 | device_initcall(init_clocksource_sysfs); | ||
310 | |||
311 | /** | ||
312 | * boot_override_clocksource - boot clock override | ||
313 | * @str: override name | ||
314 | * | ||
315 | * Takes a clocksource= boot argument and uses it | ||
316 | * as the clocksource override name. | ||
317 | */ | ||
318 | static int __init boot_override_clocksource(char* str) | ||
319 | { | ||
320 | unsigned long flags; | ||
321 | spin_lock_irqsave(&clocksource_lock, flags); | ||
322 | if (str) | ||
323 | strlcpy(override_name, str, sizeof(override_name)); | ||
324 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
325 | return 1; | ||
326 | } | ||
327 | |||
328 | __setup("clocksource=", boot_override_clocksource); | ||
329 | |||
330 | /** | ||
331 | * boot_override_clock - Compatibility layer for deprecated boot option | ||
332 | * @str: override name | ||
333 | * | ||
334 | * DEPRECATED! Takes a clock= boot argument and uses it | ||
335 | * as the clocksource override name | ||
336 | */ | ||
337 | static int __init boot_override_clock(char* str) | ||
338 | { | ||
339 | if (!strcmp(str, "pmtmr")) { | ||
340 | printk("Warning: clock=pmtmr is deprecated. " | ||
341 | "Use clocksource=acpi_pm.\n"); | ||
342 | return boot_override_clocksource("acpi_pm"); | ||
343 | } | ||
344 | printk("Warning! clock= boot option is deprecated. " | ||
345 | "Use clocksource=xyz\n"); | ||
346 | return boot_override_clocksource(str); | ||
347 | } | ||
348 | |||
349 | __setup("clock=", boot_override_clock); | ||
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c new file mode 100644 index 000000000000..126bb30c4afe --- /dev/null +++ b/kernel/time/jiffies.c | |||
@@ -0,0 +1,73 @@ | |||
1 | /*********************************************************************** | ||
2 | * linux/kernel/time/jiffies.c | ||
3 | * | ||
4 | * This file contains the jiffies based clocksource. | ||
5 | * | ||
6 | * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com) | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
21 | * | ||
22 | ************************************************************************/ | ||
23 | #include <linux/clocksource.h> | ||
24 | #include <linux/jiffies.h> | ||
25 | #include <linux/init.h> | ||
26 | |||
27 | /* The Jiffies based clocksource is the lowest common | ||
28 | * denominator clock source which should function on | ||
29 | * all systems. It has the same coarse resolution as | ||
30 | * the timer interrupt frequency HZ and it suffers | ||
31 | * inaccuracies caused by missed or lost timer | ||
32 | * interrupts and the inability for the timer | ||
33 | * interrupt hardware to accurately tick at the | ||
34 | * requested HZ value. It is also not recommended | ||
35 | * for "tick-less" systems. | ||
36 | */ | ||
37 | #define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/ACTHZ)) | ||
38 | |||
39 | /* Since jiffies uses a simple NSEC_PER_JIFFY multiplier | ||
40 | * conversion, the .shift value could be zero. However | ||
41 | * this would make NTP adjustments impossible as they are | ||
42 | * in units of 1/2^.shift. Thus we use JIFFIES_SHIFT to | ||
43 | * shift both the numerator and denominator the same | ||
44 | * amount, and give ntp adjustments in units of 1/2^8 | ||
45 | * | ||
46 | * The value 8 is somewhat carefully chosen, as anything | ||
47 | * larger can result in overflows. NSEC_PER_JIFFY grows as | ||
48 | * HZ shrinks, so values greater than 8 overflow 32 bits when | ||
49 | * HZ=100. | ||
50 | */ | ||
51 | #define JIFFIES_SHIFT 8 | ||
52 | |||
53 | static cycle_t jiffies_read(void) | ||
54 | { | ||
55 | return (cycle_t) jiffies; | ||
56 | } | ||
57 | |||
58 | struct clocksource clocksource_jiffies = { | ||
59 | .name = "jiffies", | ||
60 | .rating = 0, /* lowest rating */ | ||
61 | .read = jiffies_read, | ||
62 | .mask = 0xffffffff, /* 32 bits */ | ||
63 | .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ | ||
64 | .shift = JIFFIES_SHIFT, | ||
65 | .is_continuous = 0, /* tick based, not free running */ | ||
66 | }; | ||
67 | |||
68 | static int __init init_jiffies_clocksource(void) | ||
69 | { | ||
70 | return clocksource_register(&clocksource_jiffies); | ||
71 | } | ||
72 | |||
73 | module_init(init_jiffies_clocksource); | ||
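The mult/shift pair registered above feeds the usual clocksource conversion, nanoseconds = (cycles * mult) >> shift; with mult = NSEC_PER_JIFFY << JIFFIES_SHIFT and shift = JIFFIES_SHIFT the two shifts cancel, so each jiffy contributes exactly NSEC_PER_JIFFY. A quick arithmetic check, assuming HZ = 250 for the example (not a value from the patch):

	#include <stdio.h>
	#include <stdint.h>

	#define NSEC_PER_SEC	1000000000ULL
	#define HZ		250ULL		/* assumed tick rate */
	#define JIFFIES_SHIFT	8

	int main(void)
	{
		uint32_t nsec_per_jiffy = (uint32_t)(NSEC_PER_SEC / HZ);
		uint64_t mult  = (uint64_t)nsec_per_jiffy << JIFFIES_SHIFT;
		uint64_t delta = 3;				  /* three ticks */
		uint64_t ns    = (delta * mult) >> JIFFIES_SHIFT; /* cyc2ns()    */

		/* prints: 3 jiffies = 12000000 ns */
		printf("%llu jiffies = %llu ns\n",
		       (unsigned long long)delta, (unsigned long long)ns);
		return 0;
	}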
diff --git a/kernel/timer.c b/kernel/timer.c index 9e49deed468c..5bb6b7976eec 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -146,7 +146,7 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer) | |||
146 | void fastcall init_timer(struct timer_list *timer) | 146 | void fastcall init_timer(struct timer_list *timer) |
147 | { | 147 | { |
148 | timer->entry.next = NULL; | 148 | timer->entry.next = NULL; |
149 | timer->base = per_cpu(tvec_bases, raw_smp_processor_id()); | 149 | timer->base = __raw_get_cpu_var(tvec_bases); |
150 | } | 150 | } |
151 | EXPORT_SYMBOL(init_timer); | 151 | EXPORT_SYMBOL(init_timer); |
152 | 152 | ||
@@ -383,23 +383,19 @@ EXPORT_SYMBOL(del_timer_sync); | |||
383 | static int cascade(tvec_base_t *base, tvec_t *tv, int index) | 383 | static int cascade(tvec_base_t *base, tvec_t *tv, int index) |
384 | { | 384 | { |
385 | /* cascade all the timers from tv up one level */ | 385 | /* cascade all the timers from tv up one level */ |
386 | struct list_head *head, *curr; | 386 | struct timer_list *timer, *tmp; |
387 | struct list_head tv_list; | ||
388 | |||
389 | list_replace_init(tv->vec + index, &tv_list); | ||
387 | 390 | ||
388 | head = tv->vec + index; | ||
389 | curr = head->next; | ||
390 | /* | 391 | /* |
391 | * We are removing _all_ timers from the list, so we don't have to | 392 | * We are removing _all_ timers from the list, so we |
392 | * detach them individually, just clear the list afterwards. | 393 | * don't have to detach them individually. |
393 | */ | 394 | */ |
394 | while (curr != head) { | 395 | list_for_each_entry_safe(timer, tmp, &tv_list, entry) { |
395 | struct timer_list *tmp; | 396 | BUG_ON(timer->base != base); |
396 | 397 | internal_add_timer(base, timer); | |
397 | tmp = list_entry(curr, struct timer_list, entry); | ||
398 | BUG_ON(tmp->base != base); | ||
399 | curr = curr->next; | ||
400 | internal_add_timer(base, tmp); | ||
401 | } | 398 | } |
402 | INIT_LIST_HEAD(head); | ||
403 | 399 | ||
404 | return index; | 400 | return index; |
405 | } | 401 | } |
@@ -419,10 +415,10 @@ static inline void __run_timers(tvec_base_t *base) | |||
419 | 415 | ||
420 | spin_lock_irq(&base->lock); | 416 | spin_lock_irq(&base->lock); |
421 | while (time_after_eq(jiffies, base->timer_jiffies)) { | 417 | while (time_after_eq(jiffies, base->timer_jiffies)) { |
422 | struct list_head work_list = LIST_HEAD_INIT(work_list); | 418 | struct list_head work_list; |
423 | struct list_head *head = &work_list; | 419 | struct list_head *head = &work_list; |
424 | int index = base->timer_jiffies & TVR_MASK; | 420 | int index = base->timer_jiffies & TVR_MASK; |
425 | 421 | ||
426 | /* | 422 | /* |
427 | * Cascade timers: | 423 | * Cascade timers: |
428 | */ | 424 | */ |
@@ -431,8 +427,8 @@ static inline void __run_timers(tvec_base_t *base) | |||
431 | (!cascade(base, &base->tv3, INDEX(1))) && | 427 | (!cascade(base, &base->tv3, INDEX(1))) && |
432 | !cascade(base, &base->tv4, INDEX(2))) | 428 | !cascade(base, &base->tv4, INDEX(2))) |
433 | cascade(base, &base->tv5, INDEX(3)); | 429 | cascade(base, &base->tv5, INDEX(3)); |
434 | ++base->timer_jiffies; | 430 | ++base->timer_jiffies; |
435 | list_splice_init(base->tv1.vec + index, &work_list); | 431 | list_replace_init(base->tv1.vec + index, &work_list); |
436 | while (!list_empty(head)) { | 432 | while (!list_empty(head)) { |
437 | void (*fn)(unsigned long); | 433 | void (*fn)(unsigned long); |
438 | unsigned long data; | 434 | unsigned long data; |
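Both cascade() and __run_timers() now detach a whole vector slot in one O(1) list_replace_init() and walk the private copy, which bounds the lock hold time and makes _safe iteration natural. The detach-then-walk pattern, modeled on a singly linked list (not the kernel's list.h):

	#include <stdio.h>
	#include <stdlib.h>

	struct node { struct node *next; int id; };

	/* move every node off *src onto a private head, leaving *src empty */
	static struct node *replace_init(struct node **src)
	{
		struct node *all = *src;

		*src = NULL;
		return all;
	}

	int main(void)
	{
		struct node *pending = NULL;

		for (int i = 0; i < 3; i++) {
			struct node *n = malloc(sizeof *n);
			n->id = i;
			n->next = pending;
			pending = n;
		}

		/* done under base->lock in the kernel; single-threaded here */
		struct node *work = replace_init(&pending);

		for (struct node *n = work, *next; n; n = next) {
			next = n->next;		/* _safe: n is freed below */
			printf("expire timer %d\n", n->id);
			free(n);
		}
		return 0;
	}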
@@ -601,7 +597,6 @@ long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */ | |||
601 | long time_precision = 1; /* clock precision (us) */ | 597 | long time_precision = 1; /* clock precision (us) */ |
602 | long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ | 598 | long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ |
603 | long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ | 599 | long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ |
604 | static long time_phase; /* phase offset (scaled us) */ | ||
605 | long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC; | 600 | long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC; |
606 | /* frequency offset (scaled ppm)*/ | 601 | /* frequency offset (scaled ppm)*/ |
607 | static long time_adj; /* tick adjust (scaled 1 / HZ) */ | 602 | static long time_adj; /* tick adjust (scaled 1 / HZ) */ |
@@ -751,27 +746,14 @@ static long adjtime_adjustment(void) | |||
751 | } | 746 | } |
752 | 747 | ||
753 | /* in the NTP reference this is called "hardclock()" */ | 748 | /* in the NTP reference this is called "hardclock()" */ |
754 | static void update_wall_time_one_tick(void) | 749 | static void update_ntp_one_tick(void) |
755 | { | 750 | { |
756 | long time_adjust_step, delta_nsec; | 751 | long time_adjust_step; |
757 | 752 | ||
758 | time_adjust_step = adjtime_adjustment(); | 753 | time_adjust_step = adjtime_adjustment(); |
759 | if (time_adjust_step) | 754 | if (time_adjust_step) |
760 | /* Reduce by this step the amount of time left */ | 755 | /* Reduce by this step the amount of time left */ |
761 | time_adjust -= time_adjust_step; | 756 | time_adjust -= time_adjust_step; |
762 | delta_nsec = tick_nsec + time_adjust_step * 1000; | ||
763 | /* | ||
764 | * Advance the phase, once it gets to one microsecond, then | ||
765 | * advance the tick more. | ||
766 | */ | ||
767 | time_phase += time_adj; | ||
768 | if ((time_phase >= FINENSEC) || (time_phase <= -FINENSEC)) { | ||
769 | long ltemp = shift_right(time_phase, (SHIFT_SCALE - 10)); | ||
770 | time_phase -= ltemp << (SHIFT_SCALE - 10); | ||
771 | delta_nsec += ltemp; | ||
772 | } | ||
773 | xtime.tv_nsec += delta_nsec; | ||
774 | time_interpolator_update(delta_nsec); | ||
775 | 757 | ||
776 | /* Changes by adjtime() do not take effect till next tick. */ | 758 | /* Changes by adjtime() do not take effect till next tick. */ |
777 | if (time_next_adjust != 0) { | 759 | if (time_next_adjust != 0) { |
@@ -784,36 +766,378 @@ static void update_wall_time_one_tick(void) | |||
784 | * Return how long ticks are at the moment, that is, how much time | 766 | * Return how long ticks are at the moment, that is, how much time |
785 | * update_wall_time_one_tick will add to xtime next time we call it | 767 | * update_wall_time_one_tick will add to xtime next time we call it |
786 | * (assuming no calls to do_adjtimex in the meantime). | 768 | * (assuming no calls to do_adjtimex in the meantime). |
787 | * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10 | 769 | * The return value is in fixed-point nanoseconds shifted by the |
788 | * bits to the right of the binary point. | 770 | * specified number of bits to the right of the binary point. |
789 | * This function has no side-effects. | 771 | * This function has no side-effects. |
790 | */ | 772 | */ |
791 | u64 current_tick_length(void) | 773 | u64 current_tick_length(void) |
792 | { | 774 | { |
793 | long delta_nsec; | 775 | long delta_nsec; |
776 | u64 ret; | ||
794 | 777 | ||
778 | /* calculate the finest interval NTP will allow. | ||
779 | * i.e. the nanosecond value shifted by (SHIFT_SCALE - 10) | ||
780 | */ | ||
795 | delta_nsec = tick_nsec + adjtime_adjustment() * 1000; | 781 | delta_nsec = tick_nsec + adjtime_adjustment() * 1000; |
796 | return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj; | 782 | ret = (u64)delta_nsec << TICK_LENGTH_SHIFT; |
783 | ret += (s64)time_adj << (TICK_LENGTH_SHIFT - (SHIFT_SCALE - 10)); | ||
784 | |||
785 | return ret; | ||
797 | } | 786 | } |
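current_tick_length() returns the tick length in TICK_LENGTH_SHIFT fixed point: a length of n nanoseconds is stored as n << TICK_LENGTH_SHIFT, and time_adj is rescaled from its native SHIFT_SCALE-10 format into the same units. A minimal sketch of the conversion back to whole nanoseconds, assuming TICK_LENGTH_SHIFT as defined in <linux/timex.h>:

        /* sketch: recover whole nanoseconds from the fixed-point tick length */
        static inline u64 tick_length_ns(u64 tick_length)
        {
                return tick_length >> TICK_LENGTH_SHIFT; /* drop fractional bits */
        }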
798 | 787 | ||
799 | /* | 788 | /* XXX - all of this timekeeping code should be later moved to time.c */ |
800 | * Using a loop looks inefficient, but "ticks" is | 789 | #include <linux/clocksource.h> |
801 | * usually just one (we shouldn't be losing ticks, | 790 | static struct clocksource *clock; /* pointer to current clocksource */ |
802 | * we're doing this this way mainly for interrupt | 791 | |
803 | * latency reasons, not because we think we'll | 792 | #ifdef CONFIG_GENERIC_TIME |
804 | * have lots of lost timer ticks | 793 | /** |
794 | * __get_nsec_offset - Returns nanoseconds since last call to update_wall_time() | ||
795 | * | ||
796 | * private function, must hold xtime_lock lock when being | ||
797 | * called. Returns the number of nanoseconds since the | ||
798 | * last call to update_wall_time() (adjusted by NTP scaling) | ||
799 | */ | ||
800 | static inline s64 __get_nsec_offset(void) | ||
801 | { | ||
802 | cycle_t cycle_now, cycle_delta; | ||
803 | s64 ns_offset; | ||
804 | |||
805 | /* read clocksource: */ | ||
806 | cycle_now = clocksource_read(clock); | ||
807 | |||
808 | /* calculate the delta since the last update_wall_time: */ | ||
809 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; | ||
810 | |||
811 | /* convert to nanoseconds: */ | ||
812 | ns_offset = cyc2ns(clock, cycle_delta); | ||
813 | |||
814 | return ns_offset; | ||
815 | } | ||
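The cyc2ns() step above is the usual clocksource multiply-and-shift: nanoseconds = (cycles * mult) >> shift, with mult and shift chosen so the product fits in 64 bits for the deltas expected between updates. A rough sketch, assuming the mult/shift fields of struct clocksource:

        /* sketch: cycle delta -> nanoseconds via the clocksource scale factor */
        static inline s64 cyc2ns_sketch(u32 mult, u32 shift, u64 cycle_delta)
        {
                return (s64)((cycle_delta * mult) >> shift);
        }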
816 | |||
817 | /** | ||
818 | * __get_realtime_clock_ts - Returns the time of day in a timespec | ||
819 | * @ts: pointer to the timespec to be set | ||
820 | * | ||
821 | * Returns the time of day in a timespec. Used by | ||
822 | * do_gettimeofday() and getnstimeofday(). | ||
823 | */ | ||
824 | static inline void __get_realtime_clock_ts(struct timespec *ts) | ||
825 | { | ||
826 | unsigned long seq; | ||
827 | s64 nsecs; | ||
828 | |||
829 | do { | ||
830 | seq = read_seqbegin(&xtime_lock); | ||
831 | |||
832 | *ts = xtime; | ||
833 | nsecs = __get_nsec_offset(); | ||
834 | |||
835 | } while (read_seqretry(&xtime_lock, seq)); | ||
836 | |||
837 | timespec_add_ns(ts, nsecs); | ||
838 | } | ||
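The do/while above is the standard seqlock read side: take a snapshot, then retry if read_seqretry() reports that a writer holding xtime_lock invalidated it. Every lock-free reader of xtime in this file follows the same shape; a generic sketch:

        /* sketch: seqlock reader pattern used throughout the timekeeping code */
        unsigned long seq;
        struct timespec snap;

        do {
                seq = read_seqbegin(&xtime_lock);       /* sample sequence counter */
                snap = xtime;                           /* copy out protected data */
        } while (read_seqretry(&xtime_lock, seq));      /* retry if a writer raced */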
839 | |||
840 | /** | ||
841 | * getnstimeofday - Returns the time of day in a timespec | ||
842 | * @ts: pointer to the timespec to be set | ||
843 | * | ||
844 | * Returns the time of day in a timespec. | ||
845 | */ | ||
846 | void getnstimeofday(struct timespec *ts) | ||
847 | { | ||
848 | __get_realtime_clock_ts(ts); | ||
849 | } | ||
850 | |||
851 | EXPORT_SYMBOL(getnstimeofday); | ||
852 | |||
853 | /** | ||
854 | * do_gettimeofday - Returns the time of day in a timeval | ||
855 | * @tv: pointer to the timeval to be set | ||
856 | * | ||
857 | * NOTE: Users should be converted to using getnstimeofday() | ||
858 | */ | ||
859 | void do_gettimeofday(struct timeval *tv) | ||
860 | { | ||
861 | struct timespec now; | ||
862 | |||
863 | __get_realtime_clock_ts(&now); | ||
864 | tv->tv_sec = now.tv_sec; | ||
865 | tv->tv_usec = now.tv_nsec/1000; | ||
866 | } | ||
867 | |||
868 | EXPORT_SYMBOL(do_gettimeofday); | ||
869 | /** | ||
870 | * do_settimeofday - Sets the time of day | ||
871 | * @tv: pointer to the timespec variable containing the new time | ||
872 | * | ||
873 | * Sets the time of day to the new time, updates NTP and notifies hrtimers | ||
874 | */ | ||
875 | int do_settimeofday(struct timespec *tv) | ||
876 | { | ||
877 | unsigned long flags; | ||
878 | time_t wtm_sec, sec = tv->tv_sec; | ||
879 | long wtm_nsec, nsec = tv->tv_nsec; | ||
880 | |||
881 | if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) | ||
882 | return -EINVAL; | ||
883 | |||
884 | write_seqlock_irqsave(&xtime_lock, flags); | ||
885 | |||
886 | nsec -= __get_nsec_offset(); | ||
887 | |||
888 | wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); | ||
889 | wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); | ||
890 | |||
891 | set_normalized_timespec(&xtime, sec, nsec); | ||
892 | set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); | ||
893 | |||
894 | ntp_clear(); | ||
895 | |||
896 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
897 | |||
898 | /* signal hrtimers about time change */ | ||
899 | clock_was_set(); | ||
900 | |||
901 | return 0; | ||
902 | } | ||
903 | |||
904 | EXPORT_SYMBOL(do_settimeofday); | ||
905 | |||
906 | /** | ||
907 | * change_clocksource - Swaps clocksources if a new one is available | ||
908 | * | ||
909 | * Accumulates current time interval and initializes new clocksource | ||
910 | */ | ||
911 | static int change_clocksource(void) | ||
912 | { | ||
913 | struct clocksource *new; | ||
914 | cycle_t now; | ||
915 | u64 nsec; | ||
916 | new = clocksource_get_next(); | ||
917 | if (clock != new) { | ||
918 | now = clocksource_read(new); | ||
919 | nsec = __get_nsec_offset(); | ||
920 | timespec_add_ns(&xtime, nsec); | ||
921 | |||
922 | clock = new; | ||
923 | clock->cycle_last = now; | ||
924 | printk(KERN_INFO "Time: %s clocksource has been installed.\n", | ||
925 | clock->name); | ||
926 | return 1; | ||
927 | } else if (clock->update_callback) { | ||
928 | return clock->update_callback(); | ||
929 | } | ||
930 | return 0; | ||
931 | } | ||
932 | #else | ||
933 | #define change_clocksource() (0) | ||
934 | #endif | ||
935 | |||
936 | /** | ||
937 | * timekeeping_is_continuous - check to see if timekeeping is free running | ||
805 | */ | 938 | */ |
806 | static void update_wall_time(unsigned long ticks) | 939 | int timekeeping_is_continuous(void) |
807 | { | 940 | { |
941 | unsigned long seq; | ||
942 | int ret; | ||
943 | |||
808 | do { | 944 | do { |
809 | ticks--; | 945 | seq = read_seqbegin(&xtime_lock); |
810 | update_wall_time_one_tick(); | 946 | |
811 | if (xtime.tv_nsec >= 1000000000) { | 947 | ret = clock->is_continuous; |
812 | xtime.tv_nsec -= 1000000000; | 948 | |
949 | } while (read_seqretry(&xtime_lock, seq)); | ||
950 | |||
951 | return ret; | ||
952 | } | ||
953 | |||
954 | /* | ||
955 | * timekeeping_init - Initializes the clocksource and common timekeeping values | ||
956 | */ | ||
957 | void __init timekeeping_init(void) | ||
958 | { | ||
959 | unsigned long flags; | ||
960 | |||
961 | write_seqlock_irqsave(&xtime_lock, flags); | ||
962 | clock = clocksource_get_next(); | ||
963 | clocksource_calculate_interval(clock, tick_nsec); | ||
964 | clock->cycle_last = clocksource_read(clock); | ||
965 | ntp_clear(); | ||
966 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
967 | } | ||
968 | |||
969 | |||
970 | /* | ||
971 | * timekeeping_resume - Resumes the generic timekeeping subsystem. | ||
972 | * @dev: unused | ||
973 | * | ||
974 | * This is for the generic clocksource timekeeping. | ||
975 | * xtime/wall_to_monotonic/jiffies/wall_jiffies/etc are | ||
976 | * still managed by arch specific suspend/resume code. | ||
977 | */ | ||
978 | static int timekeeping_resume(struct sys_device *dev) | ||
979 | { | ||
980 | unsigned long flags; | ||
981 | |||
982 | write_seqlock_irqsave(&xtime_lock, flags); | ||
983 | /* restart the last cycle value */ | ||
984 | clock->cycle_last = clocksource_read(clock); | ||
985 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
986 | return 0; | ||
987 | } | ||
988 | |||
989 | /* sysfs resume/suspend bits for timekeeping */ | ||
990 | static struct sysdev_class timekeeping_sysclass = { | ||
991 | .resume = timekeeping_resume, | ||
992 | set_kset_name("timekeeping"), | ||
993 | }; | ||
994 | |||
995 | static struct sys_device device_timer = { | ||
996 | .id = 0, | ||
997 | .cls = &timekeeping_sysclass, | ||
998 | }; | ||
999 | |||
1000 | static int __init timekeeping_init_device(void) | ||
1001 | { | ||
1002 | int error = sysdev_class_register(&timekeeping_sysclass); | ||
1003 | if (!error) | ||
1004 | error = sysdev_register(&device_timer); | ||
1005 | return error; | ||
1006 | } | ||
1007 | |||
1008 | device_initcall(timekeeping_init_device); | ||
1009 | |||
1010 | /* | ||
1011 | * If the error is already larger, we look ahead another tick, | ||
1012 | * to compensate for late or lost adjustments. | ||
1013 | */ | ||
1014 | static __always_inline int clocksource_bigadjust(int sign, s64 error, s64 *interval, s64 *offset) | ||
1015 | { | ||
1016 | int adj; | ||
1017 | |||
1018 | /* | ||
1019 | * As soon as the machine is synchronized to the external time | ||
1020 | * source this should be the common case. | ||
1021 | */ | ||
1022 | error >>= 2; | ||
1023 | if (likely(sign > 0 ? error <= *interval : error >= *interval)) | ||
1024 | return sign; | ||
1025 | |||
1026 | /* | ||
1027 | * An extra look ahead dampens the effect of the current error, | ||
1028 | * which can grow quite large with continuously late updates, as | ||
1029 | * it would dominate the adjustment value and can lead to | ||
1030 | * oscillation. | ||
1031 | */ | ||
1032 | error += current_tick_length() >> (TICK_LENGTH_SHIFT - clock->shift + 1); | ||
1033 | error -= clock->xtime_interval >> 1; | ||
1034 | |||
1035 | adj = 0; | ||
1036 | while (1) { | ||
1037 | error >>= 1; | ||
1038 | if (sign > 0 ? error <= *interval : error >= *interval) | ||
1039 | break; | ||
1040 | adj++; | ||
1041 | } | ||
1042 | |||
1043 | /* | ||
1044 | * Add the current adjustments to the error and take the offset | ||
1045 | * into account, the latter can cause the error to be hardly | ||
1046 | * reduced at the next tick. Check the error again if there's | ||
1047 | * room for another adjustment, thus further reducing the error | ||
1048 | * which otherwise had to be corrected at the next update. | ||
1049 | */ | ||
1050 | error = (error << 1) - *interval + *offset; | ||
1051 | if (sign > 0 ? error > *interval : error < *interval) | ||
1052 | adj++; | ||
1053 | |||
1054 | *interval <<= adj; | ||
1055 | *offset <<= adj; | ||
1056 | return sign << adj; | ||
1057 | } | ||
1058 | |||
1059 | /* | ||
1060 | * Adjust the multiplier to reduce the error value, | ||
1061 | * this is optimized for the most common adjustments of -1,0,1, | ||
1062 | * for other values we can do a bit more work. | ||
1063 | */ | ||
1064 | static void clocksource_adjust(struct clocksource *clock, s64 offset) | ||
1065 | { | ||
1066 | s64 error, interval = clock->cycle_interval; | ||
1067 | int adj; | ||
1068 | |||
1069 | error = clock->error >> (TICK_LENGTH_SHIFT - clock->shift - 1); | ||
1070 | if (error > interval) { | ||
1071 | adj = clocksource_bigadjust(1, error, &interval, &offset); | ||
1072 | } else if (error < -interval) { | ||
1073 | interval = -interval; | ||
1074 | offset = -offset; | ||
1075 | adj = clocksource_bigadjust(-1, error, &interval, &offset); | ||
1076 | } else | ||
1077 | return; | ||
1078 | |||
1079 | clock->mult += adj; | ||
1080 | clock->xtime_interval += interval; | ||
1081 | clock->xtime_nsec -= offset; | ||
1082 | clock->error -= (interval - offset) << (TICK_LENGTH_SHIFT - clock->shift); | ||
1083 | } | ||
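The adjustment keeps the rate invariant xtime_interval == cycle_interval * mult: for the common adj of ±1, mult moves by one and xtime_interval by one cycle_interval, and clocksource_bigadjust() pre-scales interval/offset by the same power of two for larger steps. A hedged way to state the invariant in code, assuming it was first established by clocksource_calculate_interval():

        /* sketch: the invariant clocksource_adjust() preserves across calls */
        WARN_ON(clock->xtime_interval != (u64)clock->cycle_interval * clock->mult);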
1084 | |||
1085 | /* | ||
1086 | * update_wall_time - Uses the current clocksource to increment the wall time | ||
1087 | * | ||
1088 | * Called from the timer interrupt, must hold a write on xtime_lock. | ||
1089 | */ | ||
1090 | static void update_wall_time(void) | ||
1091 | { | ||
1092 | cycle_t offset; | ||
1093 | |||
1094 | clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift; | ||
1095 | |||
1096 | #ifdef CONFIG_GENERIC_TIME | ||
1097 | offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask; | ||
1098 | #else | ||
1099 | offset = clock->cycle_interval; | ||
1100 | #endif | ||
1101 | |||
1102 | /* normally this loop will run just once, however in the | ||
1103 | * case of lost or late ticks, it will accumulate correctly. | ||
1104 | */ | ||
1105 | while (offset >= clock->cycle_interval) { | ||
1106 | /* accumulate one interval */ | ||
1107 | clock->xtime_nsec += clock->xtime_interval; | ||
1108 | clock->cycle_last += clock->cycle_interval; | ||
1109 | offset -= clock->cycle_interval; | ||
1110 | |||
1111 | if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) { | ||
1112 | clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; | ||
813 | xtime.tv_sec++; | 1113 | xtime.tv_sec++; |
814 | second_overflow(); | 1114 | second_overflow(); |
815 | } | 1115 | } |
816 | } while (ticks); | 1116 | |
1117 | /* interpolator bits */ | ||
1118 | time_interpolator_update(clock->xtime_interval | ||
1119 | >> clock->shift); | ||
1120 | /* increment the NTP state machine */ | ||
1121 | update_ntp_one_tick(); | ||
1122 | |||
1123 | /* accumulate error between NTP and clock interval */ | ||
1124 | clock->error += current_tick_length(); | ||
1125 | clock->error -= clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift); | ||
1126 | } | ||
1127 | |||
1128 | /* correct the clock when NTP error is too big */ | ||
1129 | clocksource_adjust(clock, offset); | ||
1130 | |||
1131 | /* store full nanoseconds into xtime */ | ||
1132 | xtime.tv_nsec = clock->xtime_nsec >> clock->shift; | ||
1133 | clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; | ||
1134 | |||
1135 | /* check to see if there is a new clocksource to use */ | ||
1136 | if (change_clocksource()) { | ||
1137 | clock->error = 0; | ||
1138 | clock->xtime_nsec = 0; | ||
1139 | clocksource_calculate_interval(clock, tick_nsec); | ||
1140 | } | ||
817 | } | 1141 | } |
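Per accumulated interval, the NTP error therefore grows by the difference between the length NTP asked for and the length the clocksource actually delivered, both expressed in TICK_LENGTH_SHIFT fixed point:

        /* error growth per accumulated interval, TICK_LENGTH_SHIFT fixed point */
        error += current_tick_length()
               - (clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift));

clocksource_adjust() then nudges mult until this running error converges toward zero, so the multiplier tracks the NTP-disciplined rate rather than the raw hardware rate.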
818 | 1142 | ||
819 | /* | 1143 | /* |
@@ -919,10 +1243,8 @@ static inline void update_times(void) | |||
919 | unsigned long ticks; | 1243 | unsigned long ticks; |
920 | 1244 | ||
921 | ticks = jiffies - wall_jiffies; | 1245 | ticks = jiffies - wall_jiffies; |
922 | if (ticks) { | 1246 | wall_jiffies += ticks; |
923 | wall_jiffies += ticks; | 1247 | update_wall_time(); |
924 | update_wall_time(ticks); | ||
925 | } | ||
926 | calc_load(ticks); | 1248 | calc_load(ticks); |
927 | } | 1249 | } |
928 | 1250 | ||
diff --git a/kernel/unwind.c b/kernel/unwind.c new file mode 100644 index 000000000000..f69c804c8e62 --- /dev/null +++ b/kernel/unwind.c | |||
@@ -0,0 +1,918 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2002-2006 Novell, Inc. | ||
3 | * Jan Beulich <jbeulich@novell.com> | ||
4 | * This code is released under version 2 of the GNU GPL. | ||
5 | * | ||
6 | * A simple API for unwinding kernel stacks. This is used for | ||
7 | * debugging and error reporting purposes. The kernel doesn't need | ||
8 | * full-blown stack unwinding with all the bells and whistles, so there | ||
9 | * is not much point in implementing the full Dwarf2 unwind API. | ||
10 | */ | ||
11 | |||
12 | #include <linux/unwind.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/delay.h> | ||
15 | #include <linux/stop_machine.h> | ||
16 | #include <asm/sections.h> | ||
17 | #include <asm/uaccess.h> | ||
18 | #include <asm/unaligned.h> | ||
19 | |||
20 | extern char __start_unwind[], __end_unwind[]; | ||
21 | |||
22 | #define MAX_STACK_DEPTH 8 | ||
23 | |||
24 | #define EXTRA_INFO(f) { \ | ||
25 | BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \ | ||
26 | % FIELD_SIZEOF(struct unwind_frame_info, f)) \ | ||
27 | + offsetof(struct unwind_frame_info, f) \ | ||
28 | / FIELD_SIZEOF(struct unwind_frame_info, f), \ | ||
29 | FIELD_SIZEOF(struct unwind_frame_info, f) \ | ||
30 | } | ||
31 | #define PTREGS_INFO(f) EXTRA_INFO(regs.f) | ||
32 | |||
33 | static const struct { | ||
34 | unsigned offs:BITS_PER_LONG / 2; | ||
35 | unsigned width:BITS_PER_LONG / 2; | ||
36 | } reg_info[] = { | ||
37 | UNW_REGISTER_INFO | ||
38 | }; | ||
39 | |||
40 | #undef PTREGS_INFO | ||
41 | #undef EXTRA_INFO | ||
42 | |||
43 | #ifndef REG_INVALID | ||
44 | #define REG_INVALID(r) (reg_info[r].width == 0) | ||
45 | #endif | ||
46 | |||
47 | #define DW_CFA_nop 0x00 | ||
48 | #define DW_CFA_set_loc 0x01 | ||
49 | #define DW_CFA_advance_loc1 0x02 | ||
50 | #define DW_CFA_advance_loc2 0x03 | ||
51 | #define DW_CFA_advance_loc4 0x04 | ||
52 | #define DW_CFA_offset_extended 0x05 | ||
53 | #define DW_CFA_restore_extended 0x06 | ||
54 | #define DW_CFA_undefined 0x07 | ||
55 | #define DW_CFA_same_value 0x08 | ||
56 | #define DW_CFA_register 0x09 | ||
57 | #define DW_CFA_remember_state 0x0a | ||
58 | #define DW_CFA_restore_state 0x0b | ||
59 | #define DW_CFA_def_cfa 0x0c | ||
60 | #define DW_CFA_def_cfa_register 0x0d | ||
61 | #define DW_CFA_def_cfa_offset 0x0e | ||
62 | #define DW_CFA_def_cfa_expression 0x0f | ||
63 | #define DW_CFA_expression 0x10 | ||
64 | #define DW_CFA_offset_extended_sf 0x11 | ||
65 | #define DW_CFA_def_cfa_sf 0x12 | ||
66 | #define DW_CFA_def_cfa_offset_sf 0x13 | ||
67 | #define DW_CFA_val_offset 0x14 | ||
68 | #define DW_CFA_val_offset_sf 0x15 | ||
69 | #define DW_CFA_val_expression 0x16 | ||
70 | #define DW_CFA_lo_user 0x1c | ||
71 | #define DW_CFA_GNU_window_save 0x2d | ||
72 | #define DW_CFA_GNU_args_size 0x2e | ||
73 | #define DW_CFA_GNU_negative_offset_extended 0x2f | ||
74 | #define DW_CFA_hi_user 0x3f | ||
75 | |||
76 | #define DW_EH_PE_FORM 0x07 | ||
77 | #define DW_EH_PE_native 0x00 | ||
78 | #define DW_EH_PE_leb128 0x01 | ||
79 | #define DW_EH_PE_data2 0x02 | ||
80 | #define DW_EH_PE_data4 0x03 | ||
81 | #define DW_EH_PE_data8 0x04 | ||
82 | #define DW_EH_PE_signed 0x08 | ||
83 | #define DW_EH_PE_ADJUST 0x70 | ||
84 | #define DW_EH_PE_abs 0x00 | ||
85 | #define DW_EH_PE_pcrel 0x10 | ||
86 | #define DW_EH_PE_textrel 0x20 | ||
87 | #define DW_EH_PE_datarel 0x30 | ||
88 | #define DW_EH_PE_funcrel 0x40 | ||
89 | #define DW_EH_PE_aligned 0x50 | ||
90 | #define DW_EH_PE_indirect 0x80 | ||
91 | #define DW_EH_PE_omit 0xff | ||
92 | |||
93 | typedef unsigned long uleb128_t; | ||
94 | typedef signed long sleb128_t; | ||
95 | |||
96 | static struct unwind_table { | ||
97 | struct { | ||
98 | unsigned long pc; | ||
99 | unsigned long range; | ||
100 | } core, init; | ||
101 | const void *address; | ||
102 | unsigned long size; | ||
103 | struct unwind_table *link; | ||
104 | const char *name; | ||
105 | } root_table, *last_table; | ||
106 | |||
107 | struct unwind_item { | ||
108 | enum item_location { | ||
109 | Nowhere, | ||
110 | Memory, | ||
111 | Register, | ||
112 | Value | ||
113 | } where; | ||
114 | uleb128_t value; | ||
115 | }; | ||
116 | |||
117 | struct unwind_state { | ||
118 | uleb128_t loc, org; | ||
119 | const u8 *cieStart, *cieEnd; | ||
120 | uleb128_t codeAlign; | ||
121 | sleb128_t dataAlign; | ||
122 | struct cfa { | ||
123 | uleb128_t reg, offs; | ||
124 | } cfa; | ||
125 | struct unwind_item regs[ARRAY_SIZE(reg_info)]; | ||
126 | unsigned stackDepth:8; | ||
127 | unsigned version:8; | ||
128 | const u8 *label; | ||
129 | const u8 *stack[MAX_STACK_DEPTH]; | ||
130 | }; | ||
131 | |||
132 | static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 }; | ||
133 | |||
134 | static struct unwind_table *find_table(unsigned long pc) | ||
135 | { | ||
136 | struct unwind_table *table; | ||
137 | |||
138 | for (table = &root_table; table; table = table->link) | ||
139 | if ((pc >= table->core.pc | ||
140 | && pc < table->core.pc + table->core.range) | ||
141 | || (pc >= table->init.pc | ||
142 | && pc < table->init.pc + table->init.range)) | ||
143 | break; | ||
144 | |||
145 | return table; | ||
146 | } | ||
147 | |||
148 | static void init_unwind_table(struct unwind_table *table, | ||
149 | const char *name, | ||
150 | const void *core_start, | ||
151 | unsigned long core_size, | ||
152 | const void *init_start, | ||
153 | unsigned long init_size, | ||
154 | const void *table_start, | ||
155 | unsigned long table_size) | ||
156 | { | ||
157 | table->core.pc = (unsigned long)core_start; | ||
158 | table->core.range = core_size; | ||
159 | table->init.pc = (unsigned long)init_start; | ||
160 | table->init.range = init_size; | ||
161 | table->address = table_start; | ||
162 | table->size = table_size; | ||
163 | table->link = NULL; | ||
164 | table->name = name; | ||
165 | } | ||
166 | |||
167 | void __init unwind_init(void) | ||
168 | { | ||
169 | init_unwind_table(&root_table, "kernel", | ||
170 | _text, _end - _text, | ||
171 | NULL, 0, | ||
172 | __start_unwind, __end_unwind - __start_unwind); | ||
173 | } | ||
174 | |||
175 | #ifdef CONFIG_MODULES | ||
176 | |||
177 | /* Must be called with module_mutex held. */ | ||
178 | void *unwind_add_table(struct module *module, | ||
179 | const void *table_start, | ||
180 | unsigned long table_size) | ||
181 | { | ||
182 | struct unwind_table *table; | ||
183 | |||
184 | if (!table_size) | ||
185 | return NULL; | ||
186 | |||
187 | table = kmalloc(sizeof(*table), GFP_KERNEL); | ||
188 | if (!table) | ||
189 | return NULL; | ||
190 | |||
191 | init_unwind_table(table, module->name, | ||
192 | module->module_core, module->core_size, | ||
193 | module->module_init, module->init_size, | ||
194 | table_start, table_size); | ||
195 | |||
196 | if (last_table) | ||
197 | last_table->link = table; | ||
198 | else | ||
199 | root_table.link = table; | ||
200 | last_table = table; | ||
201 | |||
202 | return table; | ||
203 | } | ||
204 | |||
205 | struct unlink_table_info | ||
206 | { | ||
207 | struct unwind_table *table; | ||
208 | int init_only; | ||
209 | }; | ||
210 | |||
211 | static int unlink_table(void *arg) | ||
212 | { | ||
213 | struct unlink_table_info *info = arg; | ||
214 | struct unwind_table *table = info->table, *prev; | ||
215 | |||
216 | for (prev = &root_table; prev->link && prev->link != table; prev = prev->link) | ||
217 | ; | ||
218 | |||
219 | if (prev->link) { | ||
220 | if (info->init_only) { | ||
221 | table->init.pc = 0; | ||
222 | table->init.range = 0; | ||
223 | info->table = NULL; | ||
224 | } else { | ||
225 | prev->link = table->link; | ||
226 | if (!prev->link) | ||
227 | last_table = prev; | ||
228 | } | ||
229 | } else | ||
230 | info->table = NULL; | ||
231 | |||
232 | return 0; | ||
233 | } | ||
234 | |||
235 | /* Must be called with module_mutex held. */ | ||
236 | void unwind_remove_table(void *handle, int init_only) | ||
237 | { | ||
238 | struct unwind_table *table = handle; | ||
239 | struct unlink_table_info info; | ||
240 | |||
241 | if (!table || table == &root_table) | ||
242 | return; | ||
243 | |||
244 | if (init_only && table == last_table) { | ||
245 | table->init.pc = 0; | ||
246 | table->init.range = 0; | ||
247 | return; | ||
248 | } | ||
249 | |||
250 | info.table = table; | ||
251 | info.init_only = init_only; | ||
252 | stop_machine_run(unlink_table, &info, NR_CPUS); | ||
253 | |||
254 | if (info.table) | ||
255 | kfree(table); | ||
256 | } | ||
257 | |||
258 | #endif /* CONFIG_MODULES */ | ||
259 | |||
260 | static uleb128_t get_uleb128(const u8 **pcur, const u8 *end) | ||
261 | { | ||
262 | const u8 *cur = *pcur; | ||
263 | uleb128_t value; | ||
264 | unsigned shift; | ||
265 | |||
266 | for (shift = 0, value = 0; cur < end; shift += 7) { | ||
267 | if (shift + 7 > 8 * sizeof(value) | ||
268 | && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { | ||
269 | cur = end + 1; | ||
270 | break; | ||
271 | } | ||
272 | value |= (uleb128_t)(*cur & 0x7f) << shift; | ||
273 | if (!(*cur++ & 0x80)) | ||
274 | break; | ||
275 | } | ||
276 | *pcur = cur; | ||
277 | |||
278 | return value; | ||
279 | } | ||
280 | |||
281 | static sleb128_t get_sleb128(const u8 **pcur, const u8 *end) | ||
282 | { | ||
283 | const u8 *cur = *pcur; | ||
284 | sleb128_t value; | ||
285 | unsigned shift; | ||
286 | |||
287 | for (shift = 0, value = 0; cur < end; shift += 7) { | ||
288 | if (shift + 7 > 8 * sizeof(value) | ||
289 | && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { | ||
290 | cur = end + 1; | ||
291 | break; | ||
292 | } | ||
293 | value |= (sleb128_t)(*cur & 0x7f) << shift; | ||
294 | if (!(*cur & 0x80)) { | ||
295 | value |= -(*cur++ & 0x40) << shift; | ||
296 | break; | ||
297 | } | ||
298 | } | ||
299 | *pcur = cur; | ||
300 | |||
301 | return value; | ||
302 | } | ||
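These helpers decode the standard DWARF LEB128 formats: seven payload bits per byte, least-significant group first, bit 7 as the continuation flag, and (for SLEB128) sign extension from bit 6 of the final byte. Two worked examples from the DWARF specification, usable as test vectors against the decoders above:

        /* ULEB128: 624485 == 0x98765 encodes as e5 8e 26 */
        static const u8 uleb_624485[] = { 0xe5, 0x8e, 0x26 };
        /* SLEB128: -2 encodes as the single byte 7e (bit 6 carries the sign) */
        static const u8 sleb_minus2[] = { 0x7e };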
303 | |||
304 | static unsigned long read_pointer(const u8 **pLoc, | ||
305 | const void *end, | ||
306 | signed ptrType) | ||
307 | { | ||
308 | unsigned long value = 0; | ||
309 | union { | ||
310 | const u8 *p8; | ||
311 | const u16 *p16u; | ||
312 | const s16 *p16s; | ||
313 | const u32 *p32u; | ||
314 | const s32 *p32s; | ||
315 | const unsigned long *pul; | ||
316 | } ptr; | ||
317 | |||
318 | if (ptrType < 0 || ptrType == DW_EH_PE_omit) | ||
319 | return 0; | ||
320 | ptr.p8 = *pLoc; | ||
321 | switch(ptrType & DW_EH_PE_FORM) { | ||
322 | case DW_EH_PE_data2: | ||
323 | if (end < (const void *)(ptr.p16u + 1)) | ||
324 | return 0; | ||
325 | if(ptrType & DW_EH_PE_signed) | ||
326 | value = get_unaligned(ptr.p16s++); | ||
327 | else | ||
328 | value = get_unaligned(ptr.p16u++); | ||
329 | break; | ||
330 | case DW_EH_PE_data4: | ||
331 | #ifdef CONFIG_64BIT | ||
332 | if (end < (const void *)(ptr.p32u + 1)) | ||
333 | return 0; | ||
334 | if(ptrType & DW_EH_PE_signed) | ||
335 | value = get_unaligned(ptr.p32s++); | ||
336 | else | ||
337 | value = get_unaligned(ptr.p32u++); | ||
338 | break; | ||
339 | case DW_EH_PE_data8: | ||
340 | BUILD_BUG_ON(sizeof(u64) != sizeof(value)); | ||
341 | #else | ||
342 | BUILD_BUG_ON(sizeof(u32) != sizeof(value)); | ||
343 | #endif | ||
344 | case DW_EH_PE_native: | ||
345 | if (end < (const void *)(ptr.pul + 1)) | ||
346 | return 0; | ||
347 | value = get_unaligned(ptr.pul++); | ||
348 | break; | ||
349 | case DW_EH_PE_leb128: | ||
350 | BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value)); | ||
351 | value = ptrType & DW_EH_PE_signed | ||
352 | ? get_sleb128(&ptr.p8, end) | ||
353 | : get_uleb128(&ptr.p8, end); | ||
354 | if ((const void *)ptr.p8 > end) | ||
355 | return 0; | ||
356 | break; | ||
357 | default: | ||
358 | return 0; | ||
359 | } | ||
360 | switch(ptrType & DW_EH_PE_ADJUST) { | ||
361 | case DW_EH_PE_abs: | ||
362 | break; | ||
363 | case DW_EH_PE_pcrel: | ||
364 | value += (unsigned long)*pLoc; | ||
365 | break; | ||
366 | default: | ||
367 | return 0; | ||
368 | } | ||
369 | if ((ptrType & DW_EH_PE_indirect) | ||
370 | && __get_user(value, (unsigned long *)value)) | ||
371 | return 0; | ||
372 | *pLoc = ptr.p8; | ||
373 | |||
374 | return value; | ||
375 | } | ||
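A ptrType byte combines a form in the low bits with an adjustment in bits 4-6. For instance, the encoding gcc typically emits for FDE address ranges in .eh_frame is a signed, PC-relative 32-bit value; decoding one with the helper above would look roughly like this (loc is a hypothetical cursor into the unwind table):

        /* sketch: signed 32-bit PC-relative pointer (DW_EH_PE_sdata4|pcrel) */
        const u8 *p = loc;
        unsigned long target = read_pointer(&p, loc + 4,
                        DW_EH_PE_pcrel | DW_EH_PE_data4 | DW_EH_PE_signed);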
376 | |||
377 | static signed fde_pointer_type(const u32 *cie) | ||
378 | { | ||
379 | const u8 *ptr = (const u8 *)(cie + 2); | ||
380 | unsigned version = *ptr; | ||
381 | |||
382 | if (version != 1) | ||
383 | return -1; /* unsupported */ | ||
384 | if (*++ptr) { | ||
385 | const char *aug; | ||
386 | const u8 *end = (const u8 *)(cie + 1) + *cie; | ||
387 | uleb128_t len; | ||
388 | |||
389 | /* check if augmentation size is first (and thus present) */ | ||
390 | if (*ptr != 'z') | ||
391 | return -1; | ||
392 | /* check if augmentation string is nul-terminated */ | ||
393 | if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL) | ||
394 | return -1; | ||
395 | ++ptr; /* skip terminator */ | ||
396 | get_uleb128(&ptr, end); /* skip code alignment */ | ||
397 | get_sleb128(&ptr, end); /* skip data alignment */ | ||
398 | /* skip return address column */ | ||
399 | version <= 1 ? (void)++ptr : (void)get_uleb128(&ptr, end); | ||
400 | len = get_uleb128(&ptr, end); /* augmentation length */ | ||
401 | if (ptr + len < ptr || ptr + len > end) | ||
402 | return -1; | ||
403 | end = ptr + len; | ||
404 | while (*++aug) { | ||
405 | if (ptr >= end) | ||
406 | return -1; | ||
407 | switch(*aug) { | ||
408 | case 'L': | ||
409 | ++ptr; | ||
410 | break; | ||
411 | case 'P': { | ||
412 | signed ptrType = *ptr++; | ||
413 | |||
414 | if (!read_pointer(&ptr, end, ptrType) || ptr > end) | ||
415 | return -1; | ||
416 | } | ||
417 | break; | ||
418 | case 'R': | ||
419 | return *ptr; | ||
420 | default: | ||
421 | return -1; | ||
422 | } | ||
423 | } | ||
424 | } | ||
425 | return DW_EH_PE_native|DW_EH_PE_abs; | ||
426 | } | ||
427 | |||
428 | static int advance_loc(unsigned long delta, struct unwind_state *state) | ||
429 | { | ||
430 | state->loc += delta * state->codeAlign; | ||
431 | |||
432 | return delta > 0; | ||
433 | } | ||
434 | |||
435 | static void set_rule(uleb128_t reg, | ||
436 | enum item_location where, | ||
437 | uleb128_t value, | ||
438 | struct unwind_state *state) | ||
439 | { | ||
440 | if (reg < ARRAY_SIZE(state->regs)) { | ||
441 | state->regs[reg].where = where; | ||
442 | state->regs[reg].value = value; | ||
443 | } | ||
444 | } | ||
445 | |||
446 | static int processCFI(const u8 *start, | ||
447 | const u8 *end, | ||
448 | unsigned long targetLoc, | ||
449 | signed ptrType, | ||
450 | struct unwind_state *state) | ||
451 | { | ||
452 | union { | ||
453 | const u8 *p8; | ||
454 | const u16 *p16; | ||
455 | const u32 *p32; | ||
456 | } ptr; | ||
457 | int result = 1; | ||
458 | |||
459 | if (start != state->cieStart) { | ||
460 | state->loc = state->org; | ||
461 | result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state); | ||
462 | if (targetLoc == 0 && state->label == NULL) | ||
463 | return result; | ||
464 | } | ||
465 | for (ptr.p8 = start; result && ptr.p8 < end; ) { | ||
466 | switch(*ptr.p8 >> 6) { | ||
467 | uleb128_t value; | ||
468 | |||
469 | case 0: | ||
470 | switch(*ptr.p8++) { | ||
471 | case DW_CFA_nop: | ||
472 | break; | ||
473 | case DW_CFA_set_loc: | ||
474 | if ((state->loc = read_pointer(&ptr.p8, end, ptrType)) == 0) | ||
475 | result = 0; | ||
476 | break; | ||
477 | case DW_CFA_advance_loc1: | ||
478 | result = ptr.p8 < end && advance_loc(*ptr.p8++, state); | ||
479 | break; | ||
480 | case DW_CFA_advance_loc2: | ||
481 | result = ptr.p8 + 2 <= end | ||
482 | && advance_loc(*ptr.p16++, state); | ||
483 | break; | ||
484 | case DW_CFA_advance_loc4: | ||
485 | result = ptr.p8 + 4 <= end | ||
486 | && advance_loc(*ptr.p32++, state); | ||
487 | break; | ||
488 | case DW_CFA_offset_extended: | ||
489 | value = get_uleb128(&ptr.p8, end); | ||
490 | set_rule(value, Memory, get_uleb128(&ptr.p8, end), state); | ||
491 | break; | ||
492 | case DW_CFA_val_offset: | ||
493 | value = get_uleb128(&ptr.p8, end); | ||
494 | set_rule(value, Value, get_uleb128(&ptr.p8, end), state); | ||
495 | break; | ||
496 | case DW_CFA_offset_extended_sf: | ||
497 | value = get_uleb128(&ptr.p8, end); | ||
498 | set_rule(value, Memory, get_sleb128(&ptr.p8, end), state); | ||
499 | break; | ||
500 | case DW_CFA_val_offset_sf: | ||
501 | value = get_uleb128(&ptr.p8, end); | ||
502 | set_rule(value, Value, get_sleb128(&ptr.p8, end), state); | ||
503 | break; | ||
504 | case DW_CFA_restore_extended: | ||
505 | case DW_CFA_undefined: | ||
506 | case DW_CFA_same_value: | ||
507 | set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state); | ||
508 | break; | ||
509 | case DW_CFA_register: | ||
510 | value = get_uleb128(&ptr.p8, end); | ||
511 | set_rule(value, | ||
512 | Register, | ||
513 | get_uleb128(&ptr.p8, end), state); | ||
514 | break; | ||
515 | case DW_CFA_remember_state: | ||
516 | if (ptr.p8 == state->label) { | ||
517 | state->label = NULL; | ||
518 | return 1; | ||
519 | } | ||
520 | if (state->stackDepth >= MAX_STACK_DEPTH) | ||
521 | return 0; | ||
522 | state->stack[state->stackDepth++] = ptr.p8; | ||
523 | break; | ||
524 | case DW_CFA_restore_state: | ||
525 | if (state->stackDepth) { | ||
526 | const uleb128_t loc = state->loc; | ||
527 | const u8 *label = state->label; | ||
528 | |||
529 | state->label = state->stack[state->stackDepth - 1]; | ||
530 | memcpy(&state->cfa, &badCFA, sizeof(state->cfa)); | ||
531 | memset(state->regs, 0, sizeof(state->regs)); | ||
532 | state->stackDepth = 0; | ||
533 | result = processCFI(start, end, 0, ptrType, state); | ||
534 | state->loc = loc; | ||
535 | state->label = label; | ||
536 | } else | ||
537 | return 0; | ||
538 | break; | ||
539 | case DW_CFA_def_cfa: | ||
540 | state->cfa.reg = get_uleb128(&ptr.p8, end); | ||
541 | /*nobreak*/ | ||
542 | case DW_CFA_def_cfa_offset: | ||
543 | state->cfa.offs = get_uleb128(&ptr.p8, end); | ||
544 | break; | ||
545 | case DW_CFA_def_cfa_sf: | ||
546 | state->cfa.reg = get_uleb128(&ptr.p8, end); | ||
547 | /*nobreak*/ | ||
548 | case DW_CFA_def_cfa_offset_sf: | ||
549 | state->cfa.offs = get_sleb128(&ptr.p8, end) | ||
550 | * state->dataAlign; | ||
551 | break; | ||
552 | case DW_CFA_def_cfa_register: | ||
553 | state->cfa.reg = get_uleb128(&ptr.p8, end); | ||
554 | break; | ||
555 | /*todo case DW_CFA_def_cfa_expression: */ | ||
556 | /*todo case DW_CFA_expression: */ | ||
557 | /*todo case DW_CFA_val_expression: */ | ||
558 | case DW_CFA_GNU_args_size: | ||
559 | get_uleb128(&ptr.p8, end); | ||
560 | break; | ||
561 | case DW_CFA_GNU_negative_offset_extended: | ||
562 | value = get_uleb128(&ptr.p8, end); | ||
563 | set_rule(value, | ||
564 | Memory, | ||
565 | (uleb128_t)0 - get_uleb128(&ptr.p8, end), state); | ||
566 | break; | ||
567 | case DW_CFA_GNU_window_save: | ||
568 | default: | ||
569 | result = 0; | ||
570 | break; | ||
571 | } | ||
572 | break; | ||
573 | case 1: | ||
574 | result = advance_loc(*ptr.p8++ & 0x3f, state); | ||
575 | break; | ||
576 | case 2: | ||
577 | value = *ptr.p8++ & 0x3f; | ||
578 | set_rule(value, Memory, get_uleb128(&ptr.p8, end), state); | ||
579 | break; | ||
580 | case 3: | ||
581 | set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state); | ||
582 | break; | ||
583 | } | ||
584 | if (ptr.p8 > end) | ||
585 | result = 0; | ||
586 | if (result && targetLoc != 0 && targetLoc < state->loc) | ||
587 | return 1; | ||
588 | } | ||
589 | |||
590 | return result | ||
591 | && ptr.p8 == end | ||
592 | && (targetLoc == 0 | ||
593 | || (/*todo While in theory this should apply, gcc in practice omits | ||
594 | everything past the function prolog, and hence the location | ||
595 | never reaches the end of the function. | ||
596 | targetLoc < state->loc &&*/ state->label == NULL)); | ||
597 | } | ||
598 | |||
599 | /* Unwind to the previous frame. Returns 0 if successful, negative | ||
600 | * number in case of an error. */ | ||
601 | int unwind(struct unwind_frame_info *frame) | ||
602 | { | ||
603 | #define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs]) | ||
604 | const u32 *fde = NULL, *cie = NULL; | ||
605 | const u8 *ptr = NULL, *end = NULL; | ||
606 | unsigned long startLoc = 0, endLoc = 0, cfa; | ||
607 | unsigned i; | ||
608 | signed ptrType = -1; | ||
609 | uleb128_t retAddrReg = 0; | ||
610 | struct unwind_table *table; | ||
611 | struct unwind_state state; | ||
612 | |||
613 | if (UNW_PC(frame) == 0) | ||
614 | return -EINVAL; | ||
615 | if ((table = find_table(UNW_PC(frame))) != NULL | ||
616 | && !(table->size & (sizeof(*fde) - 1))) { | ||
617 | unsigned long tableSize = table->size; | ||
618 | |||
619 | for (fde = table->address; | ||
620 | tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde; | ||
621 | tableSize -= sizeof(*fde) + *fde, | ||
622 | fde += 1 + *fde / sizeof(*fde)) { | ||
623 | if (!*fde || (*fde & (sizeof(*fde) - 1))) | ||
624 | break; | ||
625 | if (!fde[1]) | ||
626 | continue; /* this is a CIE */ | ||
627 | if ((fde[1] & (sizeof(*fde) - 1)) | ||
628 | || fde[1] > (unsigned long)(fde + 1) | ||
629 | - (unsigned long)table->address) | ||
630 | continue; /* this is not a valid FDE */ | ||
631 | cie = fde + 1 - fde[1] / sizeof(*fde); | ||
632 | if (*cie <= sizeof(*cie) + 4 | ||
633 | || *cie >= fde[1] - sizeof(*fde) | ||
634 | || (*cie & (sizeof(*cie) - 1)) | ||
635 | || cie[1] | ||
636 | || (ptrType = fde_pointer_type(cie)) < 0) { | ||
637 | cie = NULL; /* this is not a (valid) CIE */ | ||
638 | continue; | ||
639 | } | ||
640 | ptr = (const u8 *)(fde + 2); | ||
641 | startLoc = read_pointer(&ptr, | ||
642 | (const u8 *)(fde + 1) + *fde, | ||
643 | ptrType); | ||
644 | endLoc = startLoc | ||
645 | + read_pointer(&ptr, | ||
646 | (const u8 *)(fde + 1) + *fde, | ||
647 | ptrType & DW_EH_PE_indirect | ||
648 | ? ptrType | ||
649 | : ptrType & (DW_EH_PE_FORM|DW_EH_PE_signed)); | ||
650 | if (UNW_PC(frame) >= startLoc && UNW_PC(frame) < endLoc) | ||
651 | break; | ||
652 | cie = NULL; | ||
653 | } | ||
654 | } | ||
655 | if (cie != NULL) { | ||
656 | memset(&state, 0, sizeof(state)); | ||
657 | state.cieEnd = ptr; /* keep here temporarily */ | ||
658 | ptr = (const u8 *)(cie + 2); | ||
659 | end = (const u8 *)(cie + 1) + *cie; | ||
660 | if ((state.version = *ptr) != 1) | ||
661 | cie = NULL; /* unsupported version */ | ||
662 | else if (*++ptr) { | ||
663 | /* check if augmentation size is first (and thus present) */ | ||
664 | if (*ptr == 'z') { | ||
665 | /* check for ignorable (or already handled) | ||
666 | * nul-terminated augmentation string */ | ||
667 | while (++ptr < end && *ptr) | ||
668 | if (strchr("LPR", *ptr) == NULL) | ||
669 | break; | ||
670 | } | ||
671 | if (ptr >= end || *ptr) | ||
672 | cie = NULL; | ||
673 | } | ||
674 | ++ptr; | ||
675 | } | ||
676 | if (cie != NULL) { | ||
677 | /* get code alignment factor */ | ||
678 | state.codeAlign = get_uleb128(&ptr, end); | ||
679 | /* get data alignment factor */ | ||
680 | state.dataAlign = get_sleb128(&ptr, end); | ||
681 | if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end) | ||
682 | cie = NULL; | ||
683 | else { | ||
684 | retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end); | ||
685 | /* skip augmentation */ | ||
686 | if (((const char *)(cie + 2))[1] == 'z') | ||
687 | ptr += get_uleb128(&ptr, end); | ||
688 | if (ptr > end | ||
689 | || retAddrReg >= ARRAY_SIZE(reg_info) | ||
690 | || REG_INVALID(retAddrReg) | ||
691 | || reg_info[retAddrReg].width != sizeof(unsigned long)) | ||
692 | cie = NULL; | ||
693 | } | ||
694 | } | ||
695 | if (cie != NULL) { | ||
696 | state.cieStart = ptr; | ||
697 | ptr = state.cieEnd; | ||
698 | state.cieEnd = end; | ||
699 | end = (const u8 *)(fde + 1) + *fde; | ||
700 | /* skip augmentation */ | ||
701 | if (((const char *)(cie + 2))[1] == 'z') { | ||
702 | uleb128_t augSize = get_uleb128(&ptr, end); | ||
703 | |||
704 | if ((ptr += augSize) > end) | ||
705 | fde = NULL; | ||
706 | } | ||
707 | } | ||
708 | if (cie == NULL || fde == NULL) { | ||
709 | #ifdef CONFIG_FRAME_POINTER | ||
710 | unsigned long top, bottom; | ||
711 | #endif | ||
712 | |||
713 | #ifdef CONFIG_FRAME_POINTER | ||
714 | top = STACK_TOP(frame->task); | ||
715 | bottom = STACK_BOTTOM(frame->task); | ||
716 | # if FRAME_RETADDR_OFFSET < 0 | ||
717 | if (UNW_SP(frame) < top | ||
718 | && UNW_FP(frame) <= UNW_SP(frame) | ||
719 | && bottom < UNW_FP(frame) | ||
720 | # else | ||
721 | if (UNW_SP(frame) > top | ||
722 | && UNW_FP(frame) >= UNW_SP(frame) | ||
723 | && bottom > UNW_FP(frame) | ||
724 | # endif | ||
725 | && !((UNW_SP(frame) | UNW_FP(frame)) | ||
726 | & (sizeof(unsigned long) - 1))) { | ||
727 | unsigned long link; | ||
728 | |||
729 | if (!__get_user(link, | ||
730 | (unsigned long *)(UNW_FP(frame) | ||
731 | + FRAME_LINK_OFFSET)) | ||
732 | # if FRAME_RETADDR_OFFSET < 0 | ||
733 | && link > bottom && link < UNW_FP(frame) | ||
734 | # else | ||
735 | && link > UNW_FP(frame) && link < bottom | ||
736 | # endif | ||
737 | && !(link & (sizeof(link) - 1)) | ||
738 | && !__get_user(UNW_PC(frame), | ||
739 | (unsigned long *)(UNW_FP(frame) | ||
740 | + FRAME_RETADDR_OFFSET))) { | ||
741 | UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET | ||
742 | # if FRAME_RETADDR_OFFSET < 0 | ||
743 | - | ||
744 | # else | ||
745 | + | ||
746 | # endif | ||
747 | sizeof(UNW_PC(frame)); | ||
748 | UNW_FP(frame) = link; | ||
749 | return 0; | ||
750 | } | ||
751 | } | ||
752 | #endif | ||
753 | return -ENXIO; | ||
754 | } | ||
755 | state.org = startLoc; | ||
756 | memcpy(&state.cfa, &badCFA, sizeof(state.cfa)); | ||
757 | /* process instructions */ | ||
758 | if (!processCFI(ptr, end, UNW_PC(frame), ptrType, &state) | ||
759 | || state.loc > endLoc | ||
760 | || state.regs[retAddrReg].where == Nowhere | ||
761 | || state.cfa.reg >= ARRAY_SIZE(reg_info) | ||
762 | || reg_info[state.cfa.reg].width != sizeof(unsigned long) | ||
763 | || state.cfa.offs % sizeof(unsigned long)) | ||
764 | return -EIO; | ||
765 | /* update frame */ | ||
766 | cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs; | ||
767 | startLoc = min((unsigned long)UNW_SP(frame), cfa); | ||
768 | endLoc = max((unsigned long)UNW_SP(frame), cfa); | ||
769 | if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) { | ||
770 | startLoc = min(STACK_LIMIT(cfa), cfa); | ||
771 | endLoc = max(STACK_LIMIT(cfa), cfa); | ||
772 | } | ||
773 | #ifndef CONFIG_64BIT | ||
774 | # define CASES CASE(8); CASE(16); CASE(32) | ||
775 | #else | ||
776 | # define CASES CASE(8); CASE(16); CASE(32); CASE(64) | ||
777 | #endif | ||
778 | for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { | ||
779 | if (REG_INVALID(i)) { | ||
780 | if (state.regs[i].where == Nowhere) | ||
781 | continue; | ||
782 | return -EIO; | ||
783 | } | ||
784 | switch(state.regs[i].where) { | ||
785 | default: | ||
786 | break; | ||
787 | case Register: | ||
788 | if (state.regs[i].value >= ARRAY_SIZE(reg_info) | ||
789 | || REG_INVALID(state.regs[i].value) | ||
790 | || reg_info[i].width > reg_info[state.regs[i].value].width) | ||
791 | return -EIO; | ||
792 | switch(reg_info[state.regs[i].value].width) { | ||
793 | #define CASE(n) \ | ||
794 | case sizeof(u##n): \ | ||
795 | state.regs[i].value = FRAME_REG(state.regs[i].value, \ | ||
796 | const u##n); \ | ||
797 | break | ||
798 | CASES; | ||
799 | #undef CASE | ||
800 | default: | ||
801 | return -EIO; | ||
802 | } | ||
803 | break; | ||
804 | } | ||
805 | } | ||
806 | for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { | ||
807 | if (REG_INVALID(i)) | ||
808 | continue; | ||
809 | switch(state.regs[i].where) { | ||
810 | case Nowhere: | ||
811 | if (reg_info[i].width != sizeof(UNW_SP(frame)) | ||
812 | || &FRAME_REG(i, __typeof__(UNW_SP(frame))) | ||
813 | != &UNW_SP(frame)) | ||
814 | continue; | ||
815 | UNW_SP(frame) = cfa; | ||
816 | break; | ||
817 | case Register: | ||
818 | switch(reg_info[i].width) { | ||
819 | #define CASE(n) case sizeof(u##n): \ | ||
820 | FRAME_REG(i, u##n) = state.regs[i].value; \ | ||
821 | break | ||
822 | CASES; | ||
823 | #undef CASE | ||
824 | default: | ||
825 | return -EIO; | ||
826 | } | ||
827 | break; | ||
828 | case Value: | ||
829 | if (reg_info[i].width != sizeof(unsigned long)) | ||
830 | return -EIO; | ||
831 | FRAME_REG(i, unsigned long) = cfa + state.regs[i].value | ||
832 | * state.dataAlign; | ||
833 | break; | ||
834 | case Memory: { | ||
835 | unsigned long addr = cfa + state.regs[i].value | ||
836 | * state.dataAlign; | ||
837 | |||
838 | if ((state.regs[i].value * state.dataAlign) | ||
839 | % sizeof(unsigned long) | ||
840 | || addr < startLoc | ||
841 | || addr + sizeof(unsigned long) < addr | ||
842 | || addr + sizeof(unsigned long) > endLoc) | ||
843 | return -EIO; | ||
844 | switch(reg_info[i].width) { | ||
845 | #define CASE(n) case sizeof(u##n): \ | ||
846 | __get_user(FRAME_REG(i, u##n), (u##n *)addr); \ | ||
847 | break | ||
848 | CASES; | ||
849 | #undef CASE | ||
850 | default: | ||
851 | return -EIO; | ||
852 | } | ||
853 | } | ||
854 | break; | ||
855 | } | ||
856 | } | ||
857 | |||
858 | return 0; | ||
859 | #undef CASES | ||
860 | #undef FRAME_REG | ||
861 | } | ||
862 | EXPORT_SYMBOL(unwind); | ||
863 | |||
864 | int unwind_init_frame_info(struct unwind_frame_info *info, | ||
865 | struct task_struct *tsk, | ||
866 | /*const*/ struct pt_regs *regs) | ||
867 | { | ||
868 | info->task = tsk; | ||
869 | arch_unw_init_frame_info(info, regs); | ||
870 | |||
871 | return 0; | ||
872 | } | ||
873 | EXPORT_SYMBOL(unwind_init_frame_info); | ||
874 | |||
875 | /* | ||
876 | * Prepare to unwind a blocked task. | ||
877 | */ | ||
878 | int unwind_init_blocked(struct unwind_frame_info *info, | ||
879 | struct task_struct *tsk) | ||
880 | { | ||
881 | info->task = tsk; | ||
882 | arch_unw_init_blocked(info); | ||
883 | |||
884 | return 0; | ||
885 | } | ||
886 | EXPORT_SYMBOL(unwind_init_blocked); | ||
887 | |||
888 | /* | ||
889 | * Prepare to unwind the currently running thread. | ||
890 | */ | ||
891 | int unwind_init_running(struct unwind_frame_info *info, | ||
892 | asmlinkage int (*callback)(struct unwind_frame_info *, | ||
893 | void *arg), | ||
894 | void *arg) | ||
895 | { | ||
896 | info->task = current; | ||
897 | |||
898 | return arch_unwind_init_running(info, callback, arg); | ||
899 | } | ||
900 | EXPORT_SYMBOL(unwind_init_running); | ||
901 | |||
902 | /* | ||
903 | * Unwind until the return pointer is in user-land (or until an error | ||
904 | * occurs). Returns 0 if successful, negative number in case of | ||
905 | * error. | ||
906 | */ | ||
907 | int unwind_to_user(struct unwind_frame_info *info) | ||
908 | { | ||
909 | while (!arch_unw_user_mode(info)) { | ||
910 | int err = unwind(info); | ||
911 | |||
912 | if (err < 0) | ||
913 | return err; | ||
914 | } | ||
915 | |||
916 | return 0; | ||
917 | } | ||
918 | EXPORT_SYMBOL(unwind_to_user); | ||
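Putting the API together: a caller initializes a frame for a blocked task, then repeatedly steps to the caller's frame until unwind() errors out or user mode is reached. A minimal sketch, assuming an architecture that provides the arch_unw_* hooks and the UNW_PC() accessor; the depth cap is a hypothetical guard against malformed unwind data:

        /* sketch: print a blocked task's kernel backtrace via the unwinder */
        static void dump_blocked_task(struct task_struct *tsk)
        {
                struct unwind_frame_info info;
                int depth = 64;                         /* hypothetical safety cap */

                if (unwind_init_blocked(&info, tsk) < 0)
                        return;
                while (!arch_unw_user_mode(&info) && unwind(&info) == 0 && --depth)
                        printk(" [<%p>]\n", (void *)UNW_PC(&info));
        }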
diff --git a/kernel/user.c b/kernel/user.c index 2116642f42c6..6408c0424291 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -140,7 +140,7 @@ struct user_struct * alloc_uid(uid_t uid) | |||
140 | atomic_set(&new->processes, 0); | 140 | atomic_set(&new->processes, 0); |
141 | atomic_set(&new->files, 0); | 141 | atomic_set(&new->files, 0); |
142 | atomic_set(&new->sigpending, 0); | 142 | atomic_set(&new->sigpending, 0); |
143 | #ifdef CONFIG_INOTIFY | 143 | #ifdef CONFIG_INOTIFY_USER |
144 | atomic_set(&new->inotify_watches, 0); | 144 | atomic_set(&new->inotify_watches, 0); |
145 | atomic_set(&new->inotify_devs, 0); | 145 | atomic_set(&new->inotify_devs, 0); |
146 | #endif | 146 | #endif |
@@ -148,7 +148,7 @@ struct user_struct * alloc_uid(uid_t uid) | |||
148 | new->mq_bytes = 0; | 148 | new->mq_bytes = 0; |
149 | new->locked_shm = 0; | 149 | new->locked_shm = 0; |
150 | 150 | ||
151 | if (alloc_uid_keyring(new) < 0) { | 151 | if (alloc_uid_keyring(new, current) < 0) { |
152 | kmem_cache_free(uid_cachep, new); | 152 | kmem_cache_free(uid_cachep, new); |
153 | return NULL; | 153 | return NULL; |
154 | } | 154 | } |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 880fb415a8f6..565cf7a1febd 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -428,22 +428,34 @@ int schedule_delayed_work_on(int cpu, | |||
428 | return ret; | 428 | return ret; |
429 | } | 429 | } |
430 | 430 | ||
431 | int schedule_on_each_cpu(void (*func) (void *info), void *info) | 431 | /** |
432 | * schedule_on_each_cpu - call a function on each online CPU from keventd | ||
433 | * @func: the function to call | ||
434 | * @info: a pointer to pass to func() | ||
435 | * | ||
436 | * Returns zero on success. | ||
437 | * Returns a negative errno on failure. | ||
438 | * | ||
439 | * Appears to be racy against CPU hotplug. | ||
440 | * | ||
441 | * schedule_on_each_cpu() is very slow. | ||
442 | */ | ||
443 | int schedule_on_each_cpu(void (*func)(void *info), void *info) | ||
432 | { | 444 | { |
433 | int cpu; | 445 | int cpu; |
434 | struct work_struct *work; | 446 | struct work_struct *works; |
435 | 447 | ||
436 | work = kmalloc(NR_CPUS * sizeof(struct work_struct), GFP_KERNEL); | 448 | works = alloc_percpu(struct work_struct); |
437 | 449 | if (!works) | |
438 | if (!work) | ||
439 | return -ENOMEM; | 450 | return -ENOMEM; |
451 | |||
440 | for_each_online_cpu(cpu) { | 452 | for_each_online_cpu(cpu) { |
441 | INIT_WORK(work + cpu, func, info); | 453 | INIT_WORK(per_cpu_ptr(works, cpu), func, info); |
442 | __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), | 454 | __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), |
443 | work + cpu); | 455 | per_cpu_ptr(works, cpu)); |
444 | } | 456 | } |
445 | flush_workqueue(keventd_wq); | 457 | flush_workqueue(keventd_wq); |
446 | kfree(work); | 458 | free_percpu(works); |
447 | return 0; | 459 | return 0; |
448 | } | 460 | } |
449 | 461 | ||
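A minimal usage sketch for the keventd helper above, with a hypothetical callback that flushes some per-CPU state; the call blocks (via flush_workqueue) until every online CPU has run it:

        /* hypothetical example: run a flush on every online CPU via keventd */
        static void flush_local_state(void *info)
        {
                /* executes in keventd's context on one particular CPU */
        }

        static int flush_all_cpus(void)
        {
                return schedule_on_each_cpu(flush_local_state, NULL); /* 0 or -ENOMEM */
        }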
@@ -531,11 +543,11 @@ int current_is_keventd(void) | |||
531 | static void take_over_work(struct workqueue_struct *wq, unsigned int cpu) | 543 | static void take_over_work(struct workqueue_struct *wq, unsigned int cpu) |
532 | { | 544 | { |
533 | struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu); | 545 | struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu); |
534 | LIST_HEAD(list); | 546 | struct list_head list; |
535 | struct work_struct *work; | 547 | struct work_struct *work; |
536 | 548 | ||
537 | spin_lock_irq(&cwq->lock); | 549 | spin_lock_irq(&cwq->lock); |
538 | list_splice_init(&cwq->worklist, &list); | 550 | list_replace_init(&cwq->worklist, &list); |
539 | 551 | ||
540 | while (!list_empty(&list)) { | 552 | while (!list_empty(&list)) { |
541 | printk("Taking work for %s\n", wq->name); | 553 | printk("Taking work for %s\n", wq->name); |
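The switch from list_splice_init() to list_replace_init() lets the private head adopt the whole worklist in O(1) while reinitializing the source head in the same step; unlike the splice variant it does not require the destination to be a pre-initialized empty list, which is why the LIST_HEAD() initializer above became a bare struct list_head. In outline:

        struct list_head list;                          /* no initializer needed */
        list_replace_init(&cwq->worklist, &list);       /* list adopts all entries,
                                                           worklist ends up empty */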
@@ -578,6 +590,8 @@ static int workqueue_cpu_callback(struct notifier_block *nfb, | |||
578 | 590 | ||
579 | case CPU_UP_CANCELED: | 591 | case CPU_UP_CANCELED: |
580 | list_for_each_entry(wq, &workqueues, list) { | 592 | list_for_each_entry(wq, &workqueues, list) { |
593 | if (!per_cpu_ptr(wq->cpu_wq, hotcpu)->thread) | ||
594 | continue; | ||
581 | /* Unbind so it can run. */ | 595 | /* Unbind so it can run. */ |
582 | kthread_bind(per_cpu_ptr(wq->cpu_wq, hotcpu)->thread, | 596 | kthread_bind(per_cpu_ptr(wq->cpu_wq, hotcpu)->thread, |
583 | any_online_cpu(cpu_online_map)); | 597 | any_online_cpu(cpu_online_map)); |