Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile          |    1
-rw-r--r--  kernel/acct.c            |    2
-rw-r--r--  kernel/audit.c           |  205
-rw-r--r--  kernel/audit.h           |   61
-rw-r--r--  kernel/auditfilter.c     |  899
-rw-r--r--  kernel/auditsc.c         |  649
-rw-r--r--  kernel/compat.c          |   23
-rw-r--r--  kernel/cpuset.c          |   16
-rw-r--r--  kernel/exit.c            |    5
-rw-r--r--  kernel/fork.c            |    5
-rw-r--r--  kernel/futex.c           |    8
-rw-r--r--  kernel/hrtimer.c         |    4
-rw-r--r--  kernel/intermodule.c     |  184
-rw-r--r--  kernel/irq/handle.c      |    5
-rw-r--r--  kernel/irq/migration.c   |    4
-rw-r--r--  kernel/irq/proc.c        |    3
-rw-r--r--  kernel/irq/spurious.c    |   12
-rw-r--r--  kernel/kexec.c           |    6
-rw-r--r--  kernel/ksysfs.c          |   19
-rw-r--r--  kernel/power/main.c      |    4
-rw-r--r--  kernel/power/power.h     |    6
-rw-r--r--  kernel/power/snapshot.c  |  260
-rw-r--r--  kernel/power/swsusp.c    |   32
-rw-r--r--  kernel/printk.c          |   28
-rw-r--r--  kernel/rcupdate.c        |   13
-rw-r--r--  kernel/sched.c           |   12
-rw-r--r--  kernel/signal.c          |    2
-rw-r--r--  kernel/sys.c             |   70
-rw-r--r--  kernel/sys_ni.c          |    2
-rw-r--r--  kernel/sysctl.c          |   15
-rw-r--r--  kernel/timer.c           |   30
-rw-r--r--  kernel/user.c            |    4
-rw-r--r--  kernel/workqueue.c       |    4
33 files changed, 1964 insertions(+), 629 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index 58908f9d15..f6ef00f4f9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -20,7 +20,6 @@ obj-$(CONFIG_SMP) += cpu.o spinlock.o | |||
20 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o | 20 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o |
21 | obj-$(CONFIG_UID16) += uid16.o | 21 | obj-$(CONFIG_UID16) += uid16.o |
22 | obj-$(CONFIG_MODULES) += module.o | 22 | obj-$(CONFIG_MODULES) += module.o |
23 | obj-$(CONFIG_OBSOLETE_INTERMODULE) += intermodule.o | ||
24 | obj-$(CONFIG_KALLSYMS) += kallsyms.o | 23 | obj-$(CONFIG_KALLSYMS) += kallsyms.o |
25 | obj-$(CONFIG_PM) += power/ | 24 | obj-$(CONFIG_PM) += power/ |
26 | obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o | 25 | obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o |
diff --git a/kernel/acct.c b/kernel/acct.c
index b327f4d201..6802020e0c 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -118,7 +118,7 @@ static int check_free_space(struct file *file) | |||
118 | spin_unlock(&acct_globals.lock); | 118 | spin_unlock(&acct_globals.lock); |
119 | 119 | ||
120 | /* May block */ | 120 | /* May block */ |
121 | if (vfs_statfs(file->f_dentry->d_inode->i_sb, &sbuf)) | 121 | if (vfs_statfs(file->f_dentry, &sbuf)) |
122 | return res; | 122 | return res; |
123 | suspend = sbuf.f_blocks * SUSPEND; | 123 | suspend = sbuf.f_blocks * SUSPEND; |
124 | resume = sbuf.f_blocks * RESUME; | 124 | resume = sbuf.f_blocks * RESUME; |
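The acct.c hunk above tracks the vfs_statfs() interface change: the helper now takes the dentry rather than digging the super_block out of the inode, so a filesystem can, for example, answer statfs relative to a known root. Below is a minimal sketch of a caller using the dentry-based call; the helper name acct_has_room() and the FREE_SUSPEND threshold are illustrative assumptions, not part of this patch.

	/* Sketch only (assumed names, not from this patch): calling the
	 * dentry-based vfs_statfs() the way check_free_space() does above.
	 * FREE_SUSPEND is an illustrative threshold, in percent. */
	#include <linux/fs.h>
	#include <linux/statfs.h>

	#define FREE_SUSPEND	2	/* suspend accounting below 2% free */

	static int acct_has_room(struct file *file)
	{
		struct kstatfs sbuf;

		/* May block; the new interface takes a dentry, not a super_block. */
		if (vfs_statfs(file->f_dentry, &sbuf))
			return 1;	/* treat a statfs failure as "enough room" */

		return sbuf.f_bavail * 100 > sbuf.f_blocks * FREE_SUSPEND;
	}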
diff --git a/kernel/audit.c b/kernel/audit.c
index df57b493e1..7dfac7031b 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -56,6 +56,7 @@ | |||
56 | #include <linux/skbuff.h> | 56 | #include <linux/skbuff.h> |
57 | #include <linux/netlink.h> | 57 | #include <linux/netlink.h> |
58 | #include <linux/selinux.h> | 58 | #include <linux/selinux.h> |
59 | #include <linux/inotify.h> | ||
59 | 60 | ||
60 | #include "audit.h" | 61 | #include "audit.h" |
61 | 62 | ||
@@ -89,6 +90,7 @@ static int audit_backlog_wait_overflow = 0; | |||
89 | /* The identity of the user shutting down the audit system. */ | 90 | /* The identity of the user shutting down the audit system. */ |
90 | uid_t audit_sig_uid = -1; | 91 | uid_t audit_sig_uid = -1; |
91 | pid_t audit_sig_pid = -1; | 92 | pid_t audit_sig_pid = -1; |
93 | u32 audit_sig_sid = 0; | ||
92 | 94 | ||
93 | /* Records can be lost in several ways: | 95 | /* Records can be lost in several ways: |
94 | 0) [suppressed in audit_alloc] | 96 | 0) [suppressed in audit_alloc] |
@@ -102,6 +104,12 @@ static atomic_t audit_lost = ATOMIC_INIT(0); | |||
102 | /* The netlink socket. */ | 104 | /* The netlink socket. */ |
103 | static struct sock *audit_sock; | 105 | static struct sock *audit_sock; |
104 | 106 | ||
107 | /* Inotify handle. */ | ||
108 | struct inotify_handle *audit_ih; | ||
109 | |||
110 | /* Hash for inode-based rules */ | ||
111 | struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS]; | ||
112 | |||
105 | /* The audit_freelist is a list of pre-allocated audit buffers (if more | 113 | /* The audit_freelist is a list of pre-allocated audit buffers (if more |
106 | * than AUDIT_MAXFREE are in use, the audit buffer is freed instead of | 114 | * than AUDIT_MAXFREE are in use, the audit buffer is freed instead of |
107 | * being placed on the freelist). */ | 115 | * being placed on the freelist). */ |
@@ -114,10 +122,8 @@ static struct task_struct *kauditd_task; | |||
114 | static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait); | 122 | static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait); |
115 | static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait); | 123 | static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait); |
116 | 124 | ||
117 | /* The netlink socket is only to be read by 1 CPU, which lets us assume | 125 | /* Serialize requests from userspace. */ |
118 | * that list additions and deletions never happen simultaneously in | 126 | static DEFINE_MUTEX(audit_cmd_mutex); |
119 | * auditsc.c */ | ||
120 | DEFINE_MUTEX(audit_netlink_mutex); | ||
121 | 127 | ||
122 | /* AUDIT_BUFSIZ is the size of the temporary buffer used for formatting | 128 | /* AUDIT_BUFSIZ is the size of the temporary buffer used for formatting |
123 | * audit records. Since printk uses a 1024 byte buffer, this buffer | 129 | * audit records. Since printk uses a 1024 byte buffer, this buffer |
@@ -250,7 +256,7 @@ static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid) | |||
250 | "audit_rate_limit=%d old=%d by auid=%u", | 256 | "audit_rate_limit=%d old=%d by auid=%u", |
251 | limit, old, loginuid); | 257 | limit, old, loginuid); |
252 | audit_rate_limit = limit; | 258 | audit_rate_limit = limit; |
253 | return old; | 259 | return 0; |
254 | } | 260 | } |
255 | 261 | ||
256 | static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid) | 262 | static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid) |
@@ -273,7 +279,7 @@ static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid) | |||
273 | "audit_backlog_limit=%d old=%d by auid=%u", | 279 | "audit_backlog_limit=%d old=%d by auid=%u", |
274 | limit, old, loginuid); | 280 | limit, old, loginuid); |
275 | audit_backlog_limit = limit; | 281 | audit_backlog_limit = limit; |
276 | return old; | 282 | return 0; |
277 | } | 283 | } |
278 | 284 | ||
279 | static int audit_set_enabled(int state, uid_t loginuid, u32 sid) | 285 | static int audit_set_enabled(int state, uid_t loginuid, u32 sid) |
@@ -299,7 +305,7 @@ static int audit_set_enabled(int state, uid_t loginuid, u32 sid) | |||
299 | "audit_enabled=%d old=%d by auid=%u", | 305 | "audit_enabled=%d old=%d by auid=%u", |
300 | state, old, loginuid); | 306 | state, old, loginuid); |
301 | audit_enabled = state; | 307 | audit_enabled = state; |
302 | return old; | 308 | return 0; |
303 | } | 309 | } |
304 | 310 | ||
305 | static int audit_set_failure(int state, uid_t loginuid, u32 sid) | 311 | static int audit_set_failure(int state, uid_t loginuid, u32 sid) |
@@ -327,7 +333,7 @@ static int audit_set_failure(int state, uid_t loginuid, u32 sid) | |||
327 | "audit_failure=%d old=%d by auid=%u", | 333 | "audit_failure=%d old=%d by auid=%u", |
328 | state, old, loginuid); | 334 | state, old, loginuid); |
329 | audit_failure = state; | 335 | audit_failure = state; |
330 | return old; | 336 | return 0; |
331 | } | 337 | } |
332 | 338 | ||
333 | static int kauditd_thread(void *dummy) | 339 | static int kauditd_thread(void *dummy) |
@@ -363,9 +369,52 @@ static int kauditd_thread(void *dummy) | |||
363 | remove_wait_queue(&kauditd_wait, &wait); | 369 | remove_wait_queue(&kauditd_wait, &wait); |
364 | } | 370 | } |
365 | } | 371 | } |
372 | } | ||
373 | |||
374 | int audit_send_list(void *_dest) | ||
375 | { | ||
376 | struct audit_netlink_list *dest = _dest; | ||
377 | int pid = dest->pid; | ||
378 | struct sk_buff *skb; | ||
379 | |||
380 | /* wait for parent to finish and send an ACK */ | ||
381 | mutex_lock(&audit_cmd_mutex); | ||
382 | mutex_unlock(&audit_cmd_mutex); | ||
383 | |||
384 | while ((skb = __skb_dequeue(&dest->q)) != NULL) | ||
385 | netlink_unicast(audit_sock, skb, pid, 0); | ||
386 | |||
387 | kfree(dest); | ||
388 | |||
366 | return 0; | 389 | return 0; |
367 | } | 390 | } |
368 | 391 | ||
392 | struct sk_buff *audit_make_reply(int pid, int seq, int type, int done, | ||
393 | int multi, void *payload, int size) | ||
394 | { | ||
395 | struct sk_buff *skb; | ||
396 | struct nlmsghdr *nlh; | ||
397 | int len = NLMSG_SPACE(size); | ||
398 | void *data; | ||
399 | int flags = multi ? NLM_F_MULTI : 0; | ||
400 | int t = done ? NLMSG_DONE : type; | ||
401 | |||
402 | skb = alloc_skb(len, GFP_KERNEL); | ||
403 | if (!skb) | ||
404 | return NULL; | ||
405 | |||
406 | nlh = NLMSG_PUT(skb, pid, seq, t, size); | ||
407 | nlh->nlmsg_flags = flags; | ||
408 | data = NLMSG_DATA(nlh); | ||
409 | memcpy(data, payload, size); | ||
410 | return skb; | ||
411 | |||
412 | nlmsg_failure: /* Used by NLMSG_PUT */ | ||
413 | if (skb) | ||
414 | kfree_skb(skb); | ||
415 | return NULL; | ||
416 | } | ||
417 | |||
369 | /** | 418 | /** |
370 | * audit_send_reply - send an audit reply message via netlink | 419 | * audit_send_reply - send an audit reply message via netlink |
371 | * @pid: process id to send reply to | 420 | * @pid: process id to send reply to |
@@ -383,29 +432,13 @@ void audit_send_reply(int pid, int seq, int type, int done, int multi, | |||
383 | void *payload, int size) | 432 | void *payload, int size) |
384 | { | 433 | { |
385 | struct sk_buff *skb; | 434 | struct sk_buff *skb; |
386 | struct nlmsghdr *nlh; | 435 | skb = audit_make_reply(pid, seq, type, done, multi, payload, size); |
387 | int len = NLMSG_SPACE(size); | ||
388 | void *data; | ||
389 | int flags = multi ? NLM_F_MULTI : 0; | ||
390 | int t = done ? NLMSG_DONE : type; | ||
391 | |||
392 | skb = alloc_skb(len, GFP_KERNEL); | ||
393 | if (!skb) | 436 | if (!skb) |
394 | return; | 437 | return; |
395 | |||
396 | nlh = NLMSG_PUT(skb, pid, seq, t, size); | ||
397 | nlh->nlmsg_flags = flags; | ||
398 | data = NLMSG_DATA(nlh); | ||
399 | memcpy(data, payload, size); | ||
400 | |||
401 | /* Ignore failure. It'll only happen if the sender goes away, | 438 | /* Ignore failure. It'll only happen if the sender goes away, |
402 | because our timeout is set to infinite. */ | 439 | because our timeout is set to infinite. */ |
403 | netlink_unicast(audit_sock, skb, pid, 0); | 440 | netlink_unicast(audit_sock, skb, pid, 0); |
404 | return; | 441 | return; |
405 | |||
406 | nlmsg_failure: /* Used by NLMSG_PUT */ | ||
407 | if (skb) | ||
408 | kfree_skb(skb); | ||
409 | } | 442 | } |
410 | 443 | ||
411 | /* | 444 | /* |
@@ -451,7 +484,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
451 | struct audit_buffer *ab; | 484 | struct audit_buffer *ab; |
452 | u16 msg_type = nlh->nlmsg_type; | 485 | u16 msg_type = nlh->nlmsg_type; |
453 | uid_t loginuid; /* loginuid of sender */ | 486 | uid_t loginuid; /* loginuid of sender */ |
454 | struct audit_sig_info sig_data; | 487 | struct audit_sig_info *sig_data; |
488 | char *ctx; | ||
489 | u32 len; | ||
455 | 490 | ||
456 | err = audit_netlink_ok(NETLINK_CB(skb).eff_cap, msg_type); | 491 | err = audit_netlink_ok(NETLINK_CB(skb).eff_cap, msg_type); |
457 | if (err) | 492 | if (err) |
@@ -503,12 +538,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
503 | if (status_get->mask & AUDIT_STATUS_PID) { | 538 | if (status_get->mask & AUDIT_STATUS_PID) { |
504 | int old = audit_pid; | 539 | int old = audit_pid; |
505 | if (sid) { | 540 | if (sid) { |
506 | char *ctx = NULL; | 541 | if ((err = selinux_ctxid_to_string( |
507 | u32 len; | ||
508 | int rc; | ||
509 | if ((rc = selinux_ctxid_to_string( | ||
510 | sid, &ctx, &len))) | 542 | sid, &ctx, &len))) |
511 | return rc; | 543 | return err; |
512 | else | 544 | else |
513 | audit_log(NULL, GFP_KERNEL, | 545 | audit_log(NULL, GFP_KERNEL, |
514 | AUDIT_CONFIG_CHANGE, | 546 | AUDIT_CONFIG_CHANGE, |
@@ -523,10 +555,10 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
523 | audit_pid = status_get->pid; | 555 | audit_pid = status_get->pid; |
524 | } | 556 | } |
525 | if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) | 557 | if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) |
526 | audit_set_rate_limit(status_get->rate_limit, | 558 | err = audit_set_rate_limit(status_get->rate_limit, |
527 | loginuid, sid); | 559 | loginuid, sid); |
528 | if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT) | 560 | if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT) |
529 | audit_set_backlog_limit(status_get->backlog_limit, | 561 | err = audit_set_backlog_limit(status_get->backlog_limit, |
530 | loginuid, sid); | 562 | loginuid, sid); |
531 | break; | 563 | break; |
532 | case AUDIT_USER: | 564 | case AUDIT_USER: |
@@ -544,8 +576,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
544 | "user pid=%d uid=%u auid=%u", | 576 | "user pid=%d uid=%u auid=%u", |
545 | pid, uid, loginuid); | 577 | pid, uid, loginuid); |
546 | if (sid) { | 578 | if (sid) { |
547 | char *ctx = NULL; | ||
548 | u32 len; | ||
549 | if (selinux_ctxid_to_string( | 579 | if (selinux_ctxid_to_string( |
550 | sid, &ctx, &len)) { | 580 | sid, &ctx, &len)) { |
551 | audit_log_format(ab, | 581 | audit_log_format(ab, |
@@ -584,10 +614,21 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
584 | loginuid, sid); | 614 | loginuid, sid); |
585 | break; | 615 | break; |
586 | case AUDIT_SIGNAL_INFO: | 616 | case AUDIT_SIGNAL_INFO: |
587 | sig_data.uid = audit_sig_uid; | 617 | err = selinux_ctxid_to_string(audit_sig_sid, &ctx, &len); |
588 | sig_data.pid = audit_sig_pid; | 618 | if (err) |
619 | return err; | ||
620 | sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL); | ||
621 | if (!sig_data) { | ||
622 | kfree(ctx); | ||
623 | return -ENOMEM; | ||
624 | } | ||
625 | sig_data->uid = audit_sig_uid; | ||
626 | sig_data->pid = audit_sig_pid; | ||
627 | memcpy(sig_data->ctx, ctx, len); | ||
628 | kfree(ctx); | ||
589 | audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO, | 629 | audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO, |
590 | 0, 0, &sig_data, sizeof(sig_data)); | 630 | 0, 0, sig_data, sizeof(*sig_data) + len); |
631 | kfree(sig_data); | ||
591 | break; | 632 | break; |
592 | default: | 633 | default: |
593 | err = -EINVAL; | 634 | err = -EINVAL; |
@@ -629,20 +670,30 @@ static void audit_receive(struct sock *sk, int length) | |||
629 | struct sk_buff *skb; | 670 | struct sk_buff *skb; |
630 | unsigned int qlen; | 671 | unsigned int qlen; |
631 | 672 | ||
632 | mutex_lock(&audit_netlink_mutex); | 673 | mutex_lock(&audit_cmd_mutex); |
633 | 674 | ||
634 | for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) { | 675 | for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) { |
635 | skb = skb_dequeue(&sk->sk_receive_queue); | 676 | skb = skb_dequeue(&sk->sk_receive_queue); |
636 | audit_receive_skb(skb); | 677 | audit_receive_skb(skb); |
637 | kfree_skb(skb); | 678 | kfree_skb(skb); |
638 | } | 679 | } |
639 | mutex_unlock(&audit_netlink_mutex); | 680 | mutex_unlock(&audit_cmd_mutex); |
640 | } | 681 | } |
641 | 682 | ||
683 | #ifdef CONFIG_AUDITSYSCALL | ||
684 | static const struct inotify_operations audit_inotify_ops = { | ||
685 | .handle_event = audit_handle_ievent, | ||
686 | .destroy_watch = audit_free_parent, | ||
687 | }; | ||
688 | #endif | ||
642 | 689 | ||
643 | /* Initialize audit support at boot time. */ | 690 | /* Initialize audit support at boot time. */ |
644 | static int __init audit_init(void) | 691 | static int __init audit_init(void) |
645 | { | 692 | { |
693 | #ifdef CONFIG_AUDITSYSCALL | ||
694 | int i; | ||
695 | #endif | ||
696 | |||
646 | printk(KERN_INFO "audit: initializing netlink socket (%s)\n", | 697 | printk(KERN_INFO "audit: initializing netlink socket (%s)\n", |
647 | audit_default ? "enabled" : "disabled"); | 698 | audit_default ? "enabled" : "disabled"); |
648 | audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive, | 699 | audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive, |
@@ -661,6 +712,16 @@ static int __init audit_init(void) | |||
661 | selinux_audit_set_callback(&selinux_audit_rule_update); | 712 | selinux_audit_set_callback(&selinux_audit_rule_update); |
662 | 713 | ||
663 | audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized"); | 714 | audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized"); |
715 | |||
716 | #ifdef CONFIG_AUDITSYSCALL | ||
717 | audit_ih = inotify_init(&audit_inotify_ops); | ||
718 | if (IS_ERR(audit_ih)) | ||
719 | audit_panic("cannot initialize inotify handle"); | ||
720 | |||
721 | for (i = 0; i < AUDIT_INODE_BUCKETS; i++) | ||
722 | INIT_LIST_HEAD(&audit_inode_hash[i]); | ||
723 | #endif | ||
724 | |||
664 | return 0; | 725 | return 0; |
665 | } | 726 | } |
666 | __initcall(audit_init); | 727 | __initcall(audit_init); |
@@ -690,10 +751,12 @@ static void audit_buffer_free(struct audit_buffer *ab) | |||
690 | kfree_skb(ab->skb); | 751 | kfree_skb(ab->skb); |
691 | 752 | ||
692 | spin_lock_irqsave(&audit_freelist_lock, flags); | 753 | spin_lock_irqsave(&audit_freelist_lock, flags); |
693 | if (++audit_freelist_count > AUDIT_MAXFREE) | 754 | if (audit_freelist_count > AUDIT_MAXFREE) |
694 | kfree(ab); | 755 | kfree(ab); |
695 | else | 756 | else { |
757 | audit_freelist_count++; | ||
696 | list_add(&ab->list, &audit_freelist); | 758 | list_add(&ab->list, &audit_freelist); |
759 | } | ||
697 | spin_unlock_irqrestore(&audit_freelist_lock, flags); | 760 | spin_unlock_irqrestore(&audit_freelist_lock, flags); |
698 | } | 761 | } |
699 | 762 | ||
@@ -988,28 +1051,76 @@ void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf, | |||
988 | skb_put(skb, len << 1); /* new string is twice the old string */ | 1051 | skb_put(skb, len << 1); /* new string is twice the old string */ |
989 | } | 1052 | } |
990 | 1053 | ||
1054 | /* | ||
1055 | * Format a string of no more than slen characters into the audit buffer, | ||
1056 | * enclosed in quote marks. | ||
1057 | */ | ||
1058 | static void audit_log_n_string(struct audit_buffer *ab, size_t slen, | ||
1059 | const char *string) | ||
1060 | { | ||
1061 | int avail, new_len; | ||
1062 | unsigned char *ptr; | ||
1063 | struct sk_buff *skb; | ||
1064 | |||
1065 | BUG_ON(!ab->skb); | ||
1066 | skb = ab->skb; | ||
1067 | avail = skb_tailroom(skb); | ||
1068 | new_len = slen + 3; /* enclosing quotes + null terminator */ | ||
1069 | if (new_len > avail) { | ||
1070 | avail = audit_expand(ab, new_len); | ||
1071 | if (!avail) | ||
1072 | return; | ||
1073 | } | ||
1074 | ptr = skb->tail; | ||
1075 | *ptr++ = '"'; | ||
1076 | memcpy(ptr, string, slen); | ||
1077 | ptr += slen; | ||
1078 | *ptr++ = '"'; | ||
1079 | *ptr = 0; | ||
1080 | skb_put(skb, slen + 2); /* don't include null terminator */ | ||
1081 | } | ||
1082 | |||
991 | /** | 1083 | /** |
992 | * audit_log_unstrustedstring - log a string that may contain random characters | 1084 | * audit_log_n_unstrustedstring - log a string that may contain random characters |
993 | * @ab: audit_buffer | 1085 | * @ab: audit_buffer |
1086 | * @len: lenth of string (not including trailing null) | ||
994 | * @string: string to be logged | 1087 | * @string: string to be logged |
995 | * | 1088 | * |
996 | * This code will escape a string that is passed to it if the string | 1089 | * This code will escape a string that is passed to it if the string |
997 | * contains a control character, unprintable character, double quote mark, | 1090 | * contains a control character, unprintable character, double quote mark, |
998 | * or a space. Unescaped strings will start and end with a double quote mark. | 1091 | * or a space. Unescaped strings will start and end with a double quote mark. |
999 | * Strings that are escaped are printed in hex (2 digits per char). | 1092 | * Strings that are escaped are printed in hex (2 digits per char). |
1093 | * | ||
1094 | * The caller specifies the number of characters in the string to log, which may | ||
1095 | * or may not be the entire string. | ||
1000 | */ | 1096 | */ |
1001 | void audit_log_untrustedstring(struct audit_buffer *ab, const char *string) | 1097 | const char *audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len, |
1098 | const char *string) | ||
1002 | { | 1099 | { |
1003 | const unsigned char *p = string; | 1100 | const unsigned char *p = string; |
1004 | 1101 | ||
1005 | while (*p) { | 1102 | while (*p) { |
1006 | if (*p == '"' || *p < 0x21 || *p > 0x7f) { | 1103 | if (*p == '"' || *p < 0x21 || *p > 0x7f) { |
1007 | audit_log_hex(ab, string, strlen(string)); | 1104 | audit_log_hex(ab, string, len); |
1008 | return; | 1105 | return string + len + 1; |
1009 | } | 1106 | } |
1010 | p++; | 1107 | p++; |
1011 | } | 1108 | } |
1012 | audit_log_format(ab, "\"%s\"", string); | 1109 | audit_log_n_string(ab, len, string); |
1110 | return p + 1; | ||
1111 | } | ||
1112 | |||
1113 | /** | ||
1114 | * audit_log_unstrustedstring - log a string that may contain random characters | ||
1115 | * @ab: audit_buffer | ||
1116 | * @string: string to be logged | ||
1117 | * | ||
1118 | * Same as audit_log_n_unstrustedstring(), except that strlen is used to | ||
1119 | * determine string length. | ||
1120 | */ | ||
1121 | const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string) | ||
1122 | { | ||
1123 | return audit_log_n_untrustedstring(ab, strlen(string), string); | ||
1013 | } | 1124 | } |
1014 | 1125 | ||
1015 | /* This is a helper-function to print the escaped d_path */ | 1126 | /* This is a helper-function to print the escaped d_path */ |
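The new audit_log_n_untrustedstring() and audit_log_untrustedstring() return a pointer just past the terminating NUL of the string they logged, which lets a caller walk a buffer of consecutive NUL-terminated strings. A hedged sketch of that pattern follows; log_string_vector() is a made-up name for illustration, and only the two audit_log_* calls are from the patch.

	/* Sketch only: log "count" consecutive NUL-terminated strings from buf,
	 * advancing with the return value of audit_log_untrustedstring(). */
	static void log_string_vector(struct audit_buffer *ab,
				      const char *buf, int count)
	{
		const char *p = buf;
		int i;

		for (i = 0; i < count; i++) {
			audit_log_format(ab, " a%d=", i);
			/* returns p + strlen(p) + 1, i.e. the next string */
			p = audit_log_untrustedstring(ab, p);
		}
	}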
diff --git a/kernel/audit.h b/kernel/audit.h
index 6f733920fd..8323e4132a 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -19,9 +19,9 @@ | |||
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/mutex.h> | ||
23 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
24 | #include <linux/audit.h> | 23 | #include <linux/audit.h> |
24 | #include <linux/skbuff.h> | ||
25 | 25 | ||
26 | /* 0 = no checking | 26 | /* 0 = no checking |
27 | 1 = put_count checking | 27 | 1 = put_count checking |
@@ -53,6 +53,18 @@ enum audit_state { | |||
53 | }; | 53 | }; |
54 | 54 | ||
55 | /* Rule lists */ | 55 | /* Rule lists */ |
56 | struct audit_parent; | ||
57 | |||
58 | struct audit_watch { | ||
59 | atomic_t count; /* reference count */ | ||
60 | char *path; /* insertion path */ | ||
61 | dev_t dev; /* associated superblock device */ | ||
62 | unsigned long ino; /* associated inode number */ | ||
63 | struct audit_parent *parent; /* associated parent */ | ||
64 | struct list_head wlist; /* entry in parent->watches list */ | ||
65 | struct list_head rules; /* associated rules */ | ||
66 | }; | ||
67 | |||
56 | struct audit_field { | 68 | struct audit_field { |
57 | u32 type; | 69 | u32 type; |
58 | u32 val; | 70 | u32 val; |
@@ -70,6 +82,9 @@ struct audit_krule { | |||
70 | u32 buflen; /* for data alloc on list rules */ | 82 | u32 buflen; /* for data alloc on list rules */ |
71 | u32 field_count; | 83 | u32 field_count; |
72 | struct audit_field *fields; | 84 | struct audit_field *fields; |
85 | struct audit_field *inode_f; /* quick access to an inode field */ | ||
86 | struct audit_watch *watch; /* associated watch */ | ||
87 | struct list_head rlist; /* entry in audit_watch.rules list */ | ||
73 | }; | 88 | }; |
74 | 89 | ||
75 | struct audit_entry { | 90 | struct audit_entry { |
@@ -78,15 +93,53 @@ struct audit_entry { | |||
78 | struct audit_krule rule; | 93 | struct audit_krule rule; |
79 | }; | 94 | }; |
80 | 95 | ||
81 | |||
82 | extern int audit_pid; | 96 | extern int audit_pid; |
83 | extern int audit_comparator(const u32 left, const u32 op, const u32 right); | ||
84 | 97 | ||
98 | #define AUDIT_INODE_BUCKETS 32 | ||
99 | extern struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS]; | ||
100 | |||
101 | static inline int audit_hash_ino(u32 ino) | ||
102 | { | ||
103 | return (ino & (AUDIT_INODE_BUCKETS-1)); | ||
104 | } | ||
105 | |||
106 | extern int audit_comparator(const u32 left, const u32 op, const u32 right); | ||
107 | extern int audit_compare_dname_path(const char *dname, const char *path, | ||
108 | int *dirlen); | ||
109 | extern struct sk_buff * audit_make_reply(int pid, int seq, int type, | ||
110 | int done, int multi, | ||
111 | void *payload, int size); | ||
85 | extern void audit_send_reply(int pid, int seq, int type, | 112 | extern void audit_send_reply(int pid, int seq, int type, |
86 | int done, int multi, | 113 | int done, int multi, |
87 | void *payload, int size); | 114 | void *payload, int size); |
88 | extern void audit_log_lost(const char *message); | 115 | extern void audit_log_lost(const char *message); |
89 | extern void audit_panic(const char *message); | 116 | extern void audit_panic(const char *message); |
90 | extern struct mutex audit_netlink_mutex; | ||
91 | 117 | ||
118 | struct audit_netlink_list { | ||
119 | int pid; | ||
120 | struct sk_buff_head q; | ||
121 | }; | ||
122 | |||
123 | int audit_send_list(void *); | ||
124 | |||
125 | struct inotify_watch; | ||
126 | extern void audit_free_parent(struct inotify_watch *); | ||
127 | extern void audit_handle_ievent(struct inotify_watch *, u32, u32, u32, | ||
128 | const char *, struct inode *); | ||
92 | extern int selinux_audit_rule_update(void); | 129 | extern int selinux_audit_rule_update(void); |
130 | |||
131 | #ifdef CONFIG_AUDITSYSCALL | ||
132 | extern void __audit_signal_info(int sig, struct task_struct *t); | ||
133 | static inline void audit_signal_info(int sig, struct task_struct *t) | ||
134 | { | ||
135 | if (unlikely(audit_pid && t->tgid == audit_pid)) | ||
136 | __audit_signal_info(sig, t); | ||
137 | } | ||
138 | extern enum audit_state audit_filter_inodes(struct task_struct *, | ||
139 | struct audit_context *); | ||
140 | extern void audit_set_auditable(struct audit_context *); | ||
141 | #else | ||
142 | #define audit_signal_info(s,t) | ||
143 | #define audit_filter_inodes(t,c) AUDIT_DISABLED | ||
144 | #define audit_set_auditable(c) | ||
145 | #endif | ||
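audit.h now sizes the inode-rule hash with AUDIT_INODE_BUCKETS and hashes by masking, which only distributes entries evenly because 32 is a power of two. Below is a hedged sketch of how one bucket would be scanned under RCU during filtering; the helper name and the simplified match condition are assumptions, the real matching logic lives in auditsc.c.

	/* Sketch (assumed helper, simplified match): scan one hash bucket for a
	 * rule whose inode field equals "ino".  Caller holds rcu_read_lock(). */
	static struct audit_entry *lookup_ino_rules(unsigned long ino)
	{
		struct list_head *list = &audit_inode_hash[audit_hash_ino((u32)ino)];
		struct audit_entry *e;

		list_for_each_entry_rcu(e, list, list)
			if (e->rule.inode_f && e->rule.inode_f->val == ino)
				return e;
		return NULL;
	}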
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 7c134906d6..4c99d2c586 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -22,13 +22,59 @@ | |||
22 | #include <linux/kernel.h> | 22 | #include <linux/kernel.h> |
23 | #include <linux/audit.h> | 23 | #include <linux/audit.h> |
24 | #include <linux/kthread.h> | 24 | #include <linux/kthread.h> |
25 | #include <linux/mutex.h> | ||
26 | #include <linux/fs.h> | ||
27 | #include <linux/namei.h> | ||
25 | #include <linux/netlink.h> | 28 | #include <linux/netlink.h> |
29 | #include <linux/sched.h> | ||
30 | #include <linux/inotify.h> | ||
26 | #include <linux/selinux.h> | 31 | #include <linux/selinux.h> |
27 | #include "audit.h" | 32 | #include "audit.h" |
28 | 33 | ||
29 | /* There are three lists of rules -- one to search at task creation | 34 | /* |
30 | * time, one to search at syscall entry time, and another to search at | 35 | * Locking model: |
31 | * syscall exit time. */ | 36 | * |
37 | * audit_filter_mutex: | ||
38 | * Synchronizes writes and blocking reads of audit's filterlist | ||
39 | * data. Rcu is used to traverse the filterlist and access | ||
40 | * contents of structs audit_entry, audit_watch and opaque | ||
41 | * selinux rules during filtering. If modified, these structures | ||
42 | * must be copied and replace their counterparts in the filterlist. | ||
43 | * An audit_parent struct is not accessed during filtering, so may | ||
44 | * be written directly provided audit_filter_mutex is held. | ||
45 | */ | ||
46 | |||
47 | /* | ||
48 | * Reference counting: | ||
49 | * | ||
50 | * audit_parent: lifetime is from audit_init_parent() to receipt of an IN_IGNORED | ||
51 | * event. Each audit_watch holds a reference to its associated parent. | ||
52 | * | ||
53 | * audit_watch: if added to lists, lifetime is from audit_init_watch() to | ||
54 | * audit_remove_watch(). Additionally, an audit_watch may exist | ||
55 | * temporarily to assist in searching existing filter data. Each | ||
56 | * audit_krule holds a reference to its associated watch. | ||
57 | */ | ||
58 | |||
59 | struct audit_parent { | ||
60 | struct list_head ilist; /* entry in inotify registration list */ | ||
61 | struct list_head watches; /* associated watches */ | ||
62 | struct inotify_watch wdata; /* inotify watch data */ | ||
63 | unsigned flags; /* status flags */ | ||
64 | }; | ||
65 | |||
66 | /* | ||
67 | * audit_parent status flags: | ||
68 | * | ||
69 | * AUDIT_PARENT_INVALID - set anytime rules/watches are auto-removed due to | ||
70 | * a filesystem event to ensure we're adding audit watches to a valid parent. | ||
71 | * Technically not needed for IN_DELETE_SELF or IN_UNMOUNT events, as we cannot | ||
72 | * receive them while we have nameidata, but must be used for IN_MOVE_SELF which | ||
73 | * we can receive while holding nameidata. | ||
74 | */ | ||
75 | #define AUDIT_PARENT_INVALID 0x001 | ||
76 | |||
77 | /* Audit filter lists, defined in <linux/audit.h> */ | ||
32 | struct list_head audit_filter_list[AUDIT_NR_FILTERS] = { | 78 | struct list_head audit_filter_list[AUDIT_NR_FILTERS] = { |
33 | LIST_HEAD_INIT(audit_filter_list[0]), | 79 | LIST_HEAD_INIT(audit_filter_list[0]), |
34 | LIST_HEAD_INIT(audit_filter_list[1]), | 80 | LIST_HEAD_INIT(audit_filter_list[1]), |
@@ -41,9 +87,53 @@ struct list_head audit_filter_list[AUDIT_NR_FILTERS] = { | |||
41 | #endif | 87 | #endif |
42 | }; | 88 | }; |
43 | 89 | ||
90 | static DEFINE_MUTEX(audit_filter_mutex); | ||
91 | |||
92 | /* Inotify handle */ | ||
93 | extern struct inotify_handle *audit_ih; | ||
94 | |||
95 | /* Inotify events we care about. */ | ||
96 | #define AUDIT_IN_WATCH IN_MOVE|IN_CREATE|IN_DELETE|IN_DELETE_SELF|IN_MOVE_SELF | ||
97 | |||
98 | void audit_free_parent(struct inotify_watch *i_watch) | ||
99 | { | ||
100 | struct audit_parent *parent; | ||
101 | |||
102 | parent = container_of(i_watch, struct audit_parent, wdata); | ||
103 | WARN_ON(!list_empty(&parent->watches)); | ||
104 | kfree(parent); | ||
105 | } | ||
106 | |||
107 | static inline void audit_get_watch(struct audit_watch *watch) | ||
108 | { | ||
109 | atomic_inc(&watch->count); | ||
110 | } | ||
111 | |||
112 | static void audit_put_watch(struct audit_watch *watch) | ||
113 | { | ||
114 | if (atomic_dec_and_test(&watch->count)) { | ||
115 | WARN_ON(watch->parent); | ||
116 | WARN_ON(!list_empty(&watch->rules)); | ||
117 | kfree(watch->path); | ||
118 | kfree(watch); | ||
119 | } | ||
120 | } | ||
121 | |||
122 | static void audit_remove_watch(struct audit_watch *watch) | ||
123 | { | ||
124 | list_del(&watch->wlist); | ||
125 | put_inotify_watch(&watch->parent->wdata); | ||
126 | watch->parent = NULL; | ||
127 | audit_put_watch(watch); /* match initial get */ | ||
128 | } | ||
129 | |||
44 | static inline void audit_free_rule(struct audit_entry *e) | 130 | static inline void audit_free_rule(struct audit_entry *e) |
45 | { | 131 | { |
46 | int i; | 132 | int i; |
133 | |||
134 | /* some rules don't have associated watches */ | ||
135 | if (e->rule.watch) | ||
136 | audit_put_watch(e->rule.watch); | ||
47 | if (e->rule.fields) | 137 | if (e->rule.fields) |
48 | for (i = 0; i < e->rule.field_count; i++) { | 138 | for (i = 0; i < e->rule.field_count; i++) { |
49 | struct audit_field *f = &e->rule.fields[i]; | 139 | struct audit_field *f = &e->rule.fields[i]; |
@@ -60,6 +150,50 @@ static inline void audit_free_rule_rcu(struct rcu_head *head) | |||
60 | audit_free_rule(e); | 150 | audit_free_rule(e); |
61 | } | 151 | } |
62 | 152 | ||
153 | /* Initialize a parent watch entry. */ | ||
154 | static struct audit_parent *audit_init_parent(struct nameidata *ndp) | ||
155 | { | ||
156 | struct audit_parent *parent; | ||
157 | s32 wd; | ||
158 | |||
159 | parent = kzalloc(sizeof(*parent), GFP_KERNEL); | ||
160 | if (unlikely(!parent)) | ||
161 | return ERR_PTR(-ENOMEM); | ||
162 | |||
163 | INIT_LIST_HEAD(&parent->watches); | ||
164 | parent->flags = 0; | ||
165 | |||
166 | inotify_init_watch(&parent->wdata); | ||
167 | /* grab a ref so inotify watch hangs around until we take audit_filter_mutex */ | ||
168 | get_inotify_watch(&parent->wdata); | ||
169 | wd = inotify_add_watch(audit_ih, &parent->wdata, ndp->dentry->d_inode, | ||
170 | AUDIT_IN_WATCH); | ||
171 | if (wd < 0) { | ||
172 | audit_free_parent(&parent->wdata); | ||
173 | return ERR_PTR(wd); | ||
174 | } | ||
175 | |||
176 | return parent; | ||
177 | } | ||
178 | |||
179 | /* Initialize a watch entry. */ | ||
180 | static struct audit_watch *audit_init_watch(char *path) | ||
181 | { | ||
182 | struct audit_watch *watch; | ||
183 | |||
184 | watch = kzalloc(sizeof(*watch), GFP_KERNEL); | ||
185 | if (unlikely(!watch)) | ||
186 | return ERR_PTR(-ENOMEM); | ||
187 | |||
188 | INIT_LIST_HEAD(&watch->rules); | ||
189 | atomic_set(&watch->count, 1); | ||
190 | watch->path = path; | ||
191 | watch->dev = (dev_t)-1; | ||
192 | watch->ino = (unsigned long)-1; | ||
193 | |||
194 | return watch; | ||
195 | } | ||
196 | |||
63 | /* Initialize an audit filterlist entry. */ | 197 | /* Initialize an audit filterlist entry. */ |
64 | static inline struct audit_entry *audit_init_entry(u32 field_count) | 198 | static inline struct audit_entry *audit_init_entry(u32 field_count) |
65 | { | 199 | { |
@@ -107,6 +241,43 @@ static char *audit_unpack_string(void **bufp, size_t *remain, size_t len) | |||
107 | return str; | 241 | return str; |
108 | } | 242 | } |
109 | 243 | ||
244 | /* Translate an inode field to kernel respresentation. */ | ||
245 | static inline int audit_to_inode(struct audit_krule *krule, | ||
246 | struct audit_field *f) | ||
247 | { | ||
248 | if (krule->listnr != AUDIT_FILTER_EXIT || | ||
249 | krule->watch || krule->inode_f) | ||
250 | return -EINVAL; | ||
251 | |||
252 | krule->inode_f = f; | ||
253 | return 0; | ||
254 | } | ||
255 | |||
256 | /* Translate a watch string to kernel respresentation. */ | ||
257 | static int audit_to_watch(struct audit_krule *krule, char *path, int len, | ||
258 | u32 op) | ||
259 | { | ||
260 | struct audit_watch *watch; | ||
261 | |||
262 | if (!audit_ih) | ||
263 | return -EOPNOTSUPP; | ||
264 | |||
265 | if (path[0] != '/' || path[len-1] == '/' || | ||
266 | krule->listnr != AUDIT_FILTER_EXIT || | ||
267 | op & ~AUDIT_EQUAL || | ||
268 | krule->inode_f || krule->watch) /* 1 inode # per rule, for hash */ | ||
269 | return -EINVAL; | ||
270 | |||
271 | watch = audit_init_watch(path); | ||
272 | if (unlikely(IS_ERR(watch))) | ||
273 | return PTR_ERR(watch); | ||
274 | |||
275 | audit_get_watch(watch); | ||
276 | krule->watch = watch; | ||
277 | |||
278 | return 0; | ||
279 | } | ||
280 | |||
110 | /* Common user-space to kernel rule translation. */ | 281 | /* Common user-space to kernel rule translation. */ |
111 | static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule) | 282 | static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule) |
112 | { | 283 | { |
@@ -128,8 +299,11 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule) | |||
128 | #endif | 299 | #endif |
129 | ; | 300 | ; |
130 | } | 301 | } |
131 | if (rule->action != AUDIT_NEVER && rule->action != AUDIT_POSSIBLE && | 302 | if (unlikely(rule->action == AUDIT_POSSIBLE)) { |
132 | rule->action != AUDIT_ALWAYS) | 303 | printk(KERN_ERR "AUDIT_POSSIBLE is deprecated\n"); |
304 | goto exit_err; | ||
305 | } | ||
306 | if (rule->action != AUDIT_NEVER && rule->action != AUDIT_ALWAYS) | ||
133 | goto exit_err; | 307 | goto exit_err; |
134 | if (rule->field_count > AUDIT_MAX_FIELDS) | 308 | if (rule->field_count > AUDIT_MAX_FIELDS) |
135 | goto exit_err; | 309 | goto exit_err; |
@@ -158,6 +332,7 @@ exit_err: | |||
158 | static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule) | 332 | static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule) |
159 | { | 333 | { |
160 | struct audit_entry *entry; | 334 | struct audit_entry *entry; |
335 | struct audit_field *f; | ||
161 | int err = 0; | 336 | int err = 0; |
162 | int i; | 337 | int i; |
163 | 338 | ||
@@ -172,14 +347,37 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule) | |||
172 | f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS); | 347 | f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS); |
173 | f->val = rule->values[i]; | 348 | f->val = rule->values[i]; |
174 | 349 | ||
175 | if (f->type & AUDIT_UNUSED_BITS || | 350 | err = -EINVAL; |
176 | f->type == AUDIT_SE_USER || | 351 | switch(f->type) { |
177 | f->type == AUDIT_SE_ROLE || | 352 | default: |
178 | f->type == AUDIT_SE_TYPE || | ||
179 | f->type == AUDIT_SE_SEN || | ||
180 | f->type == AUDIT_SE_CLR) { | ||
181 | err = -EINVAL; | ||
182 | goto exit_free; | 353 | goto exit_free; |
354 | case AUDIT_PID: | ||
355 | case AUDIT_UID: | ||
356 | case AUDIT_EUID: | ||
357 | case AUDIT_SUID: | ||
358 | case AUDIT_FSUID: | ||
359 | case AUDIT_GID: | ||
360 | case AUDIT_EGID: | ||
361 | case AUDIT_SGID: | ||
362 | case AUDIT_FSGID: | ||
363 | case AUDIT_LOGINUID: | ||
364 | case AUDIT_PERS: | ||
365 | case AUDIT_ARCH: | ||
366 | case AUDIT_MSGTYPE: | ||
367 | case AUDIT_DEVMAJOR: | ||
368 | case AUDIT_DEVMINOR: | ||
369 | case AUDIT_EXIT: | ||
370 | case AUDIT_SUCCESS: | ||
371 | case AUDIT_ARG0: | ||
372 | case AUDIT_ARG1: | ||
373 | case AUDIT_ARG2: | ||
374 | case AUDIT_ARG3: | ||
375 | break; | ||
376 | case AUDIT_INODE: | ||
377 | err = audit_to_inode(&entry->rule, f); | ||
378 | if (err) | ||
379 | goto exit_free; | ||
380 | break; | ||
183 | } | 381 | } |
184 | 382 | ||
185 | entry->rule.vers_ops = (f->op & AUDIT_OPERATORS) ? 2 : 1; | 383 | entry->rule.vers_ops = (f->op & AUDIT_OPERATORS) ? 2 : 1; |
@@ -196,6 +394,18 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule) | |||
196 | } | 394 | } |
197 | } | 395 | } |
198 | 396 | ||
397 | f = entry->rule.inode_f; | ||
398 | if (f) { | ||
399 | switch(f->op) { | ||
400 | case AUDIT_NOT_EQUAL: | ||
401 | entry->rule.inode_f = NULL; | ||
402 | case AUDIT_EQUAL: | ||
403 | break; | ||
404 | default: | ||
405 | goto exit_free; | ||
406 | } | ||
407 | } | ||
408 | |||
199 | exit_nofree: | 409 | exit_nofree: |
200 | return entry; | 410 | return entry; |
201 | 411 | ||
@@ -210,6 +420,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, | |||
210 | { | 420 | { |
211 | int err = 0; | 421 | int err = 0; |
212 | struct audit_entry *entry; | 422 | struct audit_entry *entry; |
423 | struct audit_field *f; | ||
213 | void *bufp; | 424 | void *bufp; |
214 | size_t remain = datasz - sizeof(struct audit_rule_data); | 425 | size_t remain = datasz - sizeof(struct audit_rule_data); |
215 | int i; | 426 | int i; |
@@ -235,6 +446,29 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, | |||
235 | f->se_str = NULL; | 446 | f->se_str = NULL; |
236 | f->se_rule = NULL; | 447 | f->se_rule = NULL; |
237 | switch(f->type) { | 448 | switch(f->type) { |
449 | case AUDIT_PID: | ||
450 | case AUDIT_UID: | ||
451 | case AUDIT_EUID: | ||
452 | case AUDIT_SUID: | ||
453 | case AUDIT_FSUID: | ||
454 | case AUDIT_GID: | ||
455 | case AUDIT_EGID: | ||
456 | case AUDIT_SGID: | ||
457 | case AUDIT_FSGID: | ||
458 | case AUDIT_LOGINUID: | ||
459 | case AUDIT_PERS: | ||
460 | case AUDIT_ARCH: | ||
461 | case AUDIT_MSGTYPE: | ||
462 | case AUDIT_PPID: | ||
463 | case AUDIT_DEVMAJOR: | ||
464 | case AUDIT_DEVMINOR: | ||
465 | case AUDIT_EXIT: | ||
466 | case AUDIT_SUCCESS: | ||
467 | case AUDIT_ARG0: | ||
468 | case AUDIT_ARG1: | ||
469 | case AUDIT_ARG2: | ||
470 | case AUDIT_ARG3: | ||
471 | break; | ||
238 | case AUDIT_SE_USER: | 472 | case AUDIT_SE_USER: |
239 | case AUDIT_SE_ROLE: | 473 | case AUDIT_SE_ROLE: |
240 | case AUDIT_SE_TYPE: | 474 | case AUDIT_SE_TYPE: |
@@ -260,6 +494,37 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, | |||
260 | } else | 494 | } else |
261 | f->se_str = str; | 495 | f->se_str = str; |
262 | break; | 496 | break; |
497 | case AUDIT_WATCH: | ||
498 | str = audit_unpack_string(&bufp, &remain, f->val); | ||
499 | if (IS_ERR(str)) | ||
500 | goto exit_free; | ||
501 | entry->rule.buflen += f->val; | ||
502 | |||
503 | err = audit_to_watch(&entry->rule, str, f->val, f->op); | ||
504 | if (err) { | ||
505 | kfree(str); | ||
506 | goto exit_free; | ||
507 | } | ||
508 | break; | ||
509 | case AUDIT_INODE: | ||
510 | err = audit_to_inode(&entry->rule, f); | ||
511 | if (err) | ||
512 | goto exit_free; | ||
513 | break; | ||
514 | default: | ||
515 | goto exit_free; | ||
516 | } | ||
517 | } | ||
518 | |||
519 | f = entry->rule.inode_f; | ||
520 | if (f) { | ||
521 | switch(f->op) { | ||
522 | case AUDIT_NOT_EQUAL: | ||
523 | entry->rule.inode_f = NULL; | ||
524 | case AUDIT_EQUAL: | ||
525 | break; | ||
526 | default: | ||
527 | goto exit_free; | ||
263 | } | 528 | } |
264 | } | 529 | } |
265 | 530 | ||
@@ -291,7 +556,7 @@ static struct audit_rule *audit_krule_to_rule(struct audit_krule *krule) | |||
291 | 556 | ||
292 | rule = kmalloc(sizeof(*rule), GFP_KERNEL); | 557 | rule = kmalloc(sizeof(*rule), GFP_KERNEL); |
293 | if (unlikely(!rule)) | 558 | if (unlikely(!rule)) |
294 | return ERR_PTR(-ENOMEM); | 559 | return NULL; |
295 | memset(rule, 0, sizeof(*rule)); | 560 | memset(rule, 0, sizeof(*rule)); |
296 | 561 | ||
297 | rule->flags = krule->flags | krule->listnr; | 562 | rule->flags = krule->flags | krule->listnr; |
@@ -322,7 +587,7 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule) | |||
322 | 587 | ||
323 | data = kmalloc(sizeof(*data) + krule->buflen, GFP_KERNEL); | 588 | data = kmalloc(sizeof(*data) + krule->buflen, GFP_KERNEL); |
324 | if (unlikely(!data)) | 589 | if (unlikely(!data)) |
325 | return ERR_PTR(-ENOMEM); | 590 | return NULL; |
326 | memset(data, 0, sizeof(*data)); | 591 | memset(data, 0, sizeof(*data)); |
327 | 592 | ||
328 | data->flags = krule->flags | krule->listnr; | 593 | data->flags = krule->flags | krule->listnr; |
@@ -343,6 +608,10 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule) | |||
343 | data->buflen += data->values[i] = | 608 | data->buflen += data->values[i] = |
344 | audit_pack_string(&bufp, f->se_str); | 609 | audit_pack_string(&bufp, f->se_str); |
345 | break; | 610 | break; |
611 | case AUDIT_WATCH: | ||
612 | data->buflen += data->values[i] = | ||
613 | audit_pack_string(&bufp, krule->watch->path); | ||
614 | break; | ||
346 | default: | 615 | default: |
347 | data->values[i] = f->val; | 616 | data->values[i] = f->val; |
348 | } | 617 | } |
@@ -378,6 +647,10 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b) | |||
378 | if (strcmp(a->fields[i].se_str, b->fields[i].se_str)) | 647 | if (strcmp(a->fields[i].se_str, b->fields[i].se_str)) |
379 | return 1; | 648 | return 1; |
380 | break; | 649 | break; |
650 | case AUDIT_WATCH: | ||
651 | if (strcmp(a->watch->path, b->watch->path)) | ||
652 | return 1; | ||
653 | break; | ||
381 | default: | 654 | default: |
382 | if (a->fields[i].val != b->fields[i].val) | 655 | if (a->fields[i].val != b->fields[i].val) |
383 | return 1; | 656 | return 1; |
@@ -391,6 +664,32 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b) | |||
391 | return 0; | 664 | return 0; |
392 | } | 665 | } |
393 | 666 | ||
667 | /* Duplicate the given audit watch. The new watch's rules list is initialized | ||
668 | * to an empty list and wlist is undefined. */ | ||
669 | static struct audit_watch *audit_dupe_watch(struct audit_watch *old) | ||
670 | { | ||
671 | char *path; | ||
672 | struct audit_watch *new; | ||
673 | |||
674 | path = kstrdup(old->path, GFP_KERNEL); | ||
675 | if (unlikely(!path)) | ||
676 | return ERR_PTR(-ENOMEM); | ||
677 | |||
678 | new = audit_init_watch(path); | ||
679 | if (unlikely(IS_ERR(new))) { | ||
680 | kfree(path); | ||
681 | goto out; | ||
682 | } | ||
683 | |||
684 | new->dev = old->dev; | ||
685 | new->ino = old->ino; | ||
686 | get_inotify_watch(&old->parent->wdata); | ||
687 | new->parent = old->parent; | ||
688 | |||
689 | out: | ||
690 | return new; | ||
691 | } | ||
692 | |||
394 | /* Duplicate selinux field information. The se_rule is opaque, so must be | 693 | /* Duplicate selinux field information. The se_rule is opaque, so must be |
395 | * re-initialized. */ | 694 | * re-initialized. */ |
396 | static inline int audit_dupe_selinux_field(struct audit_field *df, | 695 | static inline int audit_dupe_selinux_field(struct audit_field *df, |
@@ -422,8 +721,11 @@ static inline int audit_dupe_selinux_field(struct audit_field *df, | |||
422 | /* Duplicate an audit rule. This will be a deep copy with the exception | 721 | /* Duplicate an audit rule. This will be a deep copy with the exception |
423 | * of the watch - that pointer is carried over. The selinux specific fields | 722 | * of the watch - that pointer is carried over. The selinux specific fields |
424 | * will be updated in the copy. The point is to be able to replace the old | 723 | * will be updated in the copy. The point is to be able to replace the old |
425 | * rule with the new rule in the filterlist, then free the old rule. */ | 724 | * rule with the new rule in the filterlist, then free the old rule. |
426 | static struct audit_entry *audit_dupe_rule(struct audit_krule *old) | 725 | * The rlist element is undefined; list manipulations are handled apart from |
726 | * the initial copy. */ | ||
727 | static struct audit_entry *audit_dupe_rule(struct audit_krule *old, | ||
728 | struct audit_watch *watch) | ||
427 | { | 729 | { |
428 | u32 fcount = old->field_count; | 730 | u32 fcount = old->field_count; |
429 | struct audit_entry *entry; | 731 | struct audit_entry *entry; |
@@ -442,6 +744,8 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old) | |||
442 | for (i = 0; i < AUDIT_BITMASK_SIZE; i++) | 744 | for (i = 0; i < AUDIT_BITMASK_SIZE; i++) |
443 | new->mask[i] = old->mask[i]; | 745 | new->mask[i] = old->mask[i]; |
444 | new->buflen = old->buflen; | 746 | new->buflen = old->buflen; |
747 | new->inode_f = old->inode_f; | ||
748 | new->watch = NULL; | ||
445 | new->field_count = old->field_count; | 749 | new->field_count = old->field_count; |
446 | memcpy(new->fields, old->fields, sizeof(struct audit_field) * fcount); | 750 | memcpy(new->fields, old->fields, sizeof(struct audit_field) * fcount); |
447 | 751 | ||
@@ -463,68 +767,409 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old) | |||
463 | } | 767 | } |
464 | } | 768 | } |
465 | 769 | ||
770 | if (watch) { | ||
771 | audit_get_watch(watch); | ||
772 | new->watch = watch; | ||
773 | } | ||
774 | |||
466 | return entry; | 775 | return entry; |
467 | } | 776 | } |
468 | 777 | ||
469 | /* Add rule to given filterlist if not a duplicate. Protected by | 778 | /* Update inode info in audit rules based on filesystem event. */ |
470 | * audit_netlink_mutex. */ | 779 | static void audit_update_watch(struct audit_parent *parent, |
780 | const char *dname, dev_t dev, | ||
781 | unsigned long ino, unsigned invalidating) | ||
782 | { | ||
783 | struct audit_watch *owatch, *nwatch, *nextw; | ||
784 | struct audit_krule *r, *nextr; | ||
785 | struct audit_entry *oentry, *nentry; | ||
786 | struct audit_buffer *ab; | ||
787 | |||
788 | mutex_lock(&audit_filter_mutex); | ||
789 | list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) { | ||
790 | if (audit_compare_dname_path(dname, owatch->path, NULL)) | ||
791 | continue; | ||
792 | |||
793 | /* If the update involves invalidating rules, do the inode-based | ||
794 | * filtering now, so we don't omit records. */ | ||
795 | if (invalidating && | ||
796 | audit_filter_inodes(current, current->audit_context) == AUDIT_RECORD_CONTEXT) | ||
797 | audit_set_auditable(current->audit_context); | ||
798 | |||
799 | nwatch = audit_dupe_watch(owatch); | ||
800 | if (unlikely(IS_ERR(nwatch))) { | ||
801 | mutex_unlock(&audit_filter_mutex); | ||
802 | audit_panic("error updating watch, skipping"); | ||
803 | return; | ||
804 | } | ||
805 | nwatch->dev = dev; | ||
806 | nwatch->ino = ino; | ||
807 | |||
808 | list_for_each_entry_safe(r, nextr, &owatch->rules, rlist) { | ||
809 | |||
810 | oentry = container_of(r, struct audit_entry, rule); | ||
811 | list_del(&oentry->rule.rlist); | ||
812 | list_del_rcu(&oentry->list); | ||
813 | |||
814 | nentry = audit_dupe_rule(&oentry->rule, nwatch); | ||
815 | if (unlikely(IS_ERR(nentry))) | ||
816 | audit_panic("error updating watch, removing"); | ||
817 | else { | ||
818 | int h = audit_hash_ino((u32)ino); | ||
819 | list_add(&nentry->rule.rlist, &nwatch->rules); | ||
820 | list_add_rcu(&nentry->list, &audit_inode_hash[h]); | ||
821 | } | ||
822 | |||
823 | call_rcu(&oentry->rcu, audit_free_rule_rcu); | ||
824 | } | ||
825 | |||
826 | ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE); | ||
827 | audit_log_format(ab, "audit updated rules specifying watch="); | ||
828 | audit_log_untrustedstring(ab, owatch->path); | ||
829 | audit_log_format(ab, " with dev=%u ino=%lu\n", dev, ino); | ||
830 | audit_log_end(ab); | ||
831 | |||
832 | audit_remove_watch(owatch); | ||
833 | goto add_watch_to_parent; /* event applies to a single watch */ | ||
834 | } | ||
835 | mutex_unlock(&audit_filter_mutex); | ||
836 | return; | ||
837 | |||
838 | add_watch_to_parent: | ||
839 | list_add(&nwatch->wlist, &parent->watches); | ||
840 | mutex_unlock(&audit_filter_mutex); | ||
841 | return; | ||
842 | } | ||
843 | |||
844 | /* Remove all watches & rules associated with a parent that is going away. */ | ||
845 | static void audit_remove_parent_watches(struct audit_parent *parent) | ||
846 | { | ||
847 | struct audit_watch *w, *nextw; | ||
848 | struct audit_krule *r, *nextr; | ||
849 | struct audit_entry *e; | ||
850 | |||
851 | mutex_lock(&audit_filter_mutex); | ||
852 | parent->flags |= AUDIT_PARENT_INVALID; | ||
853 | list_for_each_entry_safe(w, nextw, &parent->watches, wlist) { | ||
854 | list_for_each_entry_safe(r, nextr, &w->rules, rlist) { | ||
855 | e = container_of(r, struct audit_entry, rule); | ||
856 | list_del(&r->rlist); | ||
857 | list_del_rcu(&e->list); | ||
858 | call_rcu(&e->rcu, audit_free_rule_rcu); | ||
859 | |||
860 | audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, | ||
861 | "audit implicitly removed rule from list=%d\n", | ||
862 | AUDIT_FILTER_EXIT); | ||
863 | } | ||
864 | audit_remove_watch(w); | ||
865 | } | ||
866 | mutex_unlock(&audit_filter_mutex); | ||
867 | } | ||
868 | |||
869 | /* Unregister inotify watches for parents on in_list. | ||
870 | * Generates an IN_IGNORED event. */ | ||
871 | static void audit_inotify_unregister(struct list_head *in_list) | ||
872 | { | ||
873 | struct audit_parent *p, *n; | ||
874 | |||
875 | list_for_each_entry_safe(p, n, in_list, ilist) { | ||
876 | list_del(&p->ilist); | ||
877 | inotify_rm_watch(audit_ih, &p->wdata); | ||
878 | /* the put matching the get in audit_do_del_rule() */ | ||
879 | put_inotify_watch(&p->wdata); | ||
880 | } | ||
881 | } | ||
882 | |||
883 | /* Find an existing audit rule. | ||
884 | * Caller must hold audit_filter_mutex to prevent stale rule data. */ | ||
885 | static struct audit_entry *audit_find_rule(struct audit_entry *entry, | ||
886 | struct list_head *list) | ||
887 | { | ||
888 | struct audit_entry *e, *found = NULL; | ||
889 | int h; | ||
890 | |||
891 | if (entry->rule.watch) { | ||
892 | /* we don't know the inode number, so must walk entire hash */ | ||
893 | for (h = 0; h < AUDIT_INODE_BUCKETS; h++) { | ||
894 | list = &audit_inode_hash[h]; | ||
895 | list_for_each_entry(e, list, list) | ||
896 | if (!audit_compare_rule(&entry->rule, &e->rule)) { | ||
897 | found = e; | ||
898 | goto out; | ||
899 | } | ||
900 | } | ||
901 | goto out; | ||
902 | } | ||
903 | |||
904 | list_for_each_entry(e, list, list) | ||
905 | if (!audit_compare_rule(&entry->rule, &e->rule)) { | ||
906 | found = e; | ||
907 | goto out; | ||
908 | } | ||
909 | |||
910 | out: | ||
911 | return found; | ||
912 | } | ||
913 | |||
914 | /* Get path information necessary for adding watches. */ | ||
915 | static int audit_get_nd(char *path, struct nameidata **ndp, | ||
916 | struct nameidata **ndw) | ||
917 | { | ||
918 | struct nameidata *ndparent, *ndwatch; | ||
919 | int err; | ||
920 | |||
921 | ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL); | ||
922 | if (unlikely(!ndparent)) | ||
923 | return -ENOMEM; | ||
924 | |||
925 | ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL); | ||
926 | if (unlikely(!ndwatch)) { | ||
927 | kfree(ndparent); | ||
928 | return -ENOMEM; | ||
929 | } | ||
930 | |||
931 | err = path_lookup(path, LOOKUP_PARENT, ndparent); | ||
932 | if (err) { | ||
933 | kfree(ndparent); | ||
934 | kfree(ndwatch); | ||
935 | return err; | ||
936 | } | ||
937 | |||
938 | err = path_lookup(path, 0, ndwatch); | ||
939 | if (err) { | ||
940 | kfree(ndwatch); | ||
941 | ndwatch = NULL; | ||
942 | } | ||
943 | |||
944 | *ndp = ndparent; | ||
945 | *ndw = ndwatch; | ||
946 | |||
947 | return 0; | ||
948 | } | ||
949 | |||
950 | /* Release resources used for watch path information. */ | ||
951 | static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw) | ||
952 | { | ||
953 | if (ndp) { | ||
954 | path_release(ndp); | ||
955 | kfree(ndp); | ||
956 | } | ||
957 | if (ndw) { | ||
958 | path_release(ndw); | ||
959 | kfree(ndw); | ||
960 | } | ||
961 | } | ||
962 | |||
963 | /* Associate the given rule with an existing parent inotify_watch. | ||
964 | * Caller must hold audit_filter_mutex. */ | ||
965 | static void audit_add_to_parent(struct audit_krule *krule, | ||
966 | struct audit_parent *parent) | ||
967 | { | ||
968 | struct audit_watch *w, *watch = krule->watch; | ||
969 | int watch_found = 0; | ||
970 | |||
971 | list_for_each_entry(w, &parent->watches, wlist) { | ||
972 | if (strcmp(watch->path, w->path)) | ||
973 | continue; | ||
974 | |||
975 | watch_found = 1; | ||
976 | |||
977 | /* put krule's and initial refs to temporary watch */ | ||
978 | audit_put_watch(watch); | ||
979 | audit_put_watch(watch); | ||
980 | |||
981 | audit_get_watch(w); | ||
982 | krule->watch = watch = w; | ||
983 | break; | ||
984 | } | ||
985 | |||
986 | if (!watch_found) { | ||
987 | get_inotify_watch(&parent->wdata); | ||
988 | watch->parent = parent; | ||
989 | |||
990 | list_add(&watch->wlist, &parent->watches); | ||
991 | } | ||
992 | list_add(&krule->rlist, &watch->rules); | ||
993 | } | ||
994 | |||
995 | /* Find a matching watch entry, or add this one. | ||
996 | * Caller must hold audit_filter_mutex. */ | ||
997 | static int audit_add_watch(struct audit_krule *krule, struct nameidata *ndp, | ||
998 | struct nameidata *ndw) | ||
999 | { | ||
1000 | struct audit_watch *watch = krule->watch; | ||
1001 | struct inotify_watch *i_watch; | ||
1002 | struct audit_parent *parent; | ||
1003 | int ret = 0; | ||
1004 | |||
1005 | /* update watch filter fields */ | ||
1006 | if (ndw) { | ||
1007 | watch->dev = ndw->dentry->d_inode->i_sb->s_dev; | ||
1008 | watch->ino = ndw->dentry->d_inode->i_ino; | ||
1009 | } | ||
1010 | |||
1011 | /* The audit_filter_mutex must not be held during inotify calls because | ||
1012 | * we hold it during inotify event callback processing. If an existing | ||
1013 | * inotify watch is found, inotify_find_watch() grabs a reference before | ||
1014 | * returning. | ||
1015 | */ | ||
1016 | mutex_unlock(&audit_filter_mutex); | ||
1017 | |||
1018 | if (inotify_find_watch(audit_ih, ndp->dentry->d_inode, &i_watch) < 0) { | ||
1019 | parent = audit_init_parent(ndp); | ||
1020 | if (IS_ERR(parent)) { | ||
1021 | /* caller expects mutex locked */ | ||
1022 | mutex_lock(&audit_filter_mutex); | ||
1023 | return PTR_ERR(parent); | ||
1024 | } | ||
1025 | } else | ||
1026 | parent = container_of(i_watch, struct audit_parent, wdata); | ||
1027 | |||
1028 | mutex_lock(&audit_filter_mutex); | ||
1029 | |||
1030 | /* parent was moved before we took audit_filter_mutex */ | ||
1031 | if (parent->flags & AUDIT_PARENT_INVALID) | ||
1032 | ret = -ENOENT; | ||
1033 | else | ||
1034 | audit_add_to_parent(krule, parent); | ||
1035 | |||
1036 | /* match get in audit_init_parent or inotify_find_watch */ | ||
1037 | put_inotify_watch(&parent->wdata); | ||
1038 | return ret; | ||
1039 | } | ||
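
audit_add_watch() has to drop audit_filter_mutex around audit_init_parent()/inotify_find_watch(), because the same mutex is held while inotify event callbacks are processed; after re-taking it, the parent must be re-checked for AUDIT_PARENT_INVALID since it may have been moved in the meantime. A minimal pthread sketch of that unlock, call, relock, revalidate pattern follows; the structure and flag names are stand-ins, not the kernel's.

	/* Userspace sketch of "unlock around a blocking call, then revalidate". */
	#include <pthread.h>
	#include <stdio.h>

	#define PARENT_INVALID 0x1		/* stand-in for AUDIT_PARENT_INVALID */

	static pthread_mutex_t filter_mutex = PTHREAD_MUTEX_INITIALIZER;

	struct parent {
		unsigned flags;
	};

	/* Placeholder for inotify_find_watch()/audit_init_parent(): may block,
	 * so it must not be called with filter_mutex held. */
	static struct parent *find_or_create_parent(void)
	{
		static struct parent p;
		return &p;
	}

	static int add_watch(void)
	{
		struct parent *parent;
		int ret;

		pthread_mutex_lock(&filter_mutex);

		/* drop the lock for the potentially blocking lookup */
		pthread_mutex_unlock(&filter_mutex);
		parent = find_or_create_parent();
		pthread_mutex_lock(&filter_mutex);

		/* state may have changed while the lock was released */
		if (parent->flags & PARENT_INVALID)
			ret = -1;	/* parent moved away: -ENOENT in the kernel */
		else
			ret = 0;	/* safe to attach the watch/rule here */

		pthread_mutex_unlock(&filter_mutex);
		return ret;
	}

	int main(void)
	{
		printf("add_watch -> %d\n", add_watch());
		return 0;
	}
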
1040 | |||
1041 | /* Add rule to given filterlist if not a duplicate. */ | ||
471 | static inline int audit_add_rule(struct audit_entry *entry, | 1042 | static inline int audit_add_rule(struct audit_entry *entry, |
472 | struct list_head *list) | 1043 | struct list_head *list) |
473 | { | 1044 | { |
474 | struct audit_entry *e; | 1045 | struct audit_entry *e; |
1046 | struct audit_field *inode_f = entry->rule.inode_f; | ||
1047 | struct audit_watch *watch = entry->rule.watch; | ||
1048 | struct nameidata *ndp, *ndw; | ||
1049 | int h, err, putnd_needed = 0; | ||
1050 | |||
1051 | if (inode_f) { | ||
1052 | h = audit_hash_ino(inode_f->val); | ||
1053 | list = &audit_inode_hash[h]; | ||
1054 | } | ||
475 | 1055 | ||
476 | /* Do not use the _rcu iterator here, since this is the only | 1056 | mutex_lock(&audit_filter_mutex); |
477 | * addition routine. */ | 1057 | e = audit_find_rule(entry, list); |
478 | list_for_each_entry(e, list, list) { | 1058 | mutex_unlock(&audit_filter_mutex); |
479 | if (!audit_compare_rule(&entry->rule, &e->rule)) | 1059 | if (e) { |
480 | return -EEXIST; | 1060 | err = -EEXIST; |
1061 | goto error; | ||
1062 | } | ||
1063 | |||
1064 | /* Avoid calling path_lookup under audit_filter_mutex. */ | ||
1065 | if (watch) { | ||
1066 | err = audit_get_nd(watch->path, &ndp, &ndw); | ||
1067 | if (err) | ||
1068 | goto error; | ||
1069 | putnd_needed = 1; | ||
1070 | } | ||
1071 | |||
1072 | mutex_lock(&audit_filter_mutex); | ||
1073 | if (watch) { | ||
1074 | /* audit_filter_mutex is dropped and re-taken during this call */ | ||
1075 | err = audit_add_watch(&entry->rule, ndp, ndw); | ||
1076 | if (err) { | ||
1077 | mutex_unlock(&audit_filter_mutex); | ||
1078 | goto error; | ||
1079 | } | ||
1080 | h = audit_hash_ino((u32)watch->ino); | ||
1081 | list = &audit_inode_hash[h]; | ||
481 | } | 1082 | } |
482 | 1083 | ||
483 | if (entry->rule.flags & AUDIT_FILTER_PREPEND) { | 1084 | if (entry->rule.flags & AUDIT_FILTER_PREPEND) { |
484 | list_add_rcu(&entry->list, list); | 1085 | list_add_rcu(&entry->list, list); |
1086 | entry->rule.flags &= ~AUDIT_FILTER_PREPEND; | ||
485 | } else { | 1087 | } else { |
486 | list_add_tail_rcu(&entry->list, list); | 1088 | list_add_tail_rcu(&entry->list, list); |
487 | } | 1089 | } |
1090 | mutex_unlock(&audit_filter_mutex); | ||
488 | 1091 | ||
489 | return 0; | 1092 | if (putnd_needed) |
1093 | audit_put_nd(ndp, ndw); | ||
1094 | |||
1095 | return 0; | ||
1096 | |||
1097 | error: | ||
1098 | if (putnd_needed) | ||
1099 | audit_put_nd(ndp, ndw); | ||
1100 | if (watch) | ||
1101 | audit_put_watch(watch); /* tmp watch, matches initial get */ | ||
1102 | return err; | ||
490 | } | 1103 | } |
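
With this change, rules that carry an inode field or a watch no longer live on the flat per-filter lists: the inode number is hashed and the rule is inserted into the matching audit_inode_hash[] bucket, so later filtering only has to scan one bucket. The sketch below shows that bucket selection, assuming a mask-style hash over a power-of-two bucket count; the count of 32 is an assumption made for the example.

	/* Userspace illustration of hashing rules into per-inode buckets. */
	#include <stdio.h>

	#define INODE_BUCKETS 32		/* power of two, assumed for the sketch */

	struct rule {
		unsigned long ino;
		struct rule *next;
	};

	static struct rule *inode_hash[INODE_BUCKETS];

	/* Mirror of a mask-style hash: bucket = ino & (buckets - 1). */
	static unsigned hash_ino(unsigned long ino)
	{
		return ino & (INODE_BUCKETS - 1);
	}

	static void add_rule(struct rule *r)
	{
		unsigned h = hash_ino(r->ino);

		r->next = inode_hash[h];
		inode_hash[h] = r;
	}

	int main(void)
	{
		static struct rule a = { .ino = 1234 }, b = { .ino = 1266 };

		add_rule(&a);
		add_rule(&b);
		/* 1234 % 32 == 18 and 1266 % 32 == 18: both land in bucket 18 */
		printf("rule a -> bucket %u, rule b -> bucket %u\n",
		       hash_ino(a.ino), hash_ino(b.ino));
		return 0;
	}
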
491 | 1104 | ||
492 | /* Remove an existing rule from filterlist. Protected by | 1105 | /* Remove an existing rule from filterlist. */ |
493 | * audit_netlink_mutex. */ | ||
494 | static inline int audit_del_rule(struct audit_entry *entry, | 1106 | static inline int audit_del_rule(struct audit_entry *entry, |
495 | struct list_head *list) | 1107 | struct list_head *list) |
496 | { | 1108 | { |
497 | struct audit_entry *e; | 1109 | struct audit_entry *e; |
1110 | struct audit_field *inode_f = entry->rule.inode_f; | ||
1111 | struct audit_watch *watch, *tmp_watch = entry->rule.watch; | ||
1112 | LIST_HEAD(inotify_list); | ||
1113 | int h, ret = 0; | ||
1114 | |||
1115 | if (inode_f) { | ||
1116 | h = audit_hash_ino(inode_f->val); | ||
1117 | list = &audit_inode_hash[h]; | ||
1118 | } | ||
498 | 1119 | ||
499 | /* Do not use the _rcu iterator here, since this is the only | 1120 | mutex_lock(&audit_filter_mutex); |
500 | * deletion routine. */ | 1121 | e = audit_find_rule(entry, list); |
501 | list_for_each_entry(e, list, list) { | 1122 | if (!e) { |
502 | if (!audit_compare_rule(&entry->rule, &e->rule)) { | 1123 | mutex_unlock(&audit_filter_mutex); |
503 | list_del_rcu(&e->list); | 1124 | ret = -ENOENT; |
504 | call_rcu(&e->rcu, audit_free_rule_rcu); | 1125 | goto out; |
505 | return 0; | 1126 | } |
1127 | |||
1128 | watch = e->rule.watch; | ||
1129 | if (watch) { | ||
1130 | struct audit_parent *parent = watch->parent; | ||
1131 | |||
1132 | list_del(&e->rule.rlist); | ||
1133 | |||
1134 | if (list_empty(&watch->rules)) { | ||
1135 | audit_remove_watch(watch); | ||
1136 | |||
1137 | if (list_empty(&parent->watches)) { | ||
1138 | /* Put parent on the inotify un-registration | ||
1139 | * list. Grab a reference before releasing | ||
1140 | * audit_filter_mutex, to be released in | ||
1141 | * audit_inotify_unregister(). */ | ||
1142 | list_add(&parent->ilist, &inotify_list); | ||
1143 | get_inotify_watch(&parent->wdata); | ||
1144 | } | ||
506 | } | 1145 | } |
507 | } | 1146 | } |
508 | return -ENOENT; /* No matching rule */ | 1147 | |
1148 | list_del_rcu(&e->list); | ||
1149 | call_rcu(&e->rcu, audit_free_rule_rcu); | ||
1150 | |||
1151 | mutex_unlock(&audit_filter_mutex); | ||
1152 | |||
1153 | if (!list_empty(&inotify_list)) | ||
1154 | audit_inotify_unregister(&inotify_list); | ||
1155 | |||
1156 | out: | ||
1157 | if (tmp_watch) | ||
1158 | audit_put_watch(tmp_watch); /* match initial get */ | ||
1159 | |||
1160 | return ret; | ||
509 | } | 1161 | } |
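
audit_del_rule() must not call into inotify un-registration while audit_filter_mutex is held, so parents that just lost their last watch are collected on a local list (with an extra reference taken) and only un-registered after the mutex is released. The same collect-under-lock, act-after-unlock idiom in a small userspace sketch; the names are illustrative.

	/* Sketch: gather work under a lock, perform it after unlocking. */
	#include <pthread.h>
	#include <stdio.h>

	#define MAX_PENDING 8

	static pthread_mutex_t filter_mutex = PTHREAD_MUTEX_INITIALIZER;

	/* Placeholder for the blocking inotify un-registration. */
	static void unregister_parent(int parent_id)
	{
		printf("unregistering parent %d outside the lock\n", parent_id);
	}

	static void del_rule(const int *orphaned, int n)
	{
		int pending[MAX_PENDING];
		int npending = 0, i;

		pthread_mutex_lock(&filter_mutex);
		/* While locked, only record which parents lost their last watch;
		 * the real code also takes a reference on each one here. */
		for (i = 0; i < n && npending < MAX_PENDING; i++)
			pending[npending++] = orphaned[i];
		pthread_mutex_unlock(&filter_mutex);

		/* Now it is safe to make blocking calls. */
		for (i = 0; i < npending; i++)
			unregister_parent(pending[i]);
	}

	int main(void)
	{
		int orphaned[] = { 3, 7 };

		del_rule(orphaned, 2);
		return 0;
	}
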
510 | 1162 | ||
511 | /* List rules using struct audit_rule. Exists for backward | 1163 | /* List rules using struct audit_rule. Exists for backward |
512 | * compatibility with userspace. */ | 1164 | * compatibility with userspace. */ |
513 | static int audit_list(void *_dest) | 1165 | static void audit_list(int pid, int seq, struct sk_buff_head *q) |
514 | { | 1166 | { |
515 | int pid, seq; | 1167 | struct sk_buff *skb; |
516 | int *dest = _dest; | ||
517 | struct audit_entry *entry; | 1168 | struct audit_entry *entry; |
518 | int i; | 1169 | int i; |
519 | 1170 | ||
520 | pid = dest[0]; | 1171 | /* This is a blocking read, so use audit_filter_mutex instead of rcu |
521 | seq = dest[1]; | 1172 | * iterator to sync with list writers. */ |
522 | kfree(dest); | ||
523 | |||
524 | mutex_lock(&audit_netlink_mutex); | ||
525 | |||
526 | /* The *_rcu iterators not needed here because we are | ||
527 | always called with audit_netlink_mutex held. */ | ||
528 | for (i=0; i<AUDIT_NR_FILTERS; i++) { | 1173 | for (i=0; i<AUDIT_NR_FILTERS; i++) { |
529 | list_for_each_entry(entry, &audit_filter_list[i], list) { | 1174 | list_for_each_entry(entry, &audit_filter_list[i], list) { |
530 | struct audit_rule *rule; | 1175 | struct audit_rule *rule; |
@@ -532,33 +1177,41 @@ static int audit_list(void *_dest) | |||
532 | rule = audit_krule_to_rule(&entry->rule); | 1177 | rule = audit_krule_to_rule(&entry->rule); |
533 | if (unlikely(!rule)) | 1178 | if (unlikely(!rule)) |
534 | break; | 1179 | break; |
535 | audit_send_reply(pid, seq, AUDIT_LIST, 0, 1, | 1180 | skb = audit_make_reply(pid, seq, AUDIT_LIST, 0, 1, |
536 | rule, sizeof(*rule)); | 1181 | rule, sizeof(*rule)); |
1182 | if (skb) | ||
1183 | skb_queue_tail(q, skb); | ||
537 | kfree(rule); | 1184 | kfree(rule); |
538 | } | 1185 | } |
539 | } | 1186 | } |
540 | audit_send_reply(pid, seq, AUDIT_LIST, 1, 1, NULL, 0); | 1187 | for (i = 0; i < AUDIT_INODE_BUCKETS; i++) { |
541 | 1188 | list_for_each_entry(entry, &audit_inode_hash[i], list) { | |
542 | mutex_unlock(&audit_netlink_mutex); | 1189 | struct audit_rule *rule; |
543 | return 0; | 1190 | |
1191 | rule = audit_krule_to_rule(&entry->rule); | ||
1192 | if (unlikely(!rule)) | ||
1193 | break; | ||
1194 | skb = audit_make_reply(pid, seq, AUDIT_LIST, 0, 1, | ||
1195 | rule, sizeof(*rule)); | ||
1196 | if (skb) | ||
1197 | skb_queue_tail(q, skb); | ||
1198 | kfree(rule); | ||
1199 | } | ||
1200 | } | ||
1201 | skb = audit_make_reply(pid, seq, AUDIT_LIST, 1, 1, NULL, 0); | ||
1202 | if (skb) | ||
1203 | skb_queue_tail(q, skb); | ||
544 | } | 1204 | } |
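
Listing no longer happens from a kernel thread that grabs the netlink mutex and calls audit_send_reply() directly: audit_list() now stages every reply skb on a queue while audit_filter_mutex is held, and a separate audit_send_list thread merely drains that queue. A userspace model of the stage-then-drain split is shown below; the struct and function names are invented for the example.

	/* Model of audit_list(): stage replies in a queue, send them later. */
	#include <stdio.h>
	#include <stdlib.h>

	struct reply {
		char payload[64];
		struct reply *next;
	};

	struct reply_queue {
		struct reply *head, *tail;
	};

	static void queue_reply(struct reply_queue *q, const char *text)
	{
		struct reply *r = calloc(1, sizeof(*r));

		if (!r)
			return;		/* mirrors silently skipping a failed skb */
		snprintf(r->payload, sizeof(r->payload), "%s", text);
		if (q->tail)
			q->tail->next = r;
		else
			q->head = r;
		q->tail = r;
	}

	/* Stage one reply per rule plus a terminating "done" message. */
	static void list_rules(struct reply_queue *q, const char *rules[], int n)
	{
		int i;

		for (i = 0; i < n; i++)
			queue_reply(q, rules[i]);
		queue_reply(q, "done");	/* the final AUDIT_LIST reply with done=1 */
	}

	/* Drain the queue; in the kernel this runs in the audit_send_list thread. */
	static void send_list(struct reply_queue *q)
	{
		struct reply *r;

		while ((r = q->head) != NULL) {
			q->head = r->next;
			printf("send: %s\n", r->payload);
			free(r);
		}
		q->tail = NULL;
	}

	int main(void)
	{
		const char *rules[] = { "rule-0", "rule-1" };
		struct reply_queue q = { NULL, NULL };

		list_rules(&q, rules, 2);
		send_list(&q);
		return 0;
	}
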
545 | 1205 | ||
546 | /* List rules using struct audit_rule_data. */ | 1206 | /* List rules using struct audit_rule_data. */ |
547 | static int audit_list_rules(void *_dest) | 1207 | static void audit_list_rules(int pid, int seq, struct sk_buff_head *q) |
548 | { | 1208 | { |
549 | int pid, seq; | 1209 | struct sk_buff *skb; |
550 | int *dest = _dest; | ||
551 | struct audit_entry *e; | 1210 | struct audit_entry *e; |
552 | int i; | 1211 | int i; |
553 | 1212 | ||
554 | pid = dest[0]; | 1213 | /* This is a blocking read, so use audit_filter_mutex instead of rcu |
555 | seq = dest[1]; | 1214 | * iterator to sync with list writers. */ |
556 | kfree(dest); | ||
557 | |||
558 | mutex_lock(&audit_netlink_mutex); | ||
559 | |||
560 | /* The *_rcu iterators not needed here because we are | ||
561 | always called with audit_netlink_mutex held. */ | ||
562 | for (i=0; i<AUDIT_NR_FILTERS; i++) { | 1215 | for (i=0; i<AUDIT_NR_FILTERS; i++) { |
563 | list_for_each_entry(e, &audit_filter_list[i], list) { | 1216 | list_for_each_entry(e, &audit_filter_list[i], list) { |
564 | struct audit_rule_data *data; | 1217 | struct audit_rule_data *data; |
@@ -566,15 +1219,30 @@ static int audit_list_rules(void *_dest) | |||
566 | data = audit_krule_to_data(&e->rule); | 1219 | data = audit_krule_to_data(&e->rule); |
567 | if (unlikely(!data)) | 1220 | if (unlikely(!data)) |
568 | break; | 1221 | break; |
569 | audit_send_reply(pid, seq, AUDIT_LIST_RULES, 0, 1, | 1222 | skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 0, 1, |
570 | data, sizeof(*data)); | 1223 | data, sizeof(*data) + data->buflen); |
1224 | if (skb) | ||
1225 | skb_queue_tail(q, skb); | ||
571 | kfree(data); | 1226 | kfree(data); |
572 | } | 1227 | } |
573 | } | 1228 | } |
574 | audit_send_reply(pid, seq, AUDIT_LIST_RULES, 1, 1, NULL, 0); | 1229 | for (i=0; i< AUDIT_INODE_BUCKETS; i++) { |
1230 | list_for_each_entry(e, &audit_inode_hash[i], list) { | ||
1231 | struct audit_rule_data *data; | ||
575 | 1232 | ||
576 | mutex_unlock(&audit_netlink_mutex); | 1233 | data = audit_krule_to_data(&e->rule); |
577 | return 0; | 1234 | if (unlikely(!data)) |
1235 | break; | ||
1236 | skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 0, 1, | ||
1237 | data, sizeof(*data) + data->buflen); | ||
1238 | if (skb) | ||
1239 | skb_queue_tail(q, skb); | ||
1240 | kfree(data); | ||
1241 | } | ||
1242 | } | ||
1243 | skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 1, 1, NULL, 0); | ||
1244 | if (skb) | ||
1245 | skb_queue_tail(q, skb); | ||
578 | } | 1246 | } |
579 | 1247 | ||
580 | /** | 1248 | /** |
@@ -592,7 +1260,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, | |||
592 | size_t datasz, uid_t loginuid, u32 sid) | 1260 | size_t datasz, uid_t loginuid, u32 sid) |
593 | { | 1261 | { |
594 | struct task_struct *tsk; | 1262 | struct task_struct *tsk; |
595 | int *dest; | 1263 | struct audit_netlink_list *dest; |
596 | int err = 0; | 1264 | int err = 0; |
597 | struct audit_entry *entry; | 1265 | struct audit_entry *entry; |
598 | 1266 | ||
@@ -605,18 +1273,22 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, | |||
605 | * happen if we're actually running in the context of auditctl | 1273 | * happen if we're actually running in the context of auditctl |
606 | * trying to _send_ the stuff */ | 1274 | * trying to _send_ the stuff */ |
607 | 1275 | ||
608 | dest = kmalloc(2 * sizeof(int), GFP_KERNEL); | 1276 | dest = kmalloc(sizeof(struct audit_netlink_list), GFP_KERNEL); |
609 | if (!dest) | 1277 | if (!dest) |
610 | return -ENOMEM; | 1278 | return -ENOMEM; |
611 | dest[0] = pid; | 1279 | dest->pid = pid; |
612 | dest[1] = seq; | 1280 | skb_queue_head_init(&dest->q); |
613 | 1281 | ||
1282 | mutex_lock(&audit_filter_mutex); | ||
614 | if (type == AUDIT_LIST) | 1283 | if (type == AUDIT_LIST) |
615 | tsk = kthread_run(audit_list, dest, "audit_list"); | 1284 | audit_list(pid, seq, &dest->q); |
616 | else | 1285 | else |
617 | tsk = kthread_run(audit_list_rules, dest, | 1286 | audit_list_rules(pid, seq, &dest->q); |
618 | "audit_list_rules"); | 1287 | mutex_unlock(&audit_filter_mutex); |
1288 | |||
1289 | tsk = kthread_run(audit_send_list, dest, "audit_send_list"); | ||
619 | if (IS_ERR(tsk)) { | 1290 | if (IS_ERR(tsk)) { |
1291 | skb_queue_purge(&dest->q); | ||
620 | kfree(dest); | 1292 | kfree(dest); |
621 | err = PTR_ERR(tsk); | 1293 | err = PTR_ERR(tsk); |
622 | } | 1294 | } |
@@ -632,6 +1304,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, | |||
632 | 1304 | ||
633 | err = audit_add_rule(entry, | 1305 | err = audit_add_rule(entry, |
634 | &audit_filter_list[entry->rule.listnr]); | 1306 | &audit_filter_list[entry->rule.listnr]); |
1307 | |||
635 | if (sid) { | 1308 | if (sid) { |
636 | char *ctx = NULL; | 1309 | char *ctx = NULL; |
637 | u32 len; | 1310 | u32 len; |
@@ -712,7 +1385,43 @@ int audit_comparator(const u32 left, const u32 op, const u32 right) | |||
712 | return 0; | 1385 | return 0; |
713 | } | 1386 | } |
714 | 1387 | ||
1388 | /* Compare given dentry name with last component in given path, | ||
1389 | * return of 0 indicates a match. */ | ||
1390 | int audit_compare_dname_path(const char *dname, const char *path, | ||
1391 | int *dirlen) | ||
1392 | { | ||
1393 | int dlen, plen; | ||
1394 | const char *p; | ||
715 | 1395 | ||
1396 | if (!dname || !path) | ||
1397 | return 1; | ||
1398 | |||
1399 | dlen = strlen(dname); | ||
1400 | plen = strlen(path); | ||
1401 | if (plen < dlen) | ||
1402 | return 1; | ||
1403 | |||
1404 | /* disregard trailing slashes */ | ||
1405 | p = path + plen - 1; | ||
1406 | while ((*p == '/') && (p > path)) | ||
1407 | p--; | ||
1408 | |||
1409 | /* find last path component */ | ||
1410 | p = p - dlen + 1; | ||
1411 | if (p < path) | ||
1412 | return 1; | ||
1413 | else if (p > path) { | ||
1414 | if (*--p != '/') | ||
1415 | return 1; | ||
1416 | else | ||
1417 | p++; | ||
1418 | } | ||
1419 | |||
1420 | /* return length of path's directory component */ | ||
1421 | if (dirlen) | ||
1422 | *dirlen = p - path; | ||
1423 | return strncmp(p, dname, dlen); | ||
1424 | } | ||
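
audit_compare_dname_path() factors out the test that __audit_inode_child() used to open-code: does a dentry name match the final component of a recorded path, ignoring trailing slashes, and if so how long is the directory prefix. The same logic is lifted below into a standalone userspace function with a few sample calls; the expected results in the comments are my reading of the code, not taken from the patch.

	/* Standalone copy of the last-component comparison, for experimentation. */
	#include <stdio.h>
	#include <string.h>

	static int compare_dname_path(const char *dname, const char *path, int *dirlen)
	{
		int dlen, plen;
		const char *p;

		if (!dname || !path)
			return 1;

		dlen = strlen(dname);
		plen = strlen(path);
		if (plen < dlen)
			return 1;

		/* disregard trailing slashes */
		p = path + plen - 1;
		while ((*p == '/') && (p > path))
			p--;

		/* find last path component */
		p = p - dlen + 1;
		if (p < path)
			return 1;
		else if (p > path) {
			if (*--p != '/')
				return 1;
			else
				p++;
		}

		/* return length of path's directory component */
		if (dirlen)
			*dirlen = p - path;
		return strncmp(p, dname, dlen);
	}

	int main(void)
	{
		int dirlen = 0;

		/* match: "passwd" is the final component, prefix "/etc/" (len 5) */
		printf("%d dirlen=%d\n",
		       compare_dname_path("passwd", "/etc/passwd", &dirlen), dirlen);
		/* trailing slashes are ignored, still a match */
		printf("%d\n", compare_dname_path("tmp", "/var/tmp///", NULL));
		/* mismatch in the last component gives a non-zero result */
		printf("%d\n", compare_dname_path("shadow", "/etc/passwd", NULL));
		return 0;
	}
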
716 | 1425 | ||
717 | static int audit_filter_user_rules(struct netlink_skb_parms *cb, | 1426 | static int audit_filter_user_rules(struct netlink_skb_parms *cb, |
718 | struct audit_krule *rule, | 1427 | struct audit_krule *rule, |
@@ -744,7 +1453,6 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb, | |||
744 | } | 1453 | } |
745 | switch (rule->action) { | 1454 | switch (rule->action) { |
746 | case AUDIT_NEVER: *state = AUDIT_DISABLED; break; | 1455 | case AUDIT_NEVER: *state = AUDIT_DISABLED; break; |
747 | case AUDIT_POSSIBLE: *state = AUDIT_BUILD_CONTEXT; break; | ||
748 | case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; | 1456 | case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; |
749 | } | 1457 | } |
750 | return 1; | 1458 | return 1; |
@@ -826,32 +1534,65 @@ static inline int audit_rule_has_selinux(struct audit_krule *rule) | |||
826 | int selinux_audit_rule_update(void) | 1534 | int selinux_audit_rule_update(void) |
827 | { | 1535 | { |
828 | struct audit_entry *entry, *n, *nentry; | 1536 | struct audit_entry *entry, *n, *nentry; |
1537 | struct audit_watch *watch; | ||
829 | int i, err = 0; | 1538 | int i, err = 0; |
830 | 1539 | ||
831 | /* audit_netlink_mutex synchronizes the writers */ | 1540 | /* audit_filter_mutex synchronizes the writers */ |
832 | mutex_lock(&audit_netlink_mutex); | 1541 | mutex_lock(&audit_filter_mutex); |
833 | 1542 | ||
834 | for (i = 0; i < AUDIT_NR_FILTERS; i++) { | 1543 | for (i = 0; i < AUDIT_NR_FILTERS; i++) { |
835 | list_for_each_entry_safe(entry, n, &audit_filter_list[i], list) { | 1544 | list_for_each_entry_safe(entry, n, &audit_filter_list[i], list) { |
836 | if (!audit_rule_has_selinux(&entry->rule)) | 1545 | if (!audit_rule_has_selinux(&entry->rule)) |
837 | continue; | 1546 | continue; |
838 | 1547 | ||
839 | nentry = audit_dupe_rule(&entry->rule); | 1548 | watch = entry->rule.watch; |
1549 | nentry = audit_dupe_rule(&entry->rule, watch); | ||
840 | if (unlikely(IS_ERR(nentry))) { | 1550 | if (unlikely(IS_ERR(nentry))) { |
841 | /* save the first error encountered for the | 1551 | /* save the first error encountered for the |
842 | * return value */ | 1552 | * return value */ |
843 | if (!err) | 1553 | if (!err) |
844 | err = PTR_ERR(nentry); | 1554 | err = PTR_ERR(nentry); |
845 | audit_panic("error updating selinux filters"); | 1555 | audit_panic("error updating selinux filters"); |
1556 | if (watch) | ||
1557 | list_del(&entry->rule.rlist); | ||
846 | list_del_rcu(&entry->list); | 1558 | list_del_rcu(&entry->list); |
847 | } else { | 1559 | } else { |
1560 | if (watch) { | ||
1561 | list_add(&nentry->rule.rlist, | ||
1562 | &watch->rules); | ||
1563 | list_del(&entry->rule.rlist); | ||
1564 | } | ||
848 | list_replace_rcu(&entry->list, &nentry->list); | 1565 | list_replace_rcu(&entry->list, &nentry->list); |
849 | } | 1566 | } |
850 | call_rcu(&entry->rcu, audit_free_rule_rcu); | 1567 | call_rcu(&entry->rcu, audit_free_rule_rcu); |
851 | } | 1568 | } |
852 | } | 1569 | } |
853 | 1570 | ||
854 | mutex_unlock(&audit_netlink_mutex); | 1571 | mutex_unlock(&audit_filter_mutex); |
855 | 1572 | ||
856 | return err; | 1573 | return err; |
857 | } | 1574 | } |
1575 | |||
1576 | /* Update watch data in audit rules based on inotify events. */ | ||
1577 | void audit_handle_ievent(struct inotify_watch *i_watch, u32 wd, u32 mask, | ||
1578 | u32 cookie, const char *dname, struct inode *inode) | ||
1579 | { | ||
1580 | struct audit_parent *parent; | ||
1581 | |||
1582 | parent = container_of(i_watch, struct audit_parent, wdata); | ||
1583 | |||
1584 | if (mask & (IN_CREATE|IN_MOVED_TO) && inode) | ||
1585 | audit_update_watch(parent, dname, inode->i_sb->s_dev, | ||
1586 | inode->i_ino, 0); | ||
1587 | else if (mask & (IN_DELETE|IN_MOVED_FROM)) | ||
1588 | audit_update_watch(parent, dname, (dev_t)-1, (unsigned long)-1, 1); | ||
1589 | /* inotify automatically removes the watch and sends IN_IGNORED */ | ||
1590 | else if (mask & (IN_DELETE_SELF|IN_UNMOUNT)) | ||
1591 | audit_remove_parent_watches(parent); | ||
1592 | /* inotify does not remove the watch, so remove it manually */ | ||
1593 | else if(mask & IN_MOVE_SELF) { | ||
1594 | audit_remove_parent_watches(parent); | ||
1595 | inotify_remove_watch_locked(audit_ih, i_watch); | ||
1596 | } else if (mask & IN_IGNORED) | ||
1597 | put_inotify_watch(i_watch); | ||
1598 | } | ||
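
audit_handle_ievent() boils the inotify mask down to a handful of cases: creation or move-in updates the watch with the new dev/ino, deletion or move-out invalidates it, self-deletion and unmount drop every watch on the parent (inotify removes its own watch and later sends IN_IGNORED), and a self-move additionally requires removing the inotify watch by hand. The same classification as a runnable userspace decoder built on the IN_* constants from <sys/inotify.h>:

	/* Decode an inotify mask the way the audit event handler classifies it. */
	#include <stdio.h>
	#include <sys/inotify.h>

	static const char *classify(unsigned int mask)
	{
		if (mask & (IN_CREATE | IN_MOVED_TO))
			return "update watch: record new dev/ino";
		if (mask & (IN_DELETE | IN_MOVED_FROM))
			return "update watch: mark inode invalid";
		if (mask & (IN_DELETE_SELF | IN_UNMOUNT))
			return "remove all watches on parent (inotify drops the watch)";
		if (mask & IN_MOVE_SELF)
			return "remove all watches on parent and the inotify watch too";
		if (mask & IN_IGNORED)
			return "drop the reference held on the watch";
		return "ignored by the audit handler";
	}

	int main(void)
	{
		unsigned int samples[] = {
			IN_CREATE, IN_MOVED_FROM, IN_DELETE_SELF, IN_MOVE_SELF,
			IN_IGNORED, IN_ACCESS,
		};
		unsigned int i;

		for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
			printf("mask 0x%08x -> %s\n", samples[i], classify(samples[i]));
		return 0;
	}
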
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 1c03a4ed1b..9ebd96fda2 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c | |||
@@ -3,7 +3,7 @@ | |||
3 | * | 3 | * |
4 | * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. | 4 | * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. |
5 | * Copyright 2005 Hewlett-Packard Development Company, L.P. | 5 | * Copyright 2005 Hewlett-Packard Development Company, L.P. |
6 | * Copyright (C) 2005 IBM Corporation | 6 | * Copyright (C) 2005, 2006 IBM Corporation |
7 | * All Rights Reserved. | 7 | * All Rights Reserved. |
8 | * | 8 | * |
9 | * This program is free software; you can redistribute it and/or modify | 9 | * This program is free software; you can redistribute it and/or modify |
@@ -29,6 +29,9 @@ | |||
29 | * this file -- see entry.S) is based on a GPL'd patch written by | 29 | * this file -- see entry.S) is based on a GPL'd patch written by |
30 | * okir@suse.de and Copyright 2003 SuSE Linux AG. | 30 | * okir@suse.de and Copyright 2003 SuSE Linux AG. |
31 | * | 31 | * |
32 | * POSIX message queue support added by George Wilson <ltcgcw@us.ibm.com>, | ||
33 | * 2006. | ||
34 | * | ||
32 | * The support of additional filter rules compares (>, <, >=, <=) was | 35 | * The support of additional filter rules compares (>, <, >=, <=) was |
33 | * added by Dustin Kirkland <dustin.kirkland@us.ibm.com>, 2005. | 36 | * added by Dustin Kirkland <dustin.kirkland@us.ibm.com>, 2005. |
34 | * | 37 | * |
@@ -49,6 +52,7 @@ | |||
49 | #include <linux/module.h> | 52 | #include <linux/module.h> |
50 | #include <linux/mount.h> | 53 | #include <linux/mount.h> |
51 | #include <linux/socket.h> | 54 | #include <linux/socket.h> |
55 | #include <linux/mqueue.h> | ||
52 | #include <linux/audit.h> | 56 | #include <linux/audit.h> |
53 | #include <linux/personality.h> | 57 | #include <linux/personality.h> |
54 | #include <linux/time.h> | 58 | #include <linux/time.h> |
@@ -59,6 +63,8 @@ | |||
59 | #include <linux/list.h> | 63 | #include <linux/list.h> |
60 | #include <linux/tty.h> | 64 | #include <linux/tty.h> |
61 | #include <linux/selinux.h> | 65 | #include <linux/selinux.h> |
66 | #include <linux/binfmts.h> | ||
67 | #include <linux/syscalls.h> | ||
62 | 68 | ||
63 | #include "audit.h" | 69 | #include "audit.h" |
64 | 70 | ||
@@ -76,6 +82,9 @@ extern int audit_enabled; | |||
76 | * path_lookup. */ | 82 | * path_lookup. */ |
77 | #define AUDIT_NAMES_RESERVED 7 | 83 | #define AUDIT_NAMES_RESERVED 7 |
78 | 84 | ||
85 | /* Indicates that audit should log the full pathname. */ | ||
86 | #define AUDIT_NAME_FULL -1 | ||
87 | |||
79 | /* When fs/namei.c:getname() is called, we store the pointer in name and | 88 | /* When fs/namei.c:getname() is called, we store the pointer in name and |
80 | * we don't let putname() free it (instead we free all of the saved | 89 | * we don't let putname() free it (instead we free all of the saved |
81 | * pointers at syscall exit time). | 90 | * pointers at syscall exit time). |
@@ -83,8 +92,9 @@ extern int audit_enabled; | |||
83 | * Further, in fs/namei.c:path_lookup() we store the inode and device. */ | 92 | * Further, in fs/namei.c:path_lookup() we store the inode and device. */ |
84 | struct audit_names { | 93 | struct audit_names { |
85 | const char *name; | 94 | const char *name; |
95 | int name_len; /* number of name's characters to log */ | ||
96 | unsigned name_put; /* call __putname() for this name */ | ||
86 | unsigned long ino; | 97 | unsigned long ino; |
87 | unsigned long pino; | ||
88 | dev_t dev; | 98 | dev_t dev; |
89 | umode_t mode; | 99 | umode_t mode; |
90 | uid_t uid; | 100 | uid_t uid; |
@@ -100,6 +110,33 @@ struct audit_aux_data { | |||
100 | 110 | ||
101 | #define AUDIT_AUX_IPCPERM 0 | 111 | #define AUDIT_AUX_IPCPERM 0 |
102 | 112 | ||
113 | struct audit_aux_data_mq_open { | ||
114 | struct audit_aux_data d; | ||
115 | int oflag; | ||
116 | mode_t mode; | ||
117 | struct mq_attr attr; | ||
118 | }; | ||
119 | |||
120 | struct audit_aux_data_mq_sendrecv { | ||
121 | struct audit_aux_data d; | ||
122 | mqd_t mqdes; | ||
123 | size_t msg_len; | ||
124 | unsigned int msg_prio; | ||
125 | struct timespec abs_timeout; | ||
126 | }; | ||
127 | |||
128 | struct audit_aux_data_mq_notify { | ||
129 | struct audit_aux_data d; | ||
130 | mqd_t mqdes; | ||
131 | struct sigevent notification; | ||
132 | }; | ||
133 | |||
134 | struct audit_aux_data_mq_getsetattr { | ||
135 | struct audit_aux_data d; | ||
136 | mqd_t mqdes; | ||
137 | struct mq_attr mqstat; | ||
138 | }; | ||
139 | |||
103 | struct audit_aux_data_ipcctl { | 140 | struct audit_aux_data_ipcctl { |
104 | struct audit_aux_data d; | 141 | struct audit_aux_data d; |
105 | struct ipc_perm p; | 142 | struct ipc_perm p; |
@@ -110,6 +147,13 @@ struct audit_aux_data_ipcctl { | |||
110 | u32 osid; | 147 | u32 osid; |
111 | }; | 148 | }; |
112 | 149 | ||
150 | struct audit_aux_data_execve { | ||
151 | struct audit_aux_data d; | ||
152 | int argc; | ||
153 | int envc; | ||
154 | char mem[0]; | ||
155 | }; | ||
156 | |||
113 | struct audit_aux_data_socketcall { | 157 | struct audit_aux_data_socketcall { |
114 | struct audit_aux_data d; | 158 | struct audit_aux_data d; |
115 | int nargs; | 159 | int nargs; |
@@ -148,7 +192,7 @@ struct audit_context { | |||
148 | struct audit_aux_data *aux; | 192 | struct audit_aux_data *aux; |
149 | 193 | ||
150 | /* Save things to print about task_struct */ | 194 | /* Save things to print about task_struct */ |
151 | pid_t pid; | 195 | pid_t pid, ppid; |
152 | uid_t uid, euid, suid, fsuid; | 196 | uid_t uid, euid, suid, fsuid; |
153 | gid_t gid, egid, sgid, fsgid; | 197 | gid_t gid, egid, sgid, fsgid; |
154 | unsigned long personality; | 198 | unsigned long personality; |
@@ -160,12 +204,13 @@ struct audit_context { | |||
160 | #endif | 204 | #endif |
161 | }; | 205 | }; |
162 | 206 | ||
163 | 207 | /* Determine if any context name data matches a rule's watch data */ | |
164 | /* Compare a task_struct with an audit_rule. Return 1 on match, 0 | 208 | /* Compare a task_struct with an audit_rule. Return 1 on match, 0 |
165 | * otherwise. */ | 209 | * otherwise. */ |
166 | static int audit_filter_rules(struct task_struct *tsk, | 210 | static int audit_filter_rules(struct task_struct *tsk, |
167 | struct audit_krule *rule, | 211 | struct audit_krule *rule, |
168 | struct audit_context *ctx, | 212 | struct audit_context *ctx, |
213 | struct audit_names *name, | ||
169 | enum audit_state *state) | 214 | enum audit_state *state) |
170 | { | 215 | { |
171 | int i, j, need_sid = 1; | 216 | int i, j, need_sid = 1; |
@@ -179,6 +224,10 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
179 | case AUDIT_PID: | 224 | case AUDIT_PID: |
180 | result = audit_comparator(tsk->pid, f->op, f->val); | 225 | result = audit_comparator(tsk->pid, f->op, f->val); |
181 | break; | 226 | break; |
227 | case AUDIT_PPID: | ||
228 | if (ctx) | ||
229 | result = audit_comparator(ctx->ppid, f->op, f->val); | ||
230 | break; | ||
182 | case AUDIT_UID: | 231 | case AUDIT_UID: |
183 | result = audit_comparator(tsk->uid, f->op, f->val); | 232 | result = audit_comparator(tsk->uid, f->op, f->val); |
184 | break; | 233 | break; |
@@ -224,7 +273,10 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
224 | } | 273 | } |
225 | break; | 274 | break; |
226 | case AUDIT_DEVMAJOR: | 275 | case AUDIT_DEVMAJOR: |
227 | if (ctx) { | 276 | if (name) |
277 | result = audit_comparator(MAJOR(name->dev), | ||
278 | f->op, f->val); | ||
279 | else if (ctx) { | ||
228 | for (j = 0; j < ctx->name_count; j++) { | 280 | for (j = 0; j < ctx->name_count; j++) { |
229 | if (audit_comparator(MAJOR(ctx->names[j].dev), f->op, f->val)) { | 281 | if (audit_comparator(MAJOR(ctx->names[j].dev), f->op, f->val)) { |
230 | ++result; | 282 | ++result; |
@@ -234,7 +286,10 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
234 | } | 286 | } |
235 | break; | 287 | break; |
236 | case AUDIT_DEVMINOR: | 288 | case AUDIT_DEVMINOR: |
237 | if (ctx) { | 289 | if (name) |
290 | result = audit_comparator(MINOR(name->dev), | ||
291 | f->op, f->val); | ||
292 | else if (ctx) { | ||
238 | for (j = 0; j < ctx->name_count; j++) { | 293 | for (j = 0; j < ctx->name_count; j++) { |
239 | if (audit_comparator(MINOR(ctx->names[j].dev), f->op, f->val)) { | 294 | if (audit_comparator(MINOR(ctx->names[j].dev), f->op, f->val)) { |
240 | ++result; | 295 | ++result; |
@@ -244,16 +299,22 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
244 | } | 299 | } |
245 | break; | 300 | break; |
246 | case AUDIT_INODE: | 301 | case AUDIT_INODE: |
247 | if (ctx) { | 302 | if (name) |
303 | result = (name->ino == f->val); | ||
304 | else if (ctx) { | ||
248 | for (j = 0; j < ctx->name_count; j++) { | 305 | for (j = 0; j < ctx->name_count; j++) { |
249 | if (audit_comparator(ctx->names[j].ino, f->op, f->val) || | 306 | if (audit_comparator(ctx->names[j].ino, f->op, f->val)) { |
250 | audit_comparator(ctx->names[j].pino, f->op, f->val)) { | ||
251 | ++result; | 307 | ++result; |
252 | break; | 308 | break; |
253 | } | 309 | } |
254 | } | 310 | } |
255 | } | 311 | } |
256 | break; | 312 | break; |
313 | case AUDIT_WATCH: | ||
314 | if (name && rule->watch->ino != (unsigned long)-1) | ||
315 | result = (name->dev == rule->watch->dev && | ||
316 | name->ino == rule->watch->ino); | ||
317 | break; | ||
257 | case AUDIT_LOGINUID: | 318 | case AUDIT_LOGINUID: |
258 | result = 0; | 319 | result = 0; |
259 | if (ctx) | 320 | if (ctx) |
@@ -294,7 +355,6 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
294 | } | 355 | } |
295 | switch (rule->action) { | 356 | switch (rule->action) { |
296 | case AUDIT_NEVER: *state = AUDIT_DISABLED; break; | 357 | case AUDIT_NEVER: *state = AUDIT_DISABLED; break; |
297 | case AUDIT_POSSIBLE: *state = AUDIT_BUILD_CONTEXT; break; | ||
298 | case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; | 358 | case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; |
299 | } | 359 | } |
300 | return 1; | 360 | return 1; |
@@ -311,7 +371,7 @@ static enum audit_state audit_filter_task(struct task_struct *tsk) | |||
311 | 371 | ||
312 | rcu_read_lock(); | 372 | rcu_read_lock(); |
313 | list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_TASK], list) { | 373 | list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_TASK], list) { |
314 | if (audit_filter_rules(tsk, &e->rule, NULL, &state)) { | 374 | if (audit_filter_rules(tsk, &e->rule, NULL, NULL, &state)) { |
315 | rcu_read_unlock(); | 375 | rcu_read_unlock(); |
316 | return state; | 376 | return state; |
317 | } | 377 | } |
@@ -341,8 +401,47 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, | |||
341 | int bit = AUDIT_BIT(ctx->major); | 401 | int bit = AUDIT_BIT(ctx->major); |
342 | 402 | ||
343 | list_for_each_entry_rcu(e, list, list) { | 403 | list_for_each_entry_rcu(e, list, list) { |
344 | if ((e->rule.mask[word] & bit) == bit | 404 | if ((e->rule.mask[word] & bit) == bit && |
345 | && audit_filter_rules(tsk, &e->rule, ctx, &state)) { | 405 | audit_filter_rules(tsk, &e->rule, ctx, NULL, |
406 | &state)) { | ||
407 | rcu_read_unlock(); | ||
408 | return state; | ||
409 | } | ||
410 | } | ||
411 | } | ||
412 | rcu_read_unlock(); | ||
413 | return AUDIT_BUILD_CONTEXT; | ||
414 | } | ||
415 | |||
416 | /* At syscall exit time, this filter is called if any audit_names[] have been | ||
417 | * collected during syscall processing. We only check rules in sublists at hash | ||
418 | * buckets applicable to the inode numbers in audit_names[]. | ||
419 | * Regarding audit_state, same rules apply as for audit_filter_syscall(). | ||
420 | */ | ||
421 | enum audit_state audit_filter_inodes(struct task_struct *tsk, | ||
422 | struct audit_context *ctx) | ||
423 | { | ||
424 | int i; | ||
425 | struct audit_entry *e; | ||
426 | enum audit_state state; | ||
427 | |||
428 | if (audit_pid && tsk->tgid == audit_pid) | ||
429 | return AUDIT_DISABLED; | ||
430 | |||
431 | rcu_read_lock(); | ||
432 | for (i = 0; i < ctx->name_count; i++) { | ||
433 | int word = AUDIT_WORD(ctx->major); | ||
434 | int bit = AUDIT_BIT(ctx->major); | ||
435 | struct audit_names *n = &ctx->names[i]; | ||
436 | int h = audit_hash_ino((u32)n->ino); | ||
437 | struct list_head *list = &audit_inode_hash[h]; | ||
438 | |||
439 | if (list_empty(list)) | ||
440 | continue; | ||
441 | |||
442 | list_for_each_entry_rcu(e, list, list) { | ||
443 | if ((e->rule.mask[word] & bit) == bit && | ||
444 | audit_filter_rules(tsk, &e->rule, ctx, n, &state)) { | ||
346 | rcu_read_unlock(); | 445 | rcu_read_unlock(); |
347 | return state; | 446 | return state; |
348 | } | 447 | } |
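
Both audit_filter_syscall() and the new audit_filter_inodes() gate the expensive field comparison on the rule's syscall bitmap: the syscall number picks a 32-bit word and a bit inside e->rule.mask[], and only rules with that bit set are evaluated. Here is a sketch of the word/bit test, assuming the usual nr/32, 1 << (nr % 32) encoding used by the AUDIT_WORD/AUDIT_BIT macros.

	/* Sketch of the per-rule syscall bitmap check used by the exit filters. */
	#include <stdio.h>

	#define MASK_WORDS 64			/* assumed bitmask size for the sketch */

	struct rule_mask {
		unsigned int mask[MASK_WORDS];
	};

	/* Assumed encoding: word = nr / 32, bit = 1 << (nr % 32). */
	static void mask_set_syscall(struct rule_mask *m, unsigned int nr)
	{
		m->mask[nr / 32] |= 1u << (nr % 32);
	}

	static int mask_test_syscall(const struct rule_mask *m, unsigned int nr)
	{
		unsigned int word = nr / 32;
		unsigned int bit = 1u << (nr % 32);

		return (m->mask[word] & bit) == bit;
	}

	int main(void)
	{
		struct rule_mask m = { { 0 } };

		mask_set_syscall(&m, 2);	/* some syscall number of interest */
		printf("nr 2 selected: %d\n", mask_test_syscall(&m, 2));
		printf("nr 3 selected: %d\n", mask_test_syscall(&m, 3));
		return 0;
	}
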
@@ -352,6 +451,11 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, | |||
352 | return AUDIT_BUILD_CONTEXT; | 451 | return AUDIT_BUILD_CONTEXT; |
353 | } | 452 | } |
354 | 453 | ||
454 | void audit_set_auditable(struct audit_context *ctx) | ||
455 | { | ||
456 | ctx->auditable = 1; | ||
457 | } | ||
458 | |||
355 | static inline struct audit_context *audit_get_context(struct task_struct *tsk, | 459 | static inline struct audit_context *audit_get_context(struct task_struct *tsk, |
356 | int return_valid, | 460 | int return_valid, |
357 | int return_code) | 461 | int return_code) |
@@ -365,12 +469,22 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk, | |||
365 | 469 | ||
366 | if (context->in_syscall && !context->auditable) { | 470 | if (context->in_syscall && !context->auditable) { |
367 | enum audit_state state; | 471 | enum audit_state state; |
472 | |||
368 | state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]); | 473 | state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]); |
474 | if (state == AUDIT_RECORD_CONTEXT) { | ||
475 | context->auditable = 1; | ||
476 | goto get_context; | ||
477 | } | ||
478 | |||
479 | state = audit_filter_inodes(tsk, context); | ||
369 | if (state == AUDIT_RECORD_CONTEXT) | 480 | if (state == AUDIT_RECORD_CONTEXT) |
370 | context->auditable = 1; | 481 | context->auditable = 1; |
482 | |||
371 | } | 483 | } |
372 | 484 | ||
485 | get_context: | ||
373 | context->pid = tsk->pid; | 486 | context->pid = tsk->pid; |
487 | context->ppid = sys_getppid(); /* sic. tsk == current in all cases */ | ||
374 | context->uid = tsk->uid; | 488 | context->uid = tsk->uid; |
375 | context->gid = tsk->gid; | 489 | context->gid = tsk->gid; |
376 | context->euid = tsk->euid; | 490 | context->euid = tsk->euid; |
@@ -413,7 +527,7 @@ static inline void audit_free_names(struct audit_context *context) | |||
413 | #endif | 527 | #endif |
414 | 528 | ||
415 | for (i = 0; i < context->name_count; i++) { | 529 | for (i = 0; i < context->name_count; i++) { |
416 | if (context->names[i].name) | 530 | if (context->names[i].name && context->names[i].name_put) |
417 | __putname(context->names[i].name); | 531 | __putname(context->names[i].name); |
418 | } | 532 | } |
419 | context->name_count = 0; | 533 | context->name_count = 0; |
@@ -606,7 +720,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts | |||
606 | tty = "(none)"; | 720 | tty = "(none)"; |
607 | audit_log_format(ab, | 721 | audit_log_format(ab, |
608 | " a0=%lx a1=%lx a2=%lx a3=%lx items=%d" | 722 | " a0=%lx a1=%lx a2=%lx a3=%lx items=%d" |
609 | " pid=%d auid=%u uid=%u gid=%u" | 723 | " ppid=%d pid=%d auid=%u uid=%u gid=%u" |
610 | " euid=%u suid=%u fsuid=%u" | 724 | " euid=%u suid=%u fsuid=%u" |
611 | " egid=%u sgid=%u fsgid=%u tty=%s", | 725 | " egid=%u sgid=%u fsgid=%u tty=%s", |
612 | context->argv[0], | 726 | context->argv[0], |
@@ -614,6 +728,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts | |||
614 | context->argv[2], | 728 | context->argv[2], |
615 | context->argv[3], | 729 | context->argv[3], |
616 | context->name_count, | 730 | context->name_count, |
731 | context->ppid, | ||
617 | context->pid, | 732 | context->pid, |
618 | context->loginuid, | 733 | context->loginuid, |
619 | context->uid, | 734 | context->uid, |
@@ -630,11 +745,48 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts | |||
630 | continue; /* audit_panic has been called */ | 745 | continue; /* audit_panic has been called */ |
631 | 746 | ||
632 | switch (aux->type) { | 747 | switch (aux->type) { |
748 | case AUDIT_MQ_OPEN: { | ||
749 | struct audit_aux_data_mq_open *axi = (void *)aux; | ||
750 | audit_log_format(ab, | ||
751 | "oflag=0x%x mode=%#o mq_flags=0x%lx mq_maxmsg=%ld " | ||
752 | "mq_msgsize=%ld mq_curmsgs=%ld", | ||
753 | axi->oflag, axi->mode, axi->attr.mq_flags, | ||
754 | axi->attr.mq_maxmsg, axi->attr.mq_msgsize, | ||
755 | axi->attr.mq_curmsgs); | ||
756 | break; } | ||
757 | |||
758 | case AUDIT_MQ_SENDRECV: { | ||
759 | struct audit_aux_data_mq_sendrecv *axi = (void *)aux; | ||
760 | audit_log_format(ab, | ||
761 | "mqdes=%d msg_len=%zd msg_prio=%u " | ||
762 | "abs_timeout_sec=%ld abs_timeout_nsec=%ld", | ||
763 | axi->mqdes, axi->msg_len, axi->msg_prio, | ||
764 | axi->abs_timeout.tv_sec, axi->abs_timeout.tv_nsec); | ||
765 | break; } | ||
766 | |||
767 | case AUDIT_MQ_NOTIFY: { | ||
768 | struct audit_aux_data_mq_notify *axi = (void *)aux; | ||
769 | audit_log_format(ab, | ||
770 | "mqdes=%d sigev_signo=%d", | ||
771 | axi->mqdes, | ||
772 | axi->notification.sigev_signo); | ||
773 | break; } | ||
774 | |||
775 | case AUDIT_MQ_GETSETATTR: { | ||
776 | struct audit_aux_data_mq_getsetattr *axi = (void *)aux; | ||
777 | audit_log_format(ab, | ||
778 | "mqdes=%d mq_flags=0x%lx mq_maxmsg=%ld mq_msgsize=%ld " | ||
779 | "mq_curmsgs=%ld ", | ||
780 | axi->mqdes, | ||
781 | axi->mqstat.mq_flags, axi->mqstat.mq_maxmsg, | ||
782 | axi->mqstat.mq_msgsize, axi->mqstat.mq_curmsgs); | ||
783 | break; } | ||
784 | |||
633 | case AUDIT_IPC: { | 785 | case AUDIT_IPC: { |
634 | struct audit_aux_data_ipcctl *axi = (void *)aux; | 786 | struct audit_aux_data_ipcctl *axi = (void *)aux; |
635 | audit_log_format(ab, | 787 | audit_log_format(ab, |
636 | " qbytes=%lx iuid=%u igid=%u mode=%x", | 788 | "ouid=%u ogid=%u mode=%x", |
637 | axi->qbytes, axi->uid, axi->gid, axi->mode); | 789 | axi->uid, axi->gid, axi->mode); |
638 | if (axi->osid != 0) { | 790 | if (axi->osid != 0) { |
639 | char *ctx = NULL; | 791 | char *ctx = NULL; |
640 | u32 len; | 792 | u32 len; |
@@ -652,19 +804,18 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts | |||
652 | case AUDIT_IPC_SET_PERM: { | 804 | case AUDIT_IPC_SET_PERM: { |
653 | struct audit_aux_data_ipcctl *axi = (void *)aux; | 805 | struct audit_aux_data_ipcctl *axi = (void *)aux; |
654 | audit_log_format(ab, | 806 | audit_log_format(ab, |
655 | " new qbytes=%lx new iuid=%u new igid=%u new mode=%x", | 807 | "qbytes=%lx ouid=%u ogid=%u mode=%x", |
656 | axi->qbytes, axi->uid, axi->gid, axi->mode); | 808 | axi->qbytes, axi->uid, axi->gid, axi->mode); |
657 | if (axi->osid != 0) { | 809 | break; } |
658 | char *ctx = NULL; | 810 | |
659 | u32 len; | 811 | case AUDIT_EXECVE: { |
660 | if (selinux_ctxid_to_string( | 812 | struct audit_aux_data_execve *axi = (void *)aux; |
661 | axi->osid, &ctx, &len)) { | 813 | int i; |
662 | audit_log_format(ab, " osid=%u", | 814 | const char *p; |
663 | axi->osid); | 815 | for (i = 0, p = axi->mem; i < axi->argc; i++) { |
664 | call_panic = 1; | 816 | audit_log_format(ab, "a%d=", i); |
665 | } else | 817 | p = audit_log_untrustedstring(ab, p); |
666 | audit_log_format(ab, " obj=%s", ctx); | 818 | audit_log_format(ab, "\n"); |
667 | kfree(ctx); | ||
668 | } | 819 | } |
669 | break; } | 820 | break; } |
670 | 821 | ||
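
The AUDIT_EXECVE record above walks the copy of the exec arguments saved by audit_bprm(): the strings sit back to back in a single buffer, and each loop iteration logs aN= followed by one string and then steps past its terminating NUL. A userspace model of walking argc packed strings; the helper name and sample data are illustrative.

	/* Walk argc NUL-terminated strings packed back to back in one buffer. */
	#include <stdio.h>
	#include <string.h>

	static void log_execve_args(const char *mem, int argc)
	{
		const char *p = mem;
		int i;

		for (i = 0; i < argc; i++) {
			printf("a%d=%s\n", i, p);
			p += strlen(p) + 1;	/* advance past this string's NUL */
		}
	}

	int main(void)
	{
		/* "ls\0-l\0/tmp\0" : three arguments in one contiguous buffer */
		const char mem[] = "ls\0-l\0/tmp";

		log_execve_args(mem, 3);
		return 0;
	}
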
@@ -700,8 +851,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts | |||
700 | } | 851 | } |
701 | } | 852 | } |
702 | for (i = 0; i < context->name_count; i++) { | 853 | for (i = 0; i < context->name_count; i++) { |
703 | unsigned long ino = context->names[i].ino; | 854 | struct audit_names *n = &context->names[i]; |
704 | unsigned long pino = context->names[i].pino; | ||
705 | 855 | ||
706 | ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH); | 856 | ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH); |
707 | if (!ab) | 857 | if (!ab) |
@@ -709,33 +859,47 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts | |||
709 | 859 | ||
710 | audit_log_format(ab, "item=%d", i); | 860 | audit_log_format(ab, "item=%d", i); |
711 | 861 | ||
712 | audit_log_format(ab, " name="); | 862 | if (n->name) { |
713 | if (context->names[i].name) | 863 | switch(n->name_len) { |
714 | audit_log_untrustedstring(ab, context->names[i].name); | 864 | case AUDIT_NAME_FULL: |
715 | else | 865 | /* log the full path */ |
716 | audit_log_format(ab, "(null)"); | 866 | audit_log_format(ab, " name="); |
717 | 867 | audit_log_untrustedstring(ab, n->name); | |
718 | if (pino != (unsigned long)-1) | 868 | break; |
719 | audit_log_format(ab, " parent=%lu", pino); | 869 | case 0: |
720 | if (ino != (unsigned long)-1) | 870 | /* name was specified as a relative path and the |
721 | audit_log_format(ab, " inode=%lu", ino); | 871 | * directory component is the cwd */ |
722 | if ((pino != (unsigned long)-1) || (ino != (unsigned long)-1)) | 872 | audit_log_d_path(ab, " name=", context->pwd, |
723 | audit_log_format(ab, " dev=%02x:%02x mode=%#o" | 873 | context->pwdmnt); |
724 | " ouid=%u ogid=%u rdev=%02x:%02x", | 874 | break; |
725 | MAJOR(context->names[i].dev), | 875 | default: |
726 | MINOR(context->names[i].dev), | 876 | /* log the name's directory component */ |
727 | context->names[i].mode, | 877 | audit_log_format(ab, " name="); |
728 | context->names[i].uid, | 878 | audit_log_n_untrustedstring(ab, n->name_len, |
729 | context->names[i].gid, | 879 | n->name); |
730 | MAJOR(context->names[i].rdev), | 880 | } |
731 | MINOR(context->names[i].rdev)); | 881 | } else |
732 | if (context->names[i].osid != 0) { | 882 | audit_log_format(ab, " name=(null)"); |
883 | |||
884 | if (n->ino != (unsigned long)-1) { | ||
885 | audit_log_format(ab, " inode=%lu" | ||
886 | " dev=%02x:%02x mode=%#o" | ||
887 | " ouid=%u ogid=%u rdev=%02x:%02x", | ||
888 | n->ino, | ||
889 | MAJOR(n->dev), | ||
890 | MINOR(n->dev), | ||
891 | n->mode, | ||
892 | n->uid, | ||
893 | n->gid, | ||
894 | MAJOR(n->rdev), | ||
895 | MINOR(n->rdev)); | ||
896 | } | ||
897 | if (n->osid != 0) { | ||
733 | char *ctx = NULL; | 898 | char *ctx = NULL; |
734 | u32 len; | 899 | u32 len; |
735 | if (selinux_ctxid_to_string( | 900 | if (selinux_ctxid_to_string( |
736 | context->names[i].osid, &ctx, &len)) { | 901 | n->osid, &ctx, &len)) { |
737 | audit_log_format(ab, " osid=%u", | 902 | audit_log_format(ab, " osid=%u", n->osid); |
738 | context->names[i].osid); | ||
739 | call_panic = 2; | 903 | call_panic = 2; |
740 | } else | 904 | } else |
741 | audit_log_format(ab, " obj=%s", ctx); | 905 | audit_log_format(ab, " obj=%s", ctx); |
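
Each audit_names entry now carries name_len, and the PATH record logging above uses it three ways: AUDIT_NAME_FULL prints the stored string whole, 0 means the lookup was relative so the directory component is the task's cwd, and any other value prints only the first name_len characters, i.e. the directory prefix recorded for a child entry. A small sketch of that three-way decision; the constant value and the cwd strings are placeholders.

	/* Sketch of how name_len selects what gets printed for a PATH record. */
	#include <stdio.h>

	#define NAME_FULL -1		/* stand-in for AUDIT_NAME_FULL */

	static void log_name(const char *name, int name_len, const char *cwd)
	{
		if (!name) {
			printf(" name=(null)\n");
			return;
		}
		switch (name_len) {
		case NAME_FULL:
			/* log the full recorded path */
			printf(" name=%s\n", name);
			break;
		case 0:
			/* relative lookup: the directory component is the cwd */
			printf(" name=%s\n", cwd);
			break;
		default:
			/* log only the directory prefix of the recorded path */
			printf(" name=%.*s\n", name_len, name);
		}
	}

	int main(void)
	{
		log_name("/etc/passwd", NAME_FULL, "/root");	/* full path */
		log_name("passwd", 0, "/etc");			/* cwd is the parent */
		log_name("/etc/passwd", 5, "/root");		/* prefix "/etc/" */
		log_name(NULL, NAME_FULL, "/root");		/* no name recorded */
		return 0;
	}
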
@@ -908,11 +1072,11 @@ void audit_syscall_exit(int valid, long return_code) | |||
908 | * Add a name to the list of audit names for this context. | 1072 | * Add a name to the list of audit names for this context. |
909 | * Called from fs/namei.c:getname(). | 1073 | * Called from fs/namei.c:getname(). |
910 | */ | 1074 | */ |
911 | void audit_getname(const char *name) | 1075 | void __audit_getname(const char *name) |
912 | { | 1076 | { |
913 | struct audit_context *context = current->audit_context; | 1077 | struct audit_context *context = current->audit_context; |
914 | 1078 | ||
915 | if (!context || IS_ERR(name) || !name) | 1079 | if (IS_ERR(name) || !name) |
916 | return; | 1080 | return; |
917 | 1081 | ||
918 | if (!context->in_syscall) { | 1082 | if (!context->in_syscall) { |
@@ -925,6 +1089,8 @@ void audit_getname(const char *name) | |||
925 | } | 1089 | } |
926 | BUG_ON(context->name_count >= AUDIT_NAMES); | 1090 | BUG_ON(context->name_count >= AUDIT_NAMES); |
927 | context->names[context->name_count].name = name; | 1091 | context->names[context->name_count].name = name; |
1092 | context->names[context->name_count].name_len = AUDIT_NAME_FULL; | ||
1093 | context->names[context->name_count].name_put = 1; | ||
928 | context->names[context->name_count].ino = (unsigned long)-1; | 1094 | context->names[context->name_count].ino = (unsigned long)-1; |
929 | ++context->name_count; | 1095 | ++context->name_count; |
930 | if (!context->pwd) { | 1096 | if (!context->pwd) { |
@@ -991,11 +1157,10 @@ static void audit_inode_context(int idx, const struct inode *inode) | |||
991 | * audit_inode - store the inode and device from a lookup | 1157 | * audit_inode - store the inode and device from a lookup |
992 | * @name: name being audited | 1158 | * @name: name being audited |
993 | * @inode: inode being audited | 1159 | * @inode: inode being audited |
994 | * @flags: lookup flags (as used in path_lookup()) | ||
995 | * | 1160 | * |
996 | * Called from fs/namei.c:path_lookup(). | 1161 | * Called from fs/namei.c:path_lookup(). |
997 | */ | 1162 | */ |
998 | void __audit_inode(const char *name, const struct inode *inode, unsigned flags) | 1163 | void __audit_inode(const char *name, const struct inode *inode) |
999 | { | 1164 | { |
1000 | int idx; | 1165 | int idx; |
1001 | struct audit_context *context = current->audit_context; | 1166 | struct audit_context *context = current->audit_context; |
@@ -1021,20 +1186,13 @@ void __audit_inode(const char *name, const struct inode *inode, unsigned flags) | |||
1021 | ++context->ino_count; | 1186 | ++context->ino_count; |
1022 | #endif | 1187 | #endif |
1023 | } | 1188 | } |
1189 | context->names[idx].ino = inode->i_ino; | ||
1024 | context->names[idx].dev = inode->i_sb->s_dev; | 1190 | context->names[idx].dev = inode->i_sb->s_dev; |
1025 | context->names[idx].mode = inode->i_mode; | 1191 | context->names[idx].mode = inode->i_mode; |
1026 | context->names[idx].uid = inode->i_uid; | 1192 | context->names[idx].uid = inode->i_uid; |
1027 | context->names[idx].gid = inode->i_gid; | 1193 | context->names[idx].gid = inode->i_gid; |
1028 | context->names[idx].rdev = inode->i_rdev; | 1194 | context->names[idx].rdev = inode->i_rdev; |
1029 | audit_inode_context(idx, inode); | 1195 | audit_inode_context(idx, inode); |
1030 | if ((flags & LOOKUP_PARENT) && (strcmp(name, "/") != 0) && | ||
1031 | (strcmp(name, ".") != 0)) { | ||
1032 | context->names[idx].ino = (unsigned long)-1; | ||
1033 | context->names[idx].pino = inode->i_ino; | ||
1034 | } else { | ||
1035 | context->names[idx].ino = inode->i_ino; | ||
1036 | context->names[idx].pino = (unsigned long)-1; | ||
1037 | } | ||
1038 | } | 1196 | } |
1039 | 1197 | ||
1040 | /** | 1198 | /** |
@@ -1056,51 +1214,40 @@ void __audit_inode_child(const char *dname, const struct inode *inode, | |||
1056 | { | 1214 | { |
1057 | int idx; | 1215 | int idx; |
1058 | struct audit_context *context = current->audit_context; | 1216 | struct audit_context *context = current->audit_context; |
1217 | const char *found_name = NULL; | ||
1218 | int dirlen = 0; | ||
1059 | 1219 | ||
1060 | if (!context->in_syscall) | 1220 | if (!context->in_syscall) |
1061 | return; | 1221 | return; |
1062 | 1222 | ||
1063 | /* determine matching parent */ | 1223 | /* determine matching parent */ |
1064 | if (dname) | 1224 | if (!dname) |
1065 | for (idx = 0; idx < context->name_count; idx++) | 1225 | goto update_context; |
1066 | if (context->names[idx].pino == pino) { | 1226 | for (idx = 0; idx < context->name_count; idx++) |
1067 | const char *n; | 1227 | if (context->names[idx].ino == pino) { |
1068 | const char *name = context->names[idx].name; | 1228 | const char *name = context->names[idx].name; |
1069 | int dlen = strlen(dname); | 1229 | |
1070 | int nlen = name ? strlen(name) : 0; | 1230 | if (!name) |
1071 | 1231 | continue; | |
1072 | if (nlen < dlen) | 1232 | |
1073 | continue; | 1233 | if (audit_compare_dname_path(dname, name, &dirlen) == 0) { |
1074 | 1234 | context->names[idx].name_len = dirlen; | |
1075 | /* disregard trailing slashes */ | 1235 | found_name = name; |
1076 | n = name + nlen - 1; | 1236 | break; |
1077 | while ((*n == '/') && (n > name)) | ||
1078 | n--; | ||
1079 | |||
1080 | /* find last path component */ | ||
1081 | n = n - dlen + 1; | ||
1082 | if (n < name) | ||
1083 | continue; | ||
1084 | else if (n > name) { | ||
1085 | if (*--n != '/') | ||
1086 | continue; | ||
1087 | else | ||
1088 | n++; | ||
1089 | } | ||
1090 | |||
1091 | if (strncmp(n, dname, dlen) == 0) | ||
1092 | goto update_context; | ||
1093 | } | 1237 | } |
1238 | } | ||
1094 | 1239 | ||
1095 | /* catch-all in case match not found */ | 1240 | update_context: |
1096 | idx = context->name_count++; | 1241 | idx = context->name_count++; |
1097 | context->names[idx].name = NULL; | ||
1098 | context->names[idx].pino = pino; | ||
1099 | #if AUDIT_DEBUG | 1242 | #if AUDIT_DEBUG |
1100 | context->ino_count++; | 1243 | context->ino_count++; |
1101 | #endif | 1244 | #endif |
1245 | /* Re-use the name belonging to the slot for a matching parent directory. | ||
1246 | * All names for this context are relinquished in audit_free_names() */ | ||
1247 | context->names[idx].name = found_name; | ||
1248 | context->names[idx].name_len = AUDIT_NAME_FULL; | ||
1249 | context->names[idx].name_put = 0; /* don't call __putname() */ | ||
1102 | 1250 | ||
1103 | update_context: | ||
1104 | if (inode) { | 1251 | if (inode) { |
1105 | context->names[idx].ino = inode->i_ino; | 1252 | context->names[idx].ino = inode->i_ino; |
1106 | context->names[idx].dev = inode->i_sb->s_dev; | 1253 | context->names[idx].dev = inode->i_sb->s_dev; |
@@ -1109,7 +1256,8 @@ update_context: | |||
1109 | context->names[idx].gid = inode->i_gid; | 1256 | context->names[idx].gid = inode->i_gid; |
1110 | context->names[idx].rdev = inode->i_rdev; | 1257 | context->names[idx].rdev = inode->i_rdev; |
1111 | audit_inode_context(idx, inode); | 1258 | audit_inode_context(idx, inode); |
1112 | } | 1259 | } else |
1260 | context->names[idx].ino = (unsigned long)-1; | ||
1113 | } | 1261 | } |
1114 | 1262 | ||
1115 | /** | 1263 | /** |
@@ -1142,18 +1290,23 @@ void auditsc_get_stamp(struct audit_context *ctx, | |||
1142 | */ | 1290 | */ |
1143 | int audit_set_loginuid(struct task_struct *task, uid_t loginuid) | 1291 | int audit_set_loginuid(struct task_struct *task, uid_t loginuid) |
1144 | { | 1292 | { |
1145 | if (task->audit_context) { | 1293 | struct audit_context *context = task->audit_context; |
1146 | struct audit_buffer *ab; | 1294 | |
1147 | 1295 | if (context) { | |
1148 | ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); | 1296 | /* Only log if audit is enabled */ |
1149 | if (ab) { | 1297 | if (context->in_syscall) { |
1150 | audit_log_format(ab, "login pid=%d uid=%u " | 1298 | struct audit_buffer *ab; |
1151 | "old auid=%u new auid=%u", | 1299 | |
1152 | task->pid, task->uid, | 1300 | ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); |
1153 | task->audit_context->loginuid, loginuid); | 1301 | if (ab) { |
1154 | audit_log_end(ab); | 1302 | audit_log_format(ab, "login pid=%d uid=%u " |
1303 | "old auid=%u new auid=%u", | ||
1304 | task->pid, task->uid, | ||
1305 | context->loginuid, loginuid); | ||
1306 | audit_log_end(ab); | ||
1307 | } | ||
1155 | } | 1308 | } |
1156 | task->audit_context->loginuid = loginuid; | 1309 | context->loginuid = loginuid; |
1157 | } | 1310 | } |
1158 | return 0; | 1311 | return 0; |
1159 | } | 1312 | } |
@@ -1170,16 +1323,193 @@ uid_t audit_get_loginuid(struct audit_context *ctx) | |||
1170 | } | 1323 | } |
1171 | 1324 | ||
1172 | /** | 1325 | /** |
1173 | * audit_ipc_obj - record audit data for ipc object | 1326 | * __audit_mq_open - record audit data for a POSIX MQ open |
1174 | * @ipcp: ipc permissions | 1327 | * @oflag: open flag |
1328 | * @mode: mode bits | ||
1329 | * @u_attr: queue attributes | ||
1175 | * | 1330 | * |
1176 | * Returns 0 for success or NULL context or < 0 on error. | 1331 | * Returns 0 for success or NULL context or < 0 on error. |
1177 | */ | 1332 | */ |
1178 | int audit_ipc_obj(struct kern_ipc_perm *ipcp) | 1333 | int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr) |
1179 | { | 1334 | { |
1180 | struct audit_aux_data_ipcctl *ax; | 1335 | struct audit_aux_data_mq_open *ax; |
1336 | struct audit_context *context = current->audit_context; | ||
1337 | |||
1338 | if (!audit_enabled) | ||
1339 | return 0; | ||
1340 | |||
1341 | if (likely(!context)) | ||
1342 | return 0; | ||
1343 | |||
1344 | ax = kmalloc(sizeof(*ax), GFP_ATOMIC); | ||
1345 | if (!ax) | ||
1346 | return -ENOMEM; | ||
1347 | |||
1348 | if (u_attr != NULL) { | ||
1349 | if (copy_from_user(&ax->attr, u_attr, sizeof(ax->attr))) { | ||
1350 | kfree(ax); | ||
1351 | return -EFAULT; | ||
1352 | } | ||
1353 | } else | ||
1354 | memset(&ax->attr, 0, sizeof(ax->attr)); | ||
1355 | |||
1356 | ax->oflag = oflag; | ||
1357 | ax->mode = mode; | ||
1358 | |||
1359 | ax->d.type = AUDIT_MQ_OPEN; | ||
1360 | ax->d.next = context->aux; | ||
1361 | context->aux = (void *)ax; | ||
1362 | return 0; | ||
1363 | } | ||
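
Every one of the new __audit_mq_* helpers follows the same recipe: allocate a type-specific aux record, copy optional user data or zero the field when the pointer is NULL, stamp d.type, and push the record onto the front of context->aux so audit_log_exit() can walk the chain at syscall exit. Below is a userspace sketch of that singly linked aux chain; the types and payloads are stand-ins for the real per-record fields.

	/* Sketch of the aux-record chain that syscall helpers prepend to. */
	#include <stdio.h>
	#include <stdlib.h>

	enum aux_type { AUX_MQ_OPEN, AUX_MQ_SENDRECV };

	struct aux_data {
		enum aux_type type;
		struct aux_data *next;
		int payload;		/* stands in for the per-type fields */
	};

	struct context {
		struct aux_data *aux;	/* head of the chain, newest first */
	};

	static int record_aux(struct context *ctx, enum aux_type type, int payload)
	{
		struct aux_data *ax = malloc(sizeof(*ax));

		if (!ax)
			return -1;
		ax->type = type;
		ax->payload = payload;
		ax->next = ctx->aux;	/* push onto the front of the chain */
		ctx->aux = ax;
		return 0;
	}

	static void log_and_free(struct context *ctx)
	{
		struct aux_data *ax;

		while ((ax = ctx->aux) != NULL) {
			ctx->aux = ax->next;
			printf("aux type=%d payload=%d\n", ax->type, ax->payload);
			free(ax);
		}
	}

	int main(void)
	{
		struct context ctx = { NULL };

		record_aux(&ctx, AUX_MQ_OPEN, 42);
		record_aux(&ctx, AUX_MQ_SENDRECV, 7);
		log_and_free(&ctx);	/* newest record comes out first */
		return 0;
	}
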
1364 | |||
1365 | /** | ||
1366 | * __audit_mq_timedsend - record audit data for a POSIX MQ timed send | ||
1367 | * @mqdes: MQ descriptor | ||
1368 | * @msg_len: Message length | ||
1369 | * @msg_prio: Message priority | ||
1370 | * @u_abs_timeout: Message timeout in absolute time | ||
1371 | * | ||
1372 | * Returns 0 for success or NULL context or < 0 on error. | ||
1373 | */ | ||
1374 | int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, | ||
1375 | const struct timespec __user *u_abs_timeout) | ||
1376 | { | ||
1377 | struct audit_aux_data_mq_sendrecv *ax; | ||
1378 | struct audit_context *context = current->audit_context; | ||
1379 | |||
1380 | if (!audit_enabled) | ||
1381 | return 0; | ||
1382 | |||
1383 | if (likely(!context)) | ||
1384 | return 0; | ||
1385 | |||
1386 | ax = kmalloc(sizeof(*ax), GFP_ATOMIC); | ||
1387 | if (!ax) | ||
1388 | return -ENOMEM; | ||
1389 | |||
1390 | if (u_abs_timeout != NULL) { | ||
1391 | if (copy_from_user(&ax->abs_timeout, u_abs_timeout, sizeof(ax->abs_timeout))) { | ||
1392 | kfree(ax); | ||
1393 | return -EFAULT; | ||
1394 | } | ||
1395 | } else | ||
1396 | memset(&ax->abs_timeout, 0, sizeof(ax->abs_timeout)); | ||
1397 | |||
1398 | ax->mqdes = mqdes; | ||
1399 | ax->msg_len = msg_len; | ||
1400 | ax->msg_prio = msg_prio; | ||
1401 | |||
1402 | ax->d.type = AUDIT_MQ_SENDRECV; | ||
1403 | ax->d.next = context->aux; | ||
1404 | context->aux = (void *)ax; | ||
1405 | return 0; | ||
1406 | } | ||
1407 | |||
1408 | /** | ||
1409 | * __audit_mq_timedreceive - record audit data for a POSIX MQ timed receive | ||
1410 | * @mqdes: MQ descriptor | ||
1411 | * @msg_len: Message length | ||
1412 | * @u_msg_prio: Message priority | ||
1413 | * @u_abs_timeout: Message timeout in absolute time | ||
1414 | * | ||
1415 | * Returns 0 for success or NULL context or < 0 on error. | ||
1416 | */ | ||
1417 | int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, | ||
1418 | unsigned int __user *u_msg_prio, | ||
1419 | const struct timespec __user *u_abs_timeout) | ||
1420 | { | ||
1421 | struct audit_aux_data_mq_sendrecv *ax; | ||
1422 | struct audit_context *context = current->audit_context; | ||
1423 | |||
1424 | if (!audit_enabled) | ||
1425 | return 0; | ||
1426 | |||
1427 | if (likely(!context)) | ||
1428 | return 0; | ||
1429 | |||
1430 | ax = kmalloc(sizeof(*ax), GFP_ATOMIC); | ||
1431 | if (!ax) | ||
1432 | return -ENOMEM; | ||
1433 | |||
1434 | if (u_msg_prio != NULL) { | ||
1435 | if (get_user(ax->msg_prio, u_msg_prio)) { | ||
1436 | kfree(ax); | ||
1437 | return -EFAULT; | ||
1438 | } | ||
1439 | } else | ||
1440 | ax->msg_prio = 0; | ||
1441 | |||
1442 | if (u_abs_timeout != NULL) { | ||
1443 | if (copy_from_user(&ax->abs_timeout, u_abs_timeout, sizeof(ax->abs_timeout))) { | ||
1444 | kfree(ax); | ||
1445 | return -EFAULT; | ||
1446 | } | ||
1447 | } else | ||
1448 | memset(&ax->abs_timeout, 0, sizeof(ax->abs_timeout)); | ||
1449 | |||
1450 | ax->mqdes = mqdes; | ||
1451 | ax->msg_len = msg_len; | ||
1452 | |||
1453 | ax->d.type = AUDIT_MQ_SENDRECV; | ||
1454 | ax->d.next = context->aux; | ||
1455 | context->aux = (void *)ax; | ||
1456 | return 0; | ||
1457 | } | ||
1458 | |||
1459 | /** | ||
1460 | * __audit_mq_notify - record audit data for a POSIX MQ notify | ||
1461 | * @mqdes: MQ descriptor | ||
1462 | * @u_notification: Notification event | ||
1463 | * | ||
1464 | * Returns 0 for success or NULL context or < 0 on error. | ||
1465 | */ | ||
1466 | |||
1467 | int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification) | ||
1468 | { | ||
1469 | struct audit_aux_data_mq_notify *ax; | ||
1470 | struct audit_context *context = current->audit_context; | ||
1471 | |||
1472 | if (!audit_enabled) | ||
1473 | return 0; | ||
1474 | |||
1475 | if (likely(!context)) | ||
1476 | return 0; | ||
1477 | |||
1478 | ax = kmalloc(sizeof(*ax), GFP_ATOMIC); | ||
1479 | if (!ax) | ||
1480 | return -ENOMEM; | ||
1481 | |||
1482 | if (u_notification != NULL) { | ||
1483 | if (copy_from_user(&ax->notification, u_notification, sizeof(ax->notification))) { | ||
1484 | kfree(ax); | ||
1485 | return -EFAULT; | ||
1486 | } | ||
1487 | } else | ||
1488 | memset(&ax->notification, 0, sizeof(ax->notification)); | ||
1489 | |||
1490 | ax->mqdes = mqdes; | ||
1491 | |||
1492 | ax->d.type = AUDIT_MQ_NOTIFY; | ||
1493 | ax->d.next = context->aux; | ||
1494 | context->aux = (void *)ax; | ||
1495 | return 0; | ||
1496 | } | ||
1497 | |||
1498 | /** | ||
1499 | * __audit_mq_getsetattr - record audit data for a POSIX MQ get/set attribute | ||
1500 | * @mqdes: MQ descriptor | ||
1501 | * @mqstat: MQ flags | ||
1502 | * | ||
1503 | * Returns 0 for success or NULL context or < 0 on error. | ||
1504 | */ | ||
1505 | int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) | ||
1506 | { | ||
1507 | struct audit_aux_data_mq_getsetattr *ax; | ||
1181 | struct audit_context *context = current->audit_context; | 1508 | struct audit_context *context = current->audit_context; |
1182 | 1509 | ||
1510 | if (!audit_enabled) | ||
1511 | return 0; | ||
1512 | |||
1183 | if (likely(!context)) | 1513 | if (likely(!context)) |
1184 | return 0; | 1514 | return 0; |
1185 | 1515 | ||
@@ -1187,6 +1517,30 @@ int audit_ipc_obj(struct kern_ipc_perm *ipcp) | |||
1187 | if (!ax) | 1517 | if (!ax) |
1188 | return -ENOMEM; | 1518 | return -ENOMEM; |
1189 | 1519 | ||
1520 | ax->mqdes = mqdes; | ||
1521 | ax->mqstat = *mqstat; | ||
1522 | |||
1523 | ax->d.type = AUDIT_MQ_GETSETATTR; | ||
1524 | ax->d.next = context->aux; | ||
1525 | context->aux = (void *)ax; | ||
1526 | return 0; | ||
1527 | } | ||
1528 | |||
1529 | /** | ||
1530 | * __audit_ipc_obj - record audit data for ipc object | ||
1531 | * @ipcp: ipc permissions | ||
1532 | * | ||
1533 | * Returns 0 for success or NULL context or < 0 on error. | ||
1534 | */ | ||
1535 | int __audit_ipc_obj(struct kern_ipc_perm *ipcp) | ||
1536 | { | ||
1537 | struct audit_aux_data_ipcctl *ax; | ||
1538 | struct audit_context *context = current->audit_context; | ||
1539 | |||
1540 | ax = kmalloc(sizeof(*ax), GFP_ATOMIC); | ||
1541 | if (!ax) | ||
1542 | return -ENOMEM; | ||
1543 | |||
1190 | ax->uid = ipcp->uid; | 1544 | ax->uid = ipcp->uid; |
1191 | ax->gid = ipcp->gid; | 1545 | ax->gid = ipcp->gid; |
1192 | ax->mode = ipcp->mode; | 1546 | ax->mode = ipcp->mode; |
@@ -1204,17 +1558,15 @@ int audit_ipc_obj(struct kern_ipc_perm *ipcp) | |||
1204 | * @uid: msgq user id | 1558 | * @uid: msgq user id |
1205 | * @gid: msgq group id | 1559 | * @gid: msgq group id |
1206 | * @mode: msgq mode (permissions) | 1560 | * @mode: msgq mode (permissions) |
1561 | * @ipcp: in-kernel IPC permissions | ||
1207 | * | 1562 | * |
1208 | * Returns 0 for success or NULL context or < 0 on error. | 1563 | * Returns 0 for success or NULL context or < 0 on error. |
1209 | */ | 1564 | */ |
1210 | int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp) | 1565 | int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode) |
1211 | { | 1566 | { |
1212 | struct audit_aux_data_ipcctl *ax; | 1567 | struct audit_aux_data_ipcctl *ax; |
1213 | struct audit_context *context = current->audit_context; | 1568 | struct audit_context *context = current->audit_context; |
1214 | 1569 | ||
1215 | if (likely(!context)) | ||
1216 | return 0; | ||
1217 | |||
1218 | ax = kmalloc(sizeof(*ax), GFP_ATOMIC); | 1570 | ax = kmalloc(sizeof(*ax), GFP_ATOMIC); |
1219 | if (!ax) | 1571 | if (!ax) |
1220 | return -ENOMEM; | 1572 | return -ENOMEM; |
@@ -1223,7 +1575,6 @@ int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, | |||
1223 | ax->uid = uid; | 1575 | ax->uid = uid; |
1224 | ax->gid = gid; | 1576 | ax->gid = gid; |
1225 | ax->mode = mode; | 1577 | ax->mode = mode; |
1226 | selinux_get_ipc_sid(ipcp, &ax->osid); | ||
1227 | 1578 | ||
1228 | ax->d.type = AUDIT_IPC_SET_PERM; | 1579 | ax->d.type = AUDIT_IPC_SET_PERM; |
1229 | ax->d.next = context->aux; | 1580 | ax->d.next = context->aux; |
@@ -1231,6 +1582,39 @@ int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, | |||
1231 | return 0; | 1582 | return 0; |
1232 | } | 1583 | } |
1233 | 1584 | ||
1585 | int audit_bprm(struct linux_binprm *bprm) | ||
1586 | { | ||
1587 | struct audit_aux_data_execve *ax; | ||
1588 | struct audit_context *context = current->audit_context; | ||
1589 | unsigned long p, next; | ||
1590 | void *to; | ||
1591 | |||
1592 | if (likely(!audit_enabled || !context)) | ||
1593 | return 0; | ||
1594 | |||
1595 | ax = kmalloc(sizeof(*ax) + PAGE_SIZE * MAX_ARG_PAGES - bprm->p, | ||
1596 | GFP_KERNEL); | ||
1597 | if (!ax) | ||
1598 | return -ENOMEM; | ||
1599 | |||
1600 | ax->argc = bprm->argc; | ||
1601 | ax->envc = bprm->envc; | ||
1602 | for (p = bprm->p, to = ax->mem; p < MAX_ARG_PAGES*PAGE_SIZE; p = next) { | ||
1603 | struct page *page = bprm->page[p / PAGE_SIZE]; | ||
1604 | void *kaddr = kmap(page); | ||
1605 | next = (p + PAGE_SIZE) & ~(PAGE_SIZE - 1); | ||
1606 | memcpy(to, kaddr + (p & (PAGE_SIZE - 1)), next - p); | ||
1607 | to += next - p; | ||
1608 | kunmap(page); | ||
1609 | } | ||
1610 | |||
1611 | ax->d.type = AUDIT_EXECVE; | ||
1612 | ax->d.next = context->aux; | ||
1613 | context->aux = (void *)ax; | ||
1614 | return 0; | ||
1615 | } | ||
1616 | |||
1617 | |||
1234 | /** | 1618 | /** |
1235 | * audit_socketcall - record audit data for sys_socketcall | 1619 | * audit_socketcall - record audit data for sys_socketcall |
1236 | * @nargs: number of args | 1620 | * @nargs: number of args |
@@ -1325,19 +1709,20 @@ int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt) | |||
1325 | * If the audit subsystem is being terminated, record the task (pid) | 1709 | * If the audit subsystem is being terminated, record the task (pid) |
1326 | * and uid that is doing that. | 1710 | * and uid that is doing that. |
1327 | */ | 1711 | */ |
1328 | void audit_signal_info(int sig, struct task_struct *t) | 1712 | void __audit_signal_info(int sig, struct task_struct *t) |
1329 | { | 1713 | { |
1330 | extern pid_t audit_sig_pid; | 1714 | extern pid_t audit_sig_pid; |
1331 | extern uid_t audit_sig_uid; | 1715 | extern uid_t audit_sig_uid; |
1332 | 1716 | extern u32 audit_sig_sid; | |
1333 | if (unlikely(audit_pid && t->tgid == audit_pid)) { | 1717 | |
1334 | if (sig == SIGTERM || sig == SIGHUP) { | 1718 | if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1) { |
1335 | struct audit_context *ctx = current->audit_context; | 1719 | struct task_struct *tsk = current; |
1336 | audit_sig_pid = current->pid; | 1720 | struct audit_context *ctx = tsk->audit_context; |
1337 | if (ctx) | 1721 | audit_sig_pid = tsk->pid; |
1338 | audit_sig_uid = ctx->loginuid; | 1722 | if (ctx) |
1339 | else | 1723 | audit_sig_uid = ctx->loginuid; |
1340 | audit_sig_uid = current->uid; | 1724 | else |
1341 | } | 1725 | audit_sig_uid = tsk->uid; |
1726 | selinux_get_task_sid(tsk, &audit_sig_sid); | ||
1342 | } | 1727 | } |
1343 | } | 1728 | } |
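Editorial sketch (not part of the patch): the __audit_mq_* and __audit_ipc_* helpers above all follow one aux-record pattern: bail out cheaply when auditing is off or the task has no audit context, allocate a small record with GFP_ATOMIC, copy the syscall arguments into it, and push it onto context->aux so audit_log_exit() can emit it at syscall exit. A minimal sketch of that pattern, assuming it sits inside kernel/auditsc.c where struct audit_context and struct audit_aux_data are visible; the record type, field names and helper name here are simplified, invented stand-ins:

	#include <linux/audit.h>
	#include <linux/sched.h>
	#include <linux/slab.h>

	/* Simplified stand-in for the audit_aux_data_* records in this file. */
	struct example_aux {
		struct audit_aux_data d;	/* common header: next pointer + type */
		unsigned long payload;		/* syscall-specific data would go here */
	};

	static int example_record_aux(unsigned long payload)
	{
		struct audit_context *context = current->audit_context;
		struct example_aux *ax;

		if (!audit_enabled)		/* auditing globally disabled */
			return 0;
		if (likely(!context))		/* this task is not being audited */
			return 0;

		ax = kmalloc(sizeof(*ax), GFP_ATOMIC);	/* callers may hold locks */
		if (!ax)
			return -ENOMEM;

		ax->payload = payload;

		ax->d.type = AUDIT_IPC;		/* record type later logged by audit_log_exit() */
		ax->d.next = context->aux;	/* push onto the per-syscall aux list */
		context->aux = (void *)ax;
		return 0;
	}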
diff --git a/kernel/compat.c b/kernel/compat.c index c1601a84f8..2f67233243 100644 --- a/kernel/compat.c +++ b/kernel/compat.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/unistd.h> | 21 | #include <linux/unistd.h> |
22 | #include <linux/security.h> | 22 | #include <linux/security.h> |
23 | #include <linux/timex.h> | 23 | #include <linux/timex.h> |
24 | #include <linux/migrate.h> | ||
24 | 25 | ||
25 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
26 | 27 | ||
@@ -934,3 +935,25 @@ asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp) | |||
934 | 935 | ||
935 | return ret; | 936 | return ret; |
936 | } | 937 | } |
938 | |||
939 | #ifdef CONFIG_NUMA | ||
940 | asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_pages, | ||
941 | compat_uptr_t __user *pages32, | ||
942 | const int __user *nodes, | ||
943 | int __user *status, | ||
944 | int flags) | ||
945 | { | ||
946 | const void __user * __user *pages; | ||
947 | int i; | ||
948 | |||
949 | pages = compat_alloc_user_space(nr_pages * sizeof(void *)); | ||
950 | for (i = 0; i < nr_pages; i++) { | ||
951 | compat_uptr_t p; | ||
952 | |||
953 | if (get_user(p, pages32 + i) || | ||
954 | put_user(compat_ptr(p), pages + i)) | ||
955 | return -EFAULT; | ||
956 | } | ||
957 | return sys_move_pages(pid, nr_pages, pages, nodes, status, flags); | ||
958 | } | ||
959 | #endif | ||
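Editorial sketch (not part of the patch): compat_sys_move_pages() above uses the standard 32-bit compat idiom of widening each compat_uptr_t with compat_ptr() and rewriting the array into scratch user space obtained from compat_alloc_user_space() before calling the native syscall. The same idiom, reduced to a hypothetical helper (the function name is invented for illustration):

	#include <linux/compat.h>
	#include <asm/uaccess.h>

	/* Widen nr 32-bit user pointers into a native-sized array in user space. */
	static long example_widen_user_pointers(compat_uptr_t __user *ptrs32,
						unsigned long nr,
						const void __user * __user **out)
	{
		const void __user * __user *ptrs;
		unsigned long i;

		ptrs = compat_alloc_user_space(nr * sizeof(void *));
		for (i = 0; i < nr; i++) {
			compat_uptr_t p;

			if (get_user(p, ptrs32 + i) ||		/* read the 32-bit value */
			    put_user(compat_ptr(p), ptrs + i))	/* store the widened pointer */
				return -EFAULT;
		}
		*out = ptrs;
		return 0;
	}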
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index ab81fdd457..b602f73fb3 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/rcupdate.h> | 41 | #include <linux/rcupdate.h> |
42 | #include <linux/sched.h> | 42 | #include <linux/sched.h> |
43 | #include <linux/seq_file.h> | 43 | #include <linux/seq_file.h> |
44 | #include <linux/security.h> | ||
44 | #include <linux/slab.h> | 45 | #include <linux/slab.h> |
45 | #include <linux/smp_lock.h> | 46 | #include <linux/smp_lock.h> |
46 | #include <linux/spinlock.h> | 47 | #include <linux/spinlock.h> |
@@ -392,11 +393,11 @@ static int cpuset_fill_super(struct super_block *sb, void *unused_data, | |||
392 | return 0; | 393 | return 0; |
393 | } | 394 | } |
394 | 395 | ||
395 | static struct super_block *cpuset_get_sb(struct file_system_type *fs_type, | 396 | static int cpuset_get_sb(struct file_system_type *fs_type, |
396 | int flags, const char *unused_dev_name, | 397 | int flags, const char *unused_dev_name, |
397 | void *data) | 398 | void *data, struct vfsmount *mnt) |
398 | { | 399 | { |
399 | return get_sb_single(fs_type, flags, data, cpuset_fill_super); | 400 | return get_sb_single(fs_type, flags, data, cpuset_fill_super, mnt); |
400 | } | 401 | } |
401 | 402 | ||
402 | static struct file_system_type cpuset_fs_type = { | 403 | static struct file_system_type cpuset_fs_type = { |
@@ -1177,6 +1178,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) | |||
1177 | cpumask_t cpus; | 1178 | cpumask_t cpus; |
1178 | nodemask_t from, to; | 1179 | nodemask_t from, to; |
1179 | struct mm_struct *mm; | 1180 | struct mm_struct *mm; |
1181 | int retval; | ||
1180 | 1182 | ||
1181 | if (sscanf(pidbuf, "%d", &pid) != 1) | 1183 | if (sscanf(pidbuf, "%d", &pid) != 1) |
1182 | return -EIO; | 1184 | return -EIO; |
@@ -1205,6 +1207,12 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) | |||
1205 | get_task_struct(tsk); | 1207 | get_task_struct(tsk); |
1206 | } | 1208 | } |
1207 | 1209 | ||
1210 | retval = security_task_setscheduler(tsk, 0, NULL); | ||
1211 | if (retval) { | ||
1212 | put_task_struct(tsk); | ||
1213 | return retval; | ||
1214 | } | ||
1215 | |||
1208 | mutex_lock(&callback_mutex); | 1216 | mutex_lock(&callback_mutex); |
1209 | 1217 | ||
1210 | task_lock(tsk); | 1218 | task_lock(tsk); |
diff --git a/kernel/exit.c b/kernel/exit.c index e06d0c10a2..a3baf92462 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -579,7 +579,7 @@ static void exit_mm(struct task_struct * tsk) | |||
579 | down_read(&mm->mmap_sem); | 579 | down_read(&mm->mmap_sem); |
580 | } | 580 | } |
581 | atomic_inc(&mm->mm_count); | 581 | atomic_inc(&mm->mm_count); |
582 | if (mm != tsk->active_mm) BUG(); | 582 | BUG_ON(mm != tsk->active_mm); |
583 | /* more a memory barrier than a real lock */ | 583 | /* more a memory barrier than a real lock */ |
584 | task_lock(tsk); | 584 | task_lock(tsk); |
585 | tsk->mm = NULL; | 585 | tsk->mm = NULL; |
@@ -1530,8 +1530,7 @@ check_continued: | |||
1530 | if (options & __WNOTHREAD) | 1530 | if (options & __WNOTHREAD) |
1531 | break; | 1531 | break; |
1532 | tsk = next_thread(tsk); | 1532 | tsk = next_thread(tsk); |
1533 | if (tsk->signal != current->signal) | 1533 | BUG_ON(tsk->signal != current->signal); |
1534 | BUG(); | ||
1535 | } while (tsk != current); | 1534 | } while (tsk != current); |
1536 | 1535 | ||
1537 | read_unlock(&tasklist_lock); | 1536 | read_unlock(&tasklist_lock); |
diff --git a/kernel/fork.c b/kernel/fork.c index ac8100e308..49adc0e8d4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -368,6 +368,8 @@ void fastcall __mmdrop(struct mm_struct *mm) | |||
368 | */ | 368 | */ |
369 | void mmput(struct mm_struct *mm) | 369 | void mmput(struct mm_struct *mm) |
370 | { | 370 | { |
371 | might_sleep(); | ||
372 | |||
371 | if (atomic_dec_and_test(&mm->mm_users)) { | 373 | if (atomic_dec_and_test(&mm->mm_users)) { |
372 | exit_aio(mm); | 374 | exit_aio(mm); |
373 | exit_mmap(mm); | 375 | exit_mmap(mm); |
@@ -623,6 +625,7 @@ out: | |||
623 | /* | 625 | /* |
624 | * Allocate a new files structure and copy contents from the | 626 | * Allocate a new files structure and copy contents from the |
625 | * passed in files structure. | 627 | * passed in files structure. |
628 | * errorp will be valid only when the returned files_struct is NULL. | ||
626 | */ | 629 | */ |
627 | static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | 630 | static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) |
628 | { | 631 | { |
@@ -631,6 +634,7 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
631 | int open_files, size, i, expand; | 634 | int open_files, size, i, expand; |
632 | struct fdtable *old_fdt, *new_fdt; | 635 | struct fdtable *old_fdt, *new_fdt; |
633 | 636 | ||
637 | *errorp = -ENOMEM; | ||
634 | newf = alloc_files(); | 638 | newf = alloc_files(); |
635 | if (!newf) | 639 | if (!newf) |
636 | goto out; | 640 | goto out; |
@@ -744,7 +748,6 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) | |||
744 | * break this. | 748 | * break this. |
745 | */ | 749 | */ |
746 | tsk->files = NULL; | 750 | tsk->files = NULL; |
747 | error = -ENOMEM; | ||
748 | newf = dup_fd(oldf, &error); | 751 | newf = dup_fd(oldf, &error); |
749 | if (!newf) | 752 | if (!newf) |
750 | goto out; | 753 | goto out; |
diff --git a/kernel/futex.c b/kernel/futex.c index 5699c51205..e1a380c77a 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -1056,11 +1056,11 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, int val, | |||
1056 | (unsigned long)uaddr2, val2, val3); | 1056 | (unsigned long)uaddr2, val2, val3); |
1057 | } | 1057 | } |
1058 | 1058 | ||
1059 | static struct super_block * | 1059 | static int futexfs_get_sb(struct file_system_type *fs_type, |
1060 | futexfs_get_sb(struct file_system_type *fs_type, | 1060 | int flags, const char *dev_name, void *data, |
1061 | int flags, const char *dev_name, void *data) | 1061 | struct vfsmount *mnt) |
1062 | { | 1062 | { |
1063 | return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA); | 1063 | return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA, mnt); |
1064 | } | 1064 | } |
1065 | 1065 | ||
1066 | static struct file_system_type futex_fs_type = { | 1066 | static struct file_system_type futex_fs_type = { |
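Editorial sketch (not part of the patch): cpuset_get_sb() and futexfs_get_sb() above are converted to the newer ->get_sb prototype, which returns an int and is handed the pre-allocated struct vfsmount to fill, and the get_sb_single()/get_sb_pseudo() helpers now take that mount as their last argument. The shape of such a conversion for a hypothetical pseudo filesystem (names and magic number invented):

	#include <linux/fs.h>

	static int examplefs_get_sb(struct file_system_type *fs_type,
				    int flags, const char *dev_name, void *data,
				    struct vfsmount *mnt)
	{
		/* The helper fills *mnt and returns 0 or a negative errno. */
		return get_sb_pseudo(fs_type, "example:", NULL, 0x4578414d, mnt);
	}

	static struct file_system_type example_fs_type = {
		.name    = "examplefs",
		.get_sb  = examplefs_get_sb,
		.kill_sb = kill_anon_super,
	};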
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 01fa2ae98a..1832430572 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -393,7 +393,7 @@ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) | |||
393 | if (base->first == &timer->node) | 393 | if (base->first == &timer->node) |
394 | base->first = rb_next(&timer->node); | 394 | base->first = rb_next(&timer->node); |
395 | rb_erase(&timer->node, &base->active); | 395 | rb_erase(&timer->node, &base->active); |
396 | timer->node.rb_parent = HRTIMER_INACTIVE; | 396 | rb_set_parent(&timer->node, &timer->node); |
397 | } | 397 | } |
398 | 398 | ||
399 | /* | 399 | /* |
@@ -582,7 +582,7 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, | |||
582 | clock_id = CLOCK_MONOTONIC; | 582 | clock_id = CLOCK_MONOTONIC; |
583 | 583 | ||
584 | timer->base = &bases[clock_id]; | 584 | timer->base = &bases[clock_id]; |
585 | timer->node.rb_parent = HRTIMER_INACTIVE; | 585 | rb_set_parent(&timer->node, &timer->node); |
586 | } | 586 | } |
587 | EXPORT_SYMBOL_GPL(hrtimer_init); | 587 | EXPORT_SYMBOL_GPL(hrtimer_init); |
588 | 588 | ||
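Editorial sketch (not part of the patch): the hrtimer hunks above replace the old HRTIMER_INACTIVE sentinel with rb_set_parent(&timer->node, &timer->node); a node that is its own rbtree parent cannot occur inside a real tree, so self-parenting doubles as a "not queued" marker without spending an extra field. A small illustration of that trick (helper names invented):

	#include <linux/rbtree.h>

	static inline void example_mark_node_idle(struct rb_node *node)
	{
		rb_set_parent(node, node);	/* self-parented == not in any tree */
	}

	static inline int example_node_is_idle(struct rb_node *node)
	{
		return rb_parent(node) == node;
	}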
diff --git a/kernel/intermodule.c b/kernel/intermodule.c deleted file mode 100644 index 55b1e5b85d..0000000000 --- a/kernel/intermodule.c +++ /dev/null | |||
@@ -1,184 +0,0 @@ | |||
1 | /* Deprecated, do not use. Moved from module.c to here. --RR */ | ||
2 | |||
3 | /* Written by Keith Owens <kaos@ocs.com.au> Oct 2000 */ | ||
4 | #include <linux/module.h> | ||
5 | #include <linux/kmod.h> | ||
6 | #include <linux/spinlock.h> | ||
7 | #include <linux/list.h> | ||
8 | #include <linux/slab.h> | ||
9 | |||
10 | /* inter_module functions are always available, even when the kernel is | ||
11 | * compiled without modules. Consumers of inter_module_xxx routines | ||
12 | * will always work, even when both are built into the kernel, this | ||
13 | * approach removes lots of #ifdefs in mainline code. | ||
14 | */ | ||
15 | |||
16 | static struct list_head ime_list = LIST_HEAD_INIT(ime_list); | ||
17 | static DEFINE_SPINLOCK(ime_lock); | ||
18 | static int kmalloc_failed; | ||
19 | |||
20 | struct inter_module_entry { | ||
21 | struct list_head list; | ||
22 | const char *im_name; | ||
23 | struct module *owner; | ||
24 | const void *userdata; | ||
25 | }; | ||
26 | |||
27 | /** | ||
28 | * inter_module_register - register a new set of inter module data. | ||
29 | * @im_name: an arbitrary string to identify the data, must be unique | ||
30 | * @owner: module that is registering the data, always use THIS_MODULE | ||
31 | * @userdata: pointer to arbitrary userdata to be registered | ||
32 | * | ||
33 | * Description: Check that the im_name has not already been registered, | ||
34 | * complain if it has. For new data, add it to the inter_module_entry | ||
35 | * list. | ||
36 | */ | ||
37 | void inter_module_register(const char *im_name, struct module *owner, const void *userdata) | ||
38 | { | ||
39 | struct list_head *tmp; | ||
40 | struct inter_module_entry *ime, *ime_new; | ||
41 | |||
42 | if (!(ime_new = kzalloc(sizeof(*ime), GFP_KERNEL))) { | ||
43 | /* Overloaded kernel, not fatal */ | ||
44 | printk(KERN_ERR | ||
45 | "Aiee, inter_module_register: cannot kmalloc entry for '%s'\n", | ||
46 | im_name); | ||
47 | kmalloc_failed = 1; | ||
48 | return; | ||
49 | } | ||
50 | ime_new->im_name = im_name; | ||
51 | ime_new->owner = owner; | ||
52 | ime_new->userdata = userdata; | ||
53 | |||
54 | spin_lock(&ime_lock); | ||
55 | list_for_each(tmp, &ime_list) { | ||
56 | ime = list_entry(tmp, struct inter_module_entry, list); | ||
57 | if (strcmp(ime->im_name, im_name) == 0) { | ||
58 | spin_unlock(&ime_lock); | ||
59 | kfree(ime_new); | ||
60 | /* Program logic error, fatal */ | ||
61 | printk(KERN_ERR "inter_module_register: duplicate im_name '%s'", im_name); | ||
62 | BUG(); | ||
63 | } | ||
64 | } | ||
65 | list_add(&(ime_new->list), &ime_list); | ||
66 | spin_unlock(&ime_lock); | ||
67 | } | ||
68 | |||
69 | /** | ||
70 | * inter_module_unregister - unregister a set of inter module data. | ||
71 | * @im_name: an arbitrary string to identify the data, must be unique | ||
72 | * | ||
73 | * Description: Check that the im_name has been registered, complain if | ||
74 | * it has not. For existing data, remove it from the | ||
75 | * inter_module_entry list. | ||
76 | */ | ||
77 | void inter_module_unregister(const char *im_name) | ||
78 | { | ||
79 | struct list_head *tmp; | ||
80 | struct inter_module_entry *ime; | ||
81 | |||
82 | spin_lock(&ime_lock); | ||
83 | list_for_each(tmp, &ime_list) { | ||
84 | ime = list_entry(tmp, struct inter_module_entry, list); | ||
85 | if (strcmp(ime->im_name, im_name) == 0) { | ||
86 | list_del(&(ime->list)); | ||
87 | spin_unlock(&ime_lock); | ||
88 | kfree(ime); | ||
89 | return; | ||
90 | } | ||
91 | } | ||
92 | spin_unlock(&ime_lock); | ||
93 | if (kmalloc_failed) { | ||
94 | printk(KERN_ERR | ||
95 | "inter_module_unregister: no entry for '%s', " | ||
96 | "probably caused by previous kmalloc failure\n", | ||
97 | im_name); | ||
98 | return; | ||
99 | } | ||
100 | else { | ||
101 | /* Program logic error, fatal */ | ||
102 | printk(KERN_ERR "inter_module_unregister: no entry for '%s'", im_name); | ||
103 | BUG(); | ||
104 | } | ||
105 | } | ||
106 | |||
107 | /** | ||
108 | * inter_module_get - return arbitrary userdata from another module. | ||
109 | * @im_name: an arbitrary string to identify the data, must be unique | ||
110 | * | ||
111 | * Description: If the im_name has not been registered, return NULL. | ||
112 | * Try to increment the use count on the owning module, if that fails | ||
113 | * then return NULL. Otherwise return the userdata. | ||
114 | */ | ||
115 | static const void *inter_module_get(const char *im_name) | ||
116 | { | ||
117 | struct list_head *tmp; | ||
118 | struct inter_module_entry *ime; | ||
119 | const void *result = NULL; | ||
120 | |||
121 | spin_lock(&ime_lock); | ||
122 | list_for_each(tmp, &ime_list) { | ||
123 | ime = list_entry(tmp, struct inter_module_entry, list); | ||
124 | if (strcmp(ime->im_name, im_name) == 0) { | ||
125 | if (try_module_get(ime->owner)) | ||
126 | result = ime->userdata; | ||
127 | break; | ||
128 | } | ||
129 | } | ||
130 | spin_unlock(&ime_lock); | ||
131 | return(result); | ||
132 | } | ||
133 | |||
134 | /** | ||
135 | * inter_module_get_request - im get with automatic request_module. | ||
136 | * @im_name: an arbitrary string to identify the data, must be unique | ||
137 | * @modname: module that is expected to register im_name | ||
138 | * | ||
139 | * Description: If inter_module_get fails, do request_module then retry. | ||
140 | */ | ||
141 | const void *inter_module_get_request(const char *im_name, const char *modname) | ||
142 | { | ||
143 | const void *result = inter_module_get(im_name); | ||
144 | if (!result) { | ||
145 | request_module("%s", modname); | ||
146 | result = inter_module_get(im_name); | ||
147 | } | ||
148 | return(result); | ||
149 | } | ||
150 | |||
151 | /** | ||
152 | * inter_module_put - release use of data from another module. | ||
153 | * @im_name: an arbitrary string to identify the data, must be unique | ||
154 | * | ||
155 | * Description: If the im_name has not been registered, complain, | ||
156 | * otherwise decrement the use count on the owning module. | ||
157 | */ | ||
158 | void inter_module_put(const char *im_name) | ||
159 | { | ||
160 | struct list_head *tmp; | ||
161 | struct inter_module_entry *ime; | ||
162 | |||
163 | spin_lock(&ime_lock); | ||
164 | list_for_each(tmp, &ime_list) { | ||
165 | ime = list_entry(tmp, struct inter_module_entry, list); | ||
166 | if (strcmp(ime->im_name, im_name) == 0) { | ||
167 | if (ime->owner) | ||
168 | module_put(ime->owner); | ||
169 | spin_unlock(&ime_lock); | ||
170 | return; | ||
171 | } | ||
172 | } | ||
173 | spin_unlock(&ime_lock); | ||
174 | printk(KERN_ERR "inter_module_put: no entry for '%s'", im_name); | ||
175 | BUG(); | ||
176 | } | ||
177 | |||
178 | EXPORT_SYMBOL(inter_module_register); | ||
179 | EXPORT_SYMBOL(inter_module_unregister); | ||
180 | EXPORT_SYMBOL(inter_module_get_request); | ||
181 | EXPORT_SYMBOL(inter_module_put); | ||
182 | |||
183 | MODULE_LICENSE("GPL"); | ||
184 | |||
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 51df337b37..0f65301171 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c | |||
@@ -76,10 +76,11 @@ irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs) | |||
76 | /* | 76 | /* |
77 | * Have got an event to handle: | 77 | * Have got an event to handle: |
78 | */ | 78 | */ |
79 | fastcall int handle_IRQ_event(unsigned int irq, struct pt_regs *regs, | 79 | fastcall irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs, |
80 | struct irqaction *action) | 80 | struct irqaction *action) |
81 | { | 81 | { |
82 | int ret, retval = 0, status = 0; | 82 | irqreturn_t ret, retval = IRQ_NONE; |
83 | unsigned int status = 0; | ||
83 | 84 | ||
84 | if (!(action->flags & SA_INTERRUPT)) | 85 | if (!(action->flags & SA_INTERRUPT)) |
85 | local_irq_enable(); | 86 | local_irq_enable(); |
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index 134f9f2e0e..a12d00eb5e 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c | |||
@@ -30,7 +30,7 @@ void move_native_irq(int irq) | |||
30 | 30 | ||
31 | desc->move_irq = 0; | 31 | desc->move_irq = 0; |
32 | 32 | ||
33 | if (likely(cpus_empty(pending_irq_cpumask[irq]))) | 33 | if (unlikely(cpus_empty(pending_irq_cpumask[irq]))) |
34 | return; | 34 | return; |
35 | 35 | ||
36 | if (!desc->handler->set_affinity) | 36 | if (!desc->handler->set_affinity) |
@@ -49,7 +49,7 @@ void move_native_irq(int irq) | |||
49 | * cause some ioapics to malfunction. | 49 | * cause some ioapics to malfunction.
50 | * Being paranoid I guess! | 50 | * Being paranoid I guess!
51 | */ | 51 | */ |
52 | if (unlikely(!cpus_empty(tmp))) { | 52 | if (likely(!cpus_empty(tmp))) { |
53 | if (likely(!(desc->status & IRQ_DISABLED))) | 53 | if (likely(!(desc->status & IRQ_DISABLED))) |
54 | desc->handler->disable(irq); | 54 | desc->handler->disable(irq); |
55 | 55 | ||
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index d03b5eef8c..afacd6f585 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
@@ -24,6 +24,8 @@ static struct proc_dir_entry *smp_affinity_entry[NR_IRQS]; | |||
24 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 24 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
25 | void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) | 25 | void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) |
26 | { | 26 | { |
27 | set_balance_irq_affinity(irq, mask_val); | ||
28 | |||
27 | /* | 29 | /* |
28 | * Save these away for later use. Re-program when the | 30 | * Save these away for later use. Re-program when the
29 | * interrupt is pending | 31 | * interrupt is pending
@@ -33,6 +35,7 @@ void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) | |||
33 | #else | 35 | #else |
34 | void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) | 36 | void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) |
35 | { | 37 | { |
38 | set_balance_irq_affinity(irq, mask_val); | ||
36 | irq_affinity[irq] = mask_val; | 39 | irq_affinity[irq] = mask_val; |
37 | irq_desc[irq].handler->set_affinity(irq, mask_val); | 40 | irq_desc[irq].handler->set_affinity(irq, mask_val); |
38 | } | 41 | } |
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 7df9abd5ec..b2fb3c18d0 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c | |||
@@ -11,7 +11,7 @@ | |||
11 | #include <linux/kallsyms.h> | 11 | #include <linux/kallsyms.h> |
12 | #include <linux/interrupt.h> | 12 | #include <linux/interrupt.h> |
13 | 13 | ||
14 | static int irqfixup; | 14 | static int irqfixup __read_mostly; |
15 | 15 | ||
16 | /* | 16 | /* |
17 | * Recovery handler for misrouted interrupts. | 17 | * Recovery handler for misrouted interrupts. |
@@ -136,9 +136,9 @@ static void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t actio | |||
136 | void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret, | 136 | void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret, |
137 | struct pt_regs *regs) | 137 | struct pt_regs *regs) |
138 | { | 138 | { |
139 | if (action_ret != IRQ_HANDLED) { | 139 | if (unlikely(action_ret != IRQ_HANDLED)) { |
140 | desc->irqs_unhandled++; | 140 | desc->irqs_unhandled++; |
141 | if (action_ret != IRQ_NONE) | 141 | if (unlikely(action_ret != IRQ_NONE)) |
142 | report_bad_irq(irq, desc, action_ret); | 142 | report_bad_irq(irq, desc, action_ret); |
143 | } | 143 | } |
144 | 144 | ||
@@ -152,11 +152,11 @@ void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret, | |||
152 | } | 152 | } |
153 | 153 | ||
154 | desc->irq_count++; | 154 | desc->irq_count++; |
155 | if (desc->irq_count < 100000) | 155 | if (likely(desc->irq_count < 100000)) |
156 | return; | 156 | return; |
157 | 157 | ||
158 | desc->irq_count = 0; | 158 | desc->irq_count = 0; |
159 | if (desc->irqs_unhandled > 99900) { | 159 | if (unlikely(desc->irqs_unhandled > 99900)) { |
160 | /* | 160 | /* |
161 | * The interrupt is stuck | 161 | * The interrupt is stuck |
162 | */ | 162 | */ |
@@ -171,7 +171,7 @@ void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret, | |||
171 | desc->irqs_unhandled = 0; | 171 | desc->irqs_unhandled = 0; |
172 | } | 172 | } |
173 | 173 | ||
174 | int noirqdebug; | 174 | int noirqdebug __read_mostly; |
175 | 175 | ||
176 | int __init noirqdebug_setup(char *str) | 176 | int __init noirqdebug_setup(char *str) |
177 | { | 177 | { |
diff --git a/kernel/kexec.c b/kernel/kexec.c index bf39d28e4c..58f0f38259 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
@@ -902,14 +902,14 @@ static int kimage_load_segment(struct kimage *image, | |||
902 | * kexec does not sync, or unmount filesystems so if you need | 902 | * kexec does not sync, or unmount filesystems so if you need |
903 | * that to happen you need to do that yourself. | 903 | * that to happen you need to do that yourself. |
904 | */ | 904 | */ |
905 | struct kimage *kexec_image = NULL; | 905 | struct kimage *kexec_image; |
906 | static struct kimage *kexec_crash_image = NULL; | 906 | struct kimage *kexec_crash_image; |
907 | /* | 907 | /* |
908 | * A home grown binary mutex. | 908 | * A home grown binary mutex. |
909 | * Nothing can wait so this mutex is safe to use | 909 | * Nothing can wait so this mutex is safe to use |
910 | * in interrupt context :) | 910 | * in interrupt context :) |
911 | */ | 911 | */ |
912 | static int kexec_lock = 0; | 912 | static int kexec_lock; |
913 | 913 | ||
914 | asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, | 914 | asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, |
915 | struct kexec_segment __user *segments, | 915 | struct kexec_segment __user *segments, |
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index f119e098e6..9e28478a17 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/sysfs.h> | 14 | #include <linux/sysfs.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/init.h> | 16 | #include <linux/init.h> |
17 | #include <linux/kexec.h> | ||
17 | 18 | ||
18 | #define KERNEL_ATTR_RO(_name) \ | 19 | #define KERNEL_ATTR_RO(_name) \ |
19 | static struct subsys_attribute _name##_attr = __ATTR_RO(_name) | 20 | static struct subsys_attribute _name##_attr = __ATTR_RO(_name) |
@@ -48,6 +49,20 @@ static ssize_t uevent_helper_store(struct subsystem *subsys, const char *page, s | |||
48 | KERNEL_ATTR_RW(uevent_helper); | 49 | KERNEL_ATTR_RW(uevent_helper); |
49 | #endif | 50 | #endif |
50 | 51 | ||
52 | #ifdef CONFIG_KEXEC | ||
53 | static ssize_t kexec_loaded_show(struct subsystem *subsys, char *page) | ||
54 | { | ||
55 | return sprintf(page, "%d\n", !!kexec_image); | ||
56 | } | ||
57 | KERNEL_ATTR_RO(kexec_loaded); | ||
58 | |||
59 | static ssize_t kexec_crash_loaded_show(struct subsystem *subsys, char *page) | ||
60 | { | ||
61 | return sprintf(page, "%d\n", !!kexec_crash_image); | ||
62 | } | ||
63 | KERNEL_ATTR_RO(kexec_crash_loaded); | ||
64 | #endif /* CONFIG_KEXEC */ | ||
65 | |||
51 | decl_subsys(kernel, NULL, NULL); | 66 | decl_subsys(kernel, NULL, NULL); |
52 | EXPORT_SYMBOL_GPL(kernel_subsys); | 67 | EXPORT_SYMBOL_GPL(kernel_subsys); |
53 | 68 | ||
@@ -56,6 +71,10 @@ static struct attribute * kernel_attrs[] = { | |||
56 | &uevent_seqnum_attr.attr, | 71 | &uevent_seqnum_attr.attr, |
57 | &uevent_helper_attr.attr, | 72 | &uevent_helper_attr.attr, |
58 | #endif | 73 | #endif |
74 | #ifdef CONFIG_KEXEC | ||
75 | &kexec_loaded_attr.attr, | ||
76 | &kexec_crash_loaded_attr.attr, | ||
77 | #endif | ||
59 | NULL | 78 | NULL |
60 | }; | 79 | }; |
61 | 80 | ||
diff --git a/kernel/power/main.c b/kernel/power/main.c index a6d9ef4600..cdf0f07af9 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
@@ -15,7 +15,7 @@ | |||
15 | #include <linux/errno.h> | 15 | #include <linux/errno.h> |
16 | #include <linux/init.h> | 16 | #include <linux/init.h> |
17 | #include <linux/pm.h> | 17 | #include <linux/pm.h> |
18 | 18 | #include <linux/console.h> | |
19 | 19 | ||
20 | #include "power.h" | 20 | #include "power.h" |
21 | 21 | ||
@@ -86,6 +86,7 @@ static int suspend_prepare(suspend_state_t state) | |||
86 | goto Thaw; | 86 | goto Thaw; |
87 | } | 87 | } |
88 | 88 | ||
89 | suspend_console(); | ||
89 | if ((error = device_suspend(PMSG_SUSPEND))) { | 90 | if ((error = device_suspend(PMSG_SUSPEND))) { |
90 | printk(KERN_ERR "Some devices failed to suspend\n"); | 91 | printk(KERN_ERR "Some devices failed to suspend\n"); |
91 | goto Finish; | 92 | goto Finish; |
@@ -133,6 +134,7 @@ int suspend_enter(suspend_state_t state) | |||
133 | static void suspend_finish(suspend_state_t state) | 134 | static void suspend_finish(suspend_state_t state) |
134 | { | 135 | { |
135 | device_resume(); | 136 | device_resume(); |
137 | resume_console(); | ||
136 | thaw_processes(); | 138 | thaw_processes(); |
137 | enable_nonboot_cpus(); | 139 | enable_nonboot_cpus(); |
138 | if (pm_ops && pm_ops->finish) | 140 | if (pm_ops && pm_ops->finish) |
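Editorial sketch (not part of the patch): the two hunks above bracket the device suspend/resume sequence with the new suspend_console()/resume_console() calls added to kernel/printk.c later in this patch, so console output is parked while drivers are quiescing and released once they are back. The essential ordering, with error handling omitted:

	#include <linux/console.h>
	#include <linux/device.h>
	#include <linux/pm.h>

	static int example_suspend_sequence(void)
	{
		int error;

		suspend_console();			/* park printk before drivers suspend */
		error = device_suspend(PMSG_SUSPEND);
		if (!error) {
			/* ... the platform enters and leaves the sleep state here ... */
			device_resume();
		}
		resume_console();			/* every suspend_console() needs this pairing */
		return error;
	}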
diff --git a/kernel/power/power.h b/kernel/power/power.h index f06f12f217..98c41423f3 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h | |||
@@ -55,7 +55,7 @@ struct snapshot_handle { | |||
55 | unsigned int page; | 55 | unsigned int page; |
56 | unsigned int page_offset; | 56 | unsigned int page_offset; |
57 | unsigned int prev; | 57 | unsigned int prev; |
58 | struct pbe *pbe; | 58 | struct pbe *pbe, *last_pbe; |
59 | void *buffer; | 59 | void *buffer; |
60 | unsigned int buf_offset; | 60 | unsigned int buf_offset; |
61 | }; | 61 | }; |
@@ -105,6 +105,10 @@ extern struct bitmap_page *alloc_bitmap(unsigned int nr_bits); | |||
105 | extern unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap); | 105 | extern unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap); |
106 | extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap); | 106 | extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap); |
107 | 107 | ||
108 | extern unsigned int count_special_pages(void); | ||
109 | extern int save_special_mem(void); | ||
110 | extern int restore_special_mem(void); | ||
111 | |||
108 | extern int swsusp_check(void); | 112 | extern int swsusp_check(void); |
109 | extern int swsusp_shrink_memory(void); | 113 | extern int swsusp_shrink_memory(void); |
110 | extern void swsusp_free(void); | 114 | extern void swsusp_free(void); |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 3eeedbb13b..3d9284100b 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -39,8 +39,90 @@ static unsigned int nr_copy_pages; | |||
39 | static unsigned int nr_meta_pages; | 39 | static unsigned int nr_meta_pages; |
40 | static unsigned long *buffer; | 40 | static unsigned long *buffer; |
41 | 41 | ||
42 | struct arch_saveable_page { | ||
43 | unsigned long start; | ||
44 | unsigned long end; | ||
45 | char *data; | ||
46 | struct arch_saveable_page *next; | ||
47 | }; | ||
48 | static struct arch_saveable_page *arch_pages; | ||
49 | |||
50 | int swsusp_add_arch_pages(unsigned long start, unsigned long end) | ||
51 | { | ||
52 | struct arch_saveable_page *tmp; | ||
53 | |||
54 | while (start < end) { | ||
55 | tmp = kzalloc(sizeof(struct arch_saveable_page), GFP_KERNEL); | ||
56 | if (!tmp) | ||
57 | return -ENOMEM; | ||
58 | tmp->start = start; | ||
59 | tmp->end = ((start >> PAGE_SHIFT) + 1) << PAGE_SHIFT; | ||
60 | if (tmp->end > end) | ||
61 | tmp->end = end; | ||
62 | tmp->next = arch_pages; | ||
63 | start = tmp->end; | ||
64 | arch_pages = tmp; | ||
65 | } | ||
66 | return 0; | ||
67 | } | ||
68 | |||
69 | static unsigned int count_arch_pages(void) | ||
70 | { | ||
71 | unsigned int count = 0; | ||
72 | struct arch_saveable_page *tmp = arch_pages; | ||
73 | while (tmp) { | ||
74 | count++; | ||
75 | tmp = tmp->next; | ||
76 | } | ||
77 | return count; | ||
78 | } | ||
79 | |||
80 | static int save_arch_mem(void) | ||
81 | { | ||
82 | char *kaddr; | ||
83 | struct arch_saveable_page *tmp = arch_pages; | ||
84 | int offset; | ||
85 | |||
86 | pr_debug("swsusp: Saving arch specific memory\n"); | ||
87 | while (tmp) { | ||
88 | tmp->data = (char *)__get_free_page(GFP_ATOMIC); | ||
89 | if (!tmp->data) | ||
90 | return -ENOMEM; | ||
91 | offset = tmp->start - (tmp->start & PAGE_MASK); | ||
92 | /* arch pages might not have a 'struct page' */ | ||
93 | kaddr = kmap_atomic_pfn(tmp->start >> PAGE_SHIFT, KM_USER0); | ||
94 | memcpy(tmp->data + offset, kaddr + offset, | ||
95 | tmp->end - tmp->start); | ||
96 | kunmap_atomic(kaddr, KM_USER0); | ||
97 | |||
98 | tmp = tmp->next; | ||
99 | } | ||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | static int restore_arch_mem(void) | ||
104 | { | ||
105 | char *kaddr; | ||
106 | struct arch_saveable_page *tmp = arch_pages; | ||
107 | int offset; | ||
108 | |||
109 | while (tmp) { | ||
110 | if (!tmp->data) | ||
111 | continue; | ||
112 | offset = tmp->start - (tmp->start & PAGE_MASK); | ||
113 | kaddr = kmap_atomic_pfn(tmp->start >> PAGE_SHIFT, KM_USER0); | ||
114 | memcpy(kaddr + offset, tmp->data + offset, | ||
115 | tmp->end - tmp->start); | ||
116 | kunmap_atomic(kaddr, KM_USER0); | ||
117 | free_page((long)tmp->data); | ||
118 | tmp->data = NULL; | ||
119 | tmp = tmp->next; | ||
120 | } | ||
121 | return 0; | ||
122 | } | ||
123 | |||
42 | #ifdef CONFIG_HIGHMEM | 124 | #ifdef CONFIG_HIGHMEM |
43 | unsigned int count_highmem_pages(void) | 125 | static unsigned int count_highmem_pages(void) |
44 | { | 126 | { |
45 | struct zone *zone; | 127 | struct zone *zone; |
46 | unsigned long zone_pfn; | 128 | unsigned long zone_pfn; |
@@ -117,7 +199,7 @@ static int save_highmem_zone(struct zone *zone) | |||
117 | return 0; | 199 | return 0; |
118 | } | 200 | } |
119 | 201 | ||
120 | int save_highmem(void) | 202 | static int save_highmem(void) |
121 | { | 203 | { |
122 | struct zone *zone; | 204 | struct zone *zone; |
123 | int res = 0; | 205 | int res = 0; |
@@ -134,7 +216,7 @@ int save_highmem(void) | |||
134 | return 0; | 216 | return 0; |
135 | } | 217 | } |
136 | 218 | ||
137 | int restore_highmem(void) | 219 | static int restore_highmem(void) |
138 | { | 220 | { |
139 | printk("swsusp: Restoring Highmem\n"); | 221 | printk("swsusp: Restoring Highmem\n"); |
140 | while (highmem_copy) { | 222 | while (highmem_copy) { |
@@ -150,8 +232,35 @@ int restore_highmem(void) | |||
150 | } | 232 | } |
151 | return 0; | 233 | return 0; |
152 | } | 234 | } |
235 | #else | ||
236 | static inline unsigned int count_highmem_pages(void) {return 0;} | ||
237 | static inline int save_highmem(void) {return 0;} | ||
238 | static inline int restore_highmem(void) {return 0;} | ||
153 | #endif | 239 | #endif |
154 | 240 | ||
241 | unsigned int count_special_pages(void) | ||
242 | { | ||
243 | return count_arch_pages() + count_highmem_pages(); | ||
244 | } | ||
245 | |||
246 | int save_special_mem(void) | ||
247 | { | ||
248 | int ret; | ||
249 | ret = save_arch_mem(); | ||
250 | if (!ret) | ||
251 | ret = save_highmem(); | ||
252 | return ret; | ||
253 | } | ||
254 | |||
255 | int restore_special_mem(void) | ||
256 | { | ||
257 | int ret; | ||
258 | ret = restore_arch_mem(); | ||
259 | if (!ret) | ||
260 | ret = restore_highmem(); | ||
261 | return ret; | ||
262 | } | ||
263 | |||
155 | static int pfn_is_nosave(unsigned long pfn) | 264 | static int pfn_is_nosave(unsigned long pfn) |
156 | { | 265 | { |
157 | unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; | 266 | unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; |
@@ -177,7 +286,6 @@ static int saveable(struct zone *zone, unsigned long *zone_pfn) | |||
177 | return 0; | 286 | return 0; |
178 | 287 | ||
179 | page = pfn_to_page(pfn); | 288 | page = pfn_to_page(pfn); |
180 | BUG_ON(PageReserved(page) && PageNosave(page)); | ||
181 | if (PageNosave(page)) | 289 | if (PageNosave(page)) |
182 | return 0; | 290 | return 0; |
183 | if (PageReserved(page) && pfn_is_nosave(pfn)) | 291 | if (PageReserved(page) && pfn_is_nosave(pfn)) |
@@ -293,62 +401,29 @@ static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) | |||
293 | } | 401 | } |
294 | } | 402 | } |
295 | 403 | ||
296 | /** | 404 | static unsigned int unsafe_pages; |
297 | * On resume it is necessary to trace and eventually free the unsafe | ||
298 | * pages that have been allocated, because they are needed for I/O | ||
299 | * (on x86-64 we likely will "eat" these pages once again while | ||
300 | * creating the temporary page translation tables) | ||
301 | */ | ||
302 | |||
303 | struct eaten_page { | ||
304 | struct eaten_page *next; | ||
305 | char padding[PAGE_SIZE - sizeof(void *)]; | ||
306 | }; | ||
307 | |||
308 | static struct eaten_page *eaten_pages = NULL; | ||
309 | |||
310 | static void release_eaten_pages(void) | ||
311 | { | ||
312 | struct eaten_page *p, *q; | ||
313 | |||
314 | p = eaten_pages; | ||
315 | while (p) { | ||
316 | q = p->next; | ||
317 | /* We don't want swsusp_free() to free this page again */ | ||
318 | ClearPageNosave(virt_to_page(p)); | ||
319 | free_page((unsigned long)p); | ||
320 | p = q; | ||
321 | } | ||
322 | eaten_pages = NULL; | ||
323 | } | ||
324 | 405 | ||
325 | /** | 406 | /** |
326 | * @safe_needed - on resume, for storing the PBE list and the image, | 407 | * @safe_needed - on resume, for storing the PBE list and the image, |
327 | * we can only use memory pages that do not conflict with the pages | 408 | * we can only use memory pages that do not conflict with the pages |
328 | * which had been used before suspend. | 409 | * used before suspend. |
329 | * | 410 | * |
330 | * The unsafe pages are marked with the PG_nosave_free flag | 411 | * The unsafe pages are marked with the PG_nosave_free flag |
331 | * | 412 | * and we count them using unsafe_pages |
332 | * Allocated but unusable (ie eaten) memory pages should be marked | ||
333 | * so that swsusp_free() can release them | ||
334 | */ | 413 | */ |
335 | 414 | ||
336 | static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed) | 415 | static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed) |
337 | { | 416 | { |
338 | void *res; | 417 | void *res; |
339 | 418 | ||
419 | res = (void *)get_zeroed_page(gfp_mask); | ||
340 | if (safe_needed) | 420 | if (safe_needed) |
341 | do { | 421 | while (res && PageNosaveFree(virt_to_page(res))) { |
422 | /* The page is unsafe, mark it for swsusp_free() */ | ||
423 | SetPageNosave(virt_to_page(res)); | ||
424 | unsafe_pages++; | ||
342 | res = (void *)get_zeroed_page(gfp_mask); | 425 | res = (void *)get_zeroed_page(gfp_mask); |
343 | if (res && PageNosaveFree(virt_to_page(res))) { | 426 | } |
344 | /* This is for swsusp_free() */ | ||
345 | SetPageNosave(virt_to_page(res)); | ||
346 | ((struct eaten_page *)res)->next = eaten_pages; | ||
347 | eaten_pages = res; | ||
348 | } | ||
349 | } while (res && PageNosaveFree(virt_to_page(res))); | ||
350 | else | ||
351 | res = (void *)get_zeroed_page(gfp_mask); | ||
352 | if (res) { | 427 | if (res) { |
353 | SetPageNosave(virt_to_page(res)); | 428 | SetPageNosave(virt_to_page(res)); |
354 | SetPageNosaveFree(virt_to_page(res)); | 429 | SetPageNosaveFree(virt_to_page(res)); |
@@ -374,7 +449,8 @@ unsigned long get_safe_page(gfp_t gfp_mask) | |||
374 | * On each page we set up a list of struct_pbe elements. | 449 | * On each page we set up a list of struct_pbe elements. |
375 | */ | 450 | */ |
376 | 451 | ||
377 | struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed) | 452 | static struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, |
453 | int safe_needed) | ||
378 | { | 454 | { |
379 | unsigned int num; | 455 | unsigned int num; |
380 | struct pbe *pblist, *pbe; | 456 | struct pbe *pblist, *pbe; |
@@ -642,6 +718,8 @@ static int mark_unsafe_pages(struct pbe *pblist) | |||
642 | return -EFAULT; | 718 | return -EFAULT; |
643 | } | 719 | } |
644 | 720 | ||
721 | unsafe_pages = 0; | ||
722 | |||
645 | return 0; | 723 | return 0; |
646 | } | 724 | } |
647 | 725 | ||
@@ -719,42 +797,99 @@ static inline struct pbe *unpack_orig_addresses(unsigned long *buf, | |||
719 | } | 797 | } |
720 | 798 | ||
721 | /** | 799 | /** |
722 | * create_image - use metadata contained in the PBE list | 800 | * prepare_image - use metadata contained in the PBE list |
723 | * pointed to by pagedir_nosave to mark the pages that will | 801 | * pointed to by pagedir_nosave to mark the pages that will |
724 | * be overwritten in the process of restoring the system | 802 | * be overwritten in the process of restoring the system |
725 | * memory state from the image and allocate memory for | 803 | * memory state from the image ("unsafe" pages) and allocate |
726 | * the image avoiding these pages | 804 | * memory for the image |
805 | * | ||
806 | * The idea is to allocate the PBE list first and then | ||
807 | * allocate as many pages as are needed for the image data, | ||
808 | * but not to assign these pages to the PBEs initially. | ||
809 | * Instead, we just mark them as allocated and create a list | ||
810 | * of "safe" pages which will be used later | ||
727 | */ | 811 | */ |
728 | 812 | ||
729 | static int create_image(struct snapshot_handle *handle) | 813 | struct safe_page { |
814 | struct safe_page *next; | ||
815 | char padding[PAGE_SIZE - sizeof(void *)]; | ||
816 | }; | ||
817 | |||
818 | static struct safe_page *safe_pages; | ||
819 | |||
820 | static int prepare_image(struct snapshot_handle *handle) | ||
730 | { | 821 | { |
731 | int error = 0; | 822 | int error = 0; |
732 | struct pbe *p, *pblist; | 823 | unsigned int nr_pages = nr_copy_pages; |
824 | struct pbe *p, *pblist = NULL; | ||
733 | 825 | ||
734 | p = pagedir_nosave; | 826 | p = pagedir_nosave; |
735 | error = mark_unsafe_pages(p); | 827 | error = mark_unsafe_pages(p); |
736 | if (!error) { | 828 | if (!error) { |
737 | pblist = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1); | 829 | pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1); |
738 | if (pblist) | 830 | if (pblist) |
739 | copy_page_backup_list(pblist, p); | 831 | copy_page_backup_list(pblist, p); |
740 | free_pagedir(p, 0); | 832 | free_pagedir(p, 0); |
741 | if (!pblist) | 833 | if (!pblist) |
742 | error = -ENOMEM; | 834 | error = -ENOMEM; |
743 | } | 835 | } |
744 | if (!error) | 836 | safe_pages = NULL; |
745 | error = alloc_data_pages(pblist, GFP_ATOMIC, 1); | 837 | if (!error && nr_pages > unsafe_pages) { |
838 | nr_pages -= unsafe_pages; | ||
839 | while (nr_pages--) { | ||
840 | struct safe_page *ptr; | ||
841 | |||
842 | ptr = (struct safe_page *)get_zeroed_page(GFP_ATOMIC); | ||
843 | if (!ptr) { | ||
844 | error = -ENOMEM; | ||
845 | break; | ||
846 | } | ||
847 | if (!PageNosaveFree(virt_to_page(ptr))) { | ||
848 | /* The page is "safe", add it to the list */ | ||
849 | ptr->next = safe_pages; | ||
850 | safe_pages = ptr; | ||
851 | } | ||
852 | /* Mark the page as allocated */ | ||
853 | SetPageNosave(virt_to_page(ptr)); | ||
854 | SetPageNosaveFree(virt_to_page(ptr)); | ||
855 | } | ||
856 | } | ||
746 | if (!error) { | 857 | if (!error) { |
747 | release_eaten_pages(); | ||
748 | pagedir_nosave = pblist; | 858 | pagedir_nosave = pblist; |
749 | } else { | 859 | } else { |
750 | pagedir_nosave = NULL; | ||
751 | handle->pbe = NULL; | 860 | handle->pbe = NULL; |
752 | nr_copy_pages = 0; | 861 | swsusp_free(); |
753 | nr_meta_pages = 0; | ||
754 | } | 862 | } |
755 | return error; | 863 | return error; |
756 | } | 864 | } |
757 | 865 | ||
866 | static void *get_buffer(struct snapshot_handle *handle) | ||
867 | { | ||
868 | struct pbe *pbe = handle->pbe, *last = handle->last_pbe; | ||
869 | struct page *page = virt_to_page(pbe->orig_address); | ||
870 | |||
871 | if (PageNosave(page) && PageNosaveFree(page)) { | ||
872 | /* | ||
873 | * We have allocated the "original" page frame and we can | ||
874 | * use it directly to store the read page | ||
875 | */ | ||
876 | pbe->address = 0; | ||
877 | if (last && last->next) | ||
878 | last->next = NULL; | ||
879 | return (void *)pbe->orig_address; | ||
880 | } | ||
881 | /* | ||
882 | * The "original" page frame has not been allocated and we have to | ||
883 | * use a "safe" page frame to store the read page | ||
884 | */ | ||
885 | pbe->address = (unsigned long)safe_pages; | ||
886 | safe_pages = safe_pages->next; | ||
887 | if (last) | ||
888 | last->next = pbe; | ||
889 | handle->last_pbe = pbe; | ||
890 | return (void *)pbe->address; | ||
891 | } | ||
892 | |||
758 | /** | 893 | /** |
759 | * snapshot_write_next - used for writing the system memory snapshot. | 894 | * snapshot_write_next - used for writing the system memory snapshot. |
760 | * | 895 | * |
@@ -799,15 +934,16 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) | |||
799 | } else if (handle->prev <= nr_meta_pages) { | 934 | } else if (handle->prev <= nr_meta_pages) { |
800 | handle->pbe = unpack_orig_addresses(buffer, handle->pbe); | 935 | handle->pbe = unpack_orig_addresses(buffer, handle->pbe); |
801 | if (!handle->pbe) { | 936 | if (!handle->pbe) { |
802 | error = create_image(handle); | 937 | error = prepare_image(handle); |
803 | if (error) | 938 | if (error) |
804 | return error; | 939 | return error; |
805 | handle->pbe = pagedir_nosave; | 940 | handle->pbe = pagedir_nosave; |
806 | handle->buffer = (void *)handle->pbe->address; | 941 | handle->last_pbe = NULL; |
942 | handle->buffer = get_buffer(handle); | ||
807 | } | 943 | } |
808 | } else { | 944 | } else { |
809 | handle->pbe = handle->pbe->next; | 945 | handle->pbe = handle->pbe->next; |
810 | handle->buffer = (void *)handle->pbe->address; | 946 | handle->buffer = get_buffer(handle); |
811 | } | 947 | } |
812 | handle->prev = handle->page; | 948 | handle->prev = handle->page; |
813 | } | 949 | } |
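Editorial sketch (not part of the patch): prepare_image() and get_buffer() above lean on the struct safe_page layout, whose padding member makes the struct exactly one page, so every spare page can carry its own list link until it is handed out as an image buffer. The free-list idiom in isolation (names shortened, and the PG_nosave/PG_nosave_free marking done by the real code is omitted):

	#include <linux/mm.h>
	#include <linux/gfp.h>

	/* A whole page that doubles as its own singly linked list node. */
	struct page_node {
		struct page_node *next;
		char padding[PAGE_SIZE - sizeof(void *)];
	};

	static struct page_node *free_list;

	static int example_stock_pages(unsigned int nr)
	{
		while (nr--) {
			struct page_node *p =
				(struct page_node *)get_zeroed_page(GFP_ATOMIC);

			if (!p)
				return -ENOMEM;
			p->next = free_list;	/* the page itself is the list node */
			free_list = p;
		}
		return 0;
	}

	static void *example_take_page(void)
	{
		struct page_node *p = free_list;

		if (p)
			free_list = p->next;	/* link is overwritten by image data later */
		return p;
	}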
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index c4016cbbd3..f0ee4e7780 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c | |||
@@ -62,16 +62,6 @@ unsigned long image_size = 500 * 1024 * 1024; | |||
62 | 62 | ||
63 | int in_suspend __nosavedata = 0; | 63 | int in_suspend __nosavedata = 0; |
64 | 64 | ||
65 | #ifdef CONFIG_HIGHMEM | ||
66 | unsigned int count_highmem_pages(void); | ||
67 | int save_highmem(void); | ||
68 | int restore_highmem(void); | ||
69 | #else | ||
70 | static int save_highmem(void) { return 0; } | ||
71 | static int restore_highmem(void) { return 0; } | ||
72 | static unsigned int count_highmem_pages(void) { return 0; } | ||
73 | #endif | ||
74 | |||
75 | /** | 65 | /** |
76 | * The following functions are used for tracing the allocated | 66 | * The following functions are used for tracing the allocated |
77 | * swap pages, so that they can be freed in case of an error. | 67 | * swap pages, so that they can be freed in case of an error. |
@@ -175,6 +165,12 @@ void free_all_swap_pages(int swap, struct bitmap_page *bitmap) | |||
175 | */ | 165 | */ |
176 | 166 | ||
177 | #define SHRINK_BITE 10000 | 167 | #define SHRINK_BITE 10000 |
168 | static inline unsigned long __shrink_memory(long tmp) | ||
169 | { | ||
170 | if (tmp > SHRINK_BITE) | ||
171 | tmp = SHRINK_BITE; | ||
172 | return shrink_all_memory(tmp); | ||
173 | } | ||
178 | 174 | ||
179 | int swsusp_shrink_memory(void) | 175 | int swsusp_shrink_memory(void) |
180 | { | 176 | { |
@@ -186,21 +182,23 @@ int swsusp_shrink_memory(void) | |||
186 | 182 | ||
187 | printk("Shrinking memory... "); | 183 | printk("Shrinking memory... "); |
188 | do { | 184 | do { |
189 | size = 2 * count_highmem_pages(); | 185 | size = 2 * count_special_pages(); |
190 | size += size / 50 + count_data_pages(); | 186 | size += size / 50 + count_data_pages(); |
191 | size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE + | 187 | size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE + |
192 | PAGES_FOR_IO; | 188 | PAGES_FOR_IO; |
193 | tmp = size; | 189 | tmp = size; |
194 | for_each_zone (zone) | 190 | for_each_zone (zone) |
195 | if (!is_highmem(zone)) | 191 | if (!is_highmem(zone) && populated_zone(zone)) { |
196 | tmp -= zone->free_pages; | 192 | tmp -= zone->free_pages; |
193 | tmp += zone->lowmem_reserve[ZONE_NORMAL]; | ||
194 | } | ||
197 | if (tmp > 0) { | 195 | if (tmp > 0) { |
198 | tmp = shrink_all_memory(SHRINK_BITE); | 196 | tmp = __shrink_memory(tmp); |
199 | if (!tmp) | 197 | if (!tmp) |
200 | return -ENOMEM; | 198 | return -ENOMEM; |
201 | pages += tmp; | 199 | pages += tmp; |
202 | } else if (size > image_size / PAGE_SIZE) { | 200 | } else if (size > image_size / PAGE_SIZE) { |
203 | tmp = shrink_all_memory(SHRINK_BITE); | 201 | tmp = __shrink_memory(size - (image_size / PAGE_SIZE)); |
204 | pages += tmp; | 202 | pages += tmp; |
205 | } | 203 | } |
206 | printk("\b%c", p[i++%4]); | 204 | printk("\b%c", p[i++%4]); |
@@ -228,7 +226,7 @@ int swsusp_suspend(void) | |||
228 | goto Enable_irqs; | 226 | goto Enable_irqs; |
229 | } | 227 | } |
230 | 228 | ||
231 | if ((error = save_highmem())) { | 229 | if ((error = save_special_mem())) { |
232 | printk(KERN_ERR "swsusp: Not enough free pages for highmem\n"); | 230 | printk(KERN_ERR "swsusp: Not enough free pages for highmem\n"); |
233 | goto Restore_highmem; | 231 | goto Restore_highmem; |
234 | } | 232 | } |
@@ -239,7 +237,7 @@ int swsusp_suspend(void) | |||
239 | /* Restore control flow magically appears here */ | 237 | /* Restore control flow magically appears here */ |
240 | restore_processor_state(); | 238 | restore_processor_state(); |
241 | Restore_highmem: | 239 | Restore_highmem: |
242 | restore_highmem(); | 240 | restore_special_mem(); |
243 | device_power_up(); | 241 | device_power_up(); |
244 | Enable_irqs: | 242 | Enable_irqs: |
245 | local_irq_enable(); | 243 | local_irq_enable(); |
@@ -265,7 +263,7 @@ int swsusp_resume(void) | |||
265 | */ | 263 | */ |
266 | swsusp_free(); | 264 | swsusp_free(); |
267 | restore_processor_state(); | 265 | restore_processor_state(); |
268 | restore_highmem(); | 266 | restore_special_mem(); |
269 | touch_softlockup_watchdog(); | 267 | touch_softlockup_watchdog(); |
270 | device_power_up(); | 268 | device_power_up(); |
271 | local_irq_enable(); | 269 | local_irq_enable(); |
diff --git a/kernel/printk.c b/kernel/printk.c index c056f33244..19a9556192 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -67,6 +67,7 @@ EXPORT_SYMBOL(oops_in_progress); | |||
67 | * driver system. | 67 | * driver system. |
68 | */ | 68 | */ |
69 | static DECLARE_MUTEX(console_sem); | 69 | static DECLARE_MUTEX(console_sem); |
70 | static DECLARE_MUTEX(secondary_console_sem); | ||
70 | struct console *console_drivers; | 71 | struct console *console_drivers; |
71 | /* | 72 | /* |
72 | * This is used for debugging the mess that is the VT code by | 73 | * This is used for debugging the mess that is the VT code by |
@@ -76,7 +77,7 @@ struct console *console_drivers; | |||
76 | * path in the console code where we end up in places I want | 77 | * path in the console code where we end up in places I want |
77 | * locked without the console semaphore held | 78 | * locked without the console semaphore held
78 | */ | 79 | */ |
79 | static int console_locked; | 80 | static int console_locked, console_suspended; |
80 | 81 | ||
81 | /* | 82 | /* |
82 | * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars | 83 | * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars |
@@ -698,6 +699,23 @@ int __init add_preferred_console(char *name, int idx, char *options) | |||
698 | } | 699 | } |
699 | 700 | ||
700 | /** | 701 | /** |
702 | * suspend_console - suspend the console subsystem | ||
703 | * | ||
704 | * This disables printk() while we go into suspend states | ||
705 | */ | ||
706 | void suspend_console(void) | ||
707 | { | ||
708 | acquire_console_sem(); | ||
709 | console_suspended = 1; | ||
710 | } | ||
711 | |||
712 | void resume_console(void) | ||
713 | { | ||
714 | console_suspended = 0; | ||
715 | release_console_sem(); | ||
716 | } | ||
717 | |||
718 | /** | ||
701 | * acquire_console_sem - lock the console system for exclusive use. | 719 | * acquire_console_sem - lock the console system for exclusive use. |
702 | * | 720 | * |
703 | * Acquires a semaphore which guarantees that the caller has | 721 | * Acquires a semaphore which guarantees that the caller has |
@@ -708,6 +726,10 @@ int __init add_preferred_console(char *name, int idx, char *options) | |||
708 | void acquire_console_sem(void) | 726 | void acquire_console_sem(void) |
709 | { | 727 | { |
710 | BUG_ON(in_interrupt()); | 728 | BUG_ON(in_interrupt()); |
729 | if (console_suspended) { | ||
730 | down(&secondary_console_sem); | ||
731 | return; | ||
732 | } | ||
711 | down(&console_sem); | 733 | down(&console_sem); |
712 | console_locked = 1; | 734 | console_locked = 1; |
713 | console_may_schedule = 1; | 735 | console_may_schedule = 1; |
@@ -750,6 +772,10 @@ void release_console_sem(void) | |||
750 | unsigned long _con_start, _log_end; | 772 | unsigned long _con_start, _log_end; |
751 | unsigned long wake_klogd = 0; | 773 | unsigned long wake_klogd = 0; |
752 | 774 | ||
775 | if (console_suspended) { | ||
776 | up(&secondary_console_sem); | ||
777 | return; | ||
778 | } | ||
753 | for ( ; ; ) { | 779 | for ( ; ; ) { |
754 | spin_lock_irqsave(&logbuf_lock, flags); | 780 | spin_lock_irqsave(&logbuf_lock, flags); |
755 | wake_klogd |= log_start - log_end; | 781 | wake_klogd |= log_start - log_end; |
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 2058f88c7b..20e9710fc2 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
@@ -612,14 +612,6 @@ void synchronize_rcu(void) | |||
612 | wait_for_completion(&rcu.completion); | 612 | wait_for_completion(&rcu.completion); |
613 | } | 613 | } |
614 | 614 | ||
615 | /* | ||
616 | * Deprecated, use synchronize_rcu() or synchronize_sched() instead. | ||
617 | */ | ||
618 | void synchronize_kernel(void) | ||
619 | { | ||
620 | synchronize_rcu(); | ||
621 | } | ||
622 | |||
623 | module_param(blimit, int, 0); | 615 | module_param(blimit, int, 0); |
624 | module_param(qhimark, int, 0); | 616 | module_param(qhimark, int, 0); |
625 | module_param(qlowmark, int, 0); | 617 | module_param(qlowmark, int, 0); |
@@ -627,7 +619,6 @@ module_param(qlowmark, int, 0); | |||
627 | module_param(rsinterval, int, 0); | 619 | module_param(rsinterval, int, 0); |
628 | #endif | 620 | #endif |
629 | EXPORT_SYMBOL_GPL(rcu_batches_completed); | 621 | EXPORT_SYMBOL_GPL(rcu_batches_completed); |
630 | EXPORT_SYMBOL_GPL_FUTURE(call_rcu); /* WARNING: GPL-only in April 2006. */ | 622 | EXPORT_SYMBOL_GPL(call_rcu); |
631 | EXPORT_SYMBOL_GPL_FUTURE(call_rcu_bh); /* WARNING: GPL-only in April 2006. */ | 623 | EXPORT_SYMBOL_GPL(call_rcu_bh); |
632 | EXPORT_SYMBOL_GPL(synchronize_rcu); | 624 | EXPORT_SYMBOL_GPL(synchronize_rcu); |
633 | EXPORT_SYMBOL_GPL_FUTURE(synchronize_kernel); /* WARNING: GPL-only in April 2006. */ | ||
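With the deprecated synchronize_kernel() wrapper removed (and call_rcu()/call_rcu_bh() now plain GPL exports), any remaining caller switches to synchronize_rcu() or synchronize_sched(). A minimal sketch, assuming a hypothetical module that retires an RCU-protected object:

	#include <linux/rcupdate.h>
	#include <linux/list.h>
	#include <linux/slab.h>

	struct example_obj {			/* hypothetical object */
		struct list_head list;
		int data;
	};

	static void example_retire(struct example_obj *obj)
	{
		list_del_rcu(&obj->list);	/* unlink under the writer's lock */
		synchronize_rcu();		/* was synchronize_kernel(); wait for readers */
		kfree(obj);
	}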
diff --git a/kernel/sched.c b/kernel/sched.c index c13f1bd2df..5dbc426944 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -3886,6 +3886,10 @@ long sched_setaffinity(pid_t pid, cpumask_t new_mask) | |||
3886 | !capable(CAP_SYS_NICE)) | 3886 | !capable(CAP_SYS_NICE)) |
3887 | goto out_unlock; | 3887 | goto out_unlock; |
3888 | 3888 | ||
3889 | retval = security_task_setscheduler(p, 0, NULL); | ||
3890 | if (retval) | ||
3891 | goto out_unlock; | ||
3892 | |||
3889 | cpus_allowed = cpuset_cpus_allowed(p); | 3893 | cpus_allowed = cpuset_cpus_allowed(p); |
3890 | cpus_and(new_mask, new_mask, cpus_allowed); | 3894 | cpus_and(new_mask, new_mask, cpus_allowed); |
3891 | retval = set_cpus_allowed(p, new_mask); | 3895 | retval = set_cpus_allowed(p, new_mask); |
@@ -3954,7 +3958,10 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask) | |||
3954 | if (!p) | 3958 | if (!p) |
3955 | goto out_unlock; | 3959 | goto out_unlock; |
3956 | 3960 | ||
3957 | retval = 0; | 3961 | retval = security_task_getscheduler(p); |
3962 | if (retval) | ||
3963 | goto out_unlock; | ||
3964 | |||
3958 | cpus_and(*mask, p->cpus_allowed, cpu_online_map); | 3965 | cpus_and(*mask, p->cpus_allowed, cpu_online_map); |
3959 | 3966 | ||
3960 | out_unlock: | 3967 | out_unlock: |
@@ -4046,6 +4053,9 @@ asmlinkage long sys_sched_yield(void) | |||
4046 | 4053 | ||
4047 | static inline void __cond_resched(void) | 4054 | static inline void __cond_resched(void) |
4048 | { | 4055 | { |
4056 | #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP | ||
4057 | __might_sleep(__FILE__, __LINE__); | ||
4058 | #endif | ||
4049 | /* | 4059 | /* |
4050 | * The BKS might be reacquired before we have dropped | 4060 | * The BKS might be reacquired before we have dropped |
4051 | * PREEMPT_ACTIVE, which could trigger a second | 4061 | * PREEMPT_ACTIVE, which could trigger a second |
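Besides the new security_task_setscheduler()/security_task_getscheduler() checks in the affinity paths, __cond_resched() now runs __might_sleep() when CONFIG_DEBUG_SPINLOCK_SLEEP is set. A minimal sketch (hypothetical driver code, not from this patch) of the bug class that check flags:

	#include <linux/spinlock.h>
	#include <linux/sched.h>

	static DEFINE_SPINLOCK(example_lock);	/* assumed lock */

	static void example_buggy_loop(void)
	{
		spin_lock(&example_lock);
		/* sleeping while atomic: with CONFIG_DEBUG_SPINLOCK_SLEEP the
		 * __might_sleep() call added above now warns right here */
		cond_resched();
		spin_unlock(&example_lock);
	}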
diff --git a/kernel/signal.c b/kernel/signal.c index e5f8aea78f..1b3c921737 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -23,12 +23,12 @@ | |||
23 | #include <linux/syscalls.h> | 23 | #include <linux/syscalls.h> |
24 | #include <linux/ptrace.h> | 24 | #include <linux/ptrace.h> |
25 | #include <linux/signal.h> | 25 | #include <linux/signal.h> |
26 | #include <linux/audit.h> | ||
27 | #include <linux/capability.h> | 26 | #include <linux/capability.h> |
28 | #include <asm/param.h> | 27 | #include <asm/param.h> |
29 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
30 | #include <asm/unistd.h> | 29 | #include <asm/unistd.h> |
31 | #include <asm/siginfo.h> | 30 | #include <asm/siginfo.h> |
31 | #include "audit.h" /* audit_signal_info() */ | ||
32 | 32 | ||
33 | /* | 33 | /* |
34 | * SLAB caches for signal bits. | 34 | * SLAB caches for signal bits. |
diff --git a/kernel/sys.c b/kernel/sys.c index 0b6ec0e793..90930b28d2 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <linux/notifier.h> | 13 | #include <linux/notifier.h> |
14 | #include <linux/reboot.h> | 14 | #include <linux/reboot.h> |
15 | #include <linux/prctl.h> | 15 | #include <linux/prctl.h> |
16 | #include <linux/init.h> | ||
17 | #include <linux/highuid.h> | 16 | #include <linux/highuid.h> |
18 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
19 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
@@ -57,6 +56,12 @@ | |||
57 | #ifndef GET_FPEXC_CTL | 56 | #ifndef GET_FPEXC_CTL |
58 | # define GET_FPEXC_CTL(a,b) (-EINVAL) | 57 | # define GET_FPEXC_CTL(a,b) (-EINVAL) |
59 | #endif | 58 | #endif |
59 | #ifndef GET_ENDIAN | ||
60 | # define GET_ENDIAN(a,b) (-EINVAL) | ||
61 | #endif | ||
62 | #ifndef SET_ENDIAN | ||
63 | # define SET_ENDIAN(a,b) (-EINVAL) | ||
64 | #endif | ||
60 | 65 | ||
61 | /* | 66 | /* |
62 | * this is where the system-wide overflow UID and GID are defined, for | 67 | * this is where the system-wide overflow UID and GID are defined, for |
@@ -1860,23 +1865,20 @@ out: | |||
1860 | * fields when reaping, so a sample either gets all the additions of a | 1865 | * fields when reaping, so a sample either gets all the additions of a |
1861 | * given child after it's reaped, or none so this sample is before reaping. | 1866 | * given child after it's reaped, or none so this sample is before reaping. |
1862 | * | 1867 | * |
1863 | * tasklist_lock locking optimisation: | 1868 | * Locking: |
1864 | * If we are current and single threaded, we do not need to take the tasklist | 1869 | * We need to take the siglock for CHILDREN, SELF and BOTH |
1865 | * lock or the siglock. No one else can take our signal_struct away, | 1870 | * for the cases current multithreaded, non-current single threaded |
1866 | * no one else can reap the children to update signal->c* counters, and | 1871 | * non-current multithreaded. Thread traversal is now safe with |
1867 | * no one else can race with the signal-> fields. | 1872 | * the siglock held. |
1868 | * If we do not take the tasklist_lock, the signal-> fields could be read | 1873 | * Strictly speaking, we do not need to take the siglock if we are current and |
1869 | * out of order while another thread was just exiting. So we place a | 1874 | * single threaded, as no one else can take our signal_struct away, no one |
1870 | * read memory barrier when we avoid the lock. On the writer side, | 1875 | * else can reap the children to update signal->c* counters, and no one else |
1871 | * write memory barrier is implied in __exit_signal as __exit_signal releases | 1876 | * can race with the signal-> fields. If we do not take any lock, the |
1872 | * the siglock spinlock after updating the signal-> fields. | 1877 | * signal-> fields could be read out of order while another thread was just |
1873 | * | 1878 | * exiting. So we should place a read memory barrier when we avoid the lock. |
1874 | * We don't really need the siglock when we access the non c* fields | 1879 | * On the writer side, write memory barrier is implied in __exit_signal |
1875 | * of the signal_struct (for RUSAGE_SELF) even in multithreaded | 1880 | * as __exit_signal releases the siglock spinlock after updating the signal-> |
1876 | * case, since we take the tasklist lock for read and the non c* signal-> | 1881 | * fields. But we don't do this yet to keep things simple. |
1877 | * fields are updated only in __exit_signal, which is called with | ||
1878 | * tasklist_lock taken for write, hence these two threads cannot execute | ||
1879 | * concurrently. | ||
1880 | * | 1882 | * |
1881 | */ | 1883 | */ |
1882 | 1884 | ||
@@ -1885,35 +1887,25 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1885 | struct task_struct *t; | 1887 | struct task_struct *t; |
1886 | unsigned long flags; | 1888 | unsigned long flags; |
1887 | cputime_t utime, stime; | 1889 | cputime_t utime, stime; |
1888 | int need_lock = 0; | ||
1889 | 1890 | ||
1890 | memset((char *) r, 0, sizeof *r); | 1891 | memset((char *) r, 0, sizeof *r); |
1891 | utime = stime = cputime_zero; | 1892 | utime = stime = cputime_zero; |
1892 | 1893 | ||
1893 | if (p != current || !thread_group_empty(p)) | 1894 | rcu_read_lock(); |
1894 | need_lock = 1; | 1895 | if (!lock_task_sighand(p, &flags)) { |
1895 | 1896 | rcu_read_unlock(); | |
1896 | if (need_lock) { | 1897 | return; |
1897 | read_lock(&tasklist_lock); | 1898 | } |
1898 | if (unlikely(!p->signal)) { | ||
1899 | read_unlock(&tasklist_lock); | ||
1900 | return; | ||
1901 | } | ||
1902 | } else | ||
1903 | /* See locking comments above */ | ||
1904 | smp_rmb(); | ||
1905 | 1899 | ||
1906 | switch (who) { | 1900 | switch (who) { |
1907 | case RUSAGE_BOTH: | 1901 | case RUSAGE_BOTH: |
1908 | case RUSAGE_CHILDREN: | 1902 | case RUSAGE_CHILDREN: |
1909 | spin_lock_irqsave(&p->sighand->siglock, flags); | ||
1910 | utime = p->signal->cutime; | 1903 | utime = p->signal->cutime; |
1911 | stime = p->signal->cstime; | 1904 | stime = p->signal->cstime; |
1912 | r->ru_nvcsw = p->signal->cnvcsw; | 1905 | r->ru_nvcsw = p->signal->cnvcsw; |
1913 | r->ru_nivcsw = p->signal->cnivcsw; | 1906 | r->ru_nivcsw = p->signal->cnivcsw; |
1914 | r->ru_minflt = p->signal->cmin_flt; | 1907 | r->ru_minflt = p->signal->cmin_flt; |
1915 | r->ru_majflt = p->signal->cmaj_flt; | 1908 | r->ru_majflt = p->signal->cmaj_flt; |
1916 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | ||
1917 | 1909 | ||
1918 | if (who == RUSAGE_CHILDREN) | 1910 | if (who == RUSAGE_CHILDREN) |
1919 | break; | 1911 | break; |
@@ -1941,8 +1933,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1941 | BUG(); | 1933 | BUG(); |
1942 | } | 1934 | } |
1943 | 1935 | ||
1944 | if (need_lock) | 1936 | unlock_task_sighand(p, &flags); |
1945 | read_unlock(&tasklist_lock); | 1937 | rcu_read_unlock(); |
1938 | |||
1946 | cputime_to_timeval(utime, &r->ru_utime); | 1939 | cputime_to_timeval(utime, &r->ru_utime); |
1947 | cputime_to_timeval(stime, &r->ru_stime); | 1940 | cputime_to_timeval(stime, &r->ru_stime); |
1948 | } | 1941 | } |
@@ -2057,6 +2050,13 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | |||
2057 | return -EFAULT; | 2050 | return -EFAULT; |
2058 | return 0; | 2051 | return 0; |
2059 | } | 2052 | } |
2053 | case PR_GET_ENDIAN: | ||
2054 | error = GET_ENDIAN(current, arg2); | ||
2055 | break; | ||
2056 | case PR_SET_ENDIAN: | ||
2057 | error = SET_ENDIAN(current, arg2); | ||
2058 | break; | ||
2059 | |||
2060 | default: | 2060 | default: |
2061 | error = -EINVAL; | 2061 | error = -EINVAL; |
2062 | break; | 2062 | break; |
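k_getrusage() now pins the target's signal/sighand structures with rcu_read_lock() plus lock_task_sighand() instead of juggling tasklist_lock and an explicit siglock (the same hunk also adds the PR_GET_ENDIAN/PR_SET_ENDIAN prctl cases). A minimal sketch of the locking idiom; the sampled fields stand in for the counters copied above:

	#include <linux/sched.h>
	#include <linux/rcupdate.h>

	static void example_sample_counters(struct task_struct *p)	/* hypothetical */
	{
		unsigned long flags;

		rcu_read_lock();
		if (lock_task_sighand(p, &flags)) {
			/* p->signal and p->sighand cannot go away here */
			unsigned long minflt = p->signal->cmin_flt;
			unsigned long majflt = p->signal->cmaj_flt;

			unlock_task_sighand(p, &flags);
			(void)minflt;
			(void)majflt;
		}
		rcu_read_unlock();
	}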
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 5433195040..6991bece67 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c | |||
@@ -87,6 +87,7 @@ cond_syscall(sys_inotify_init); | |||
87 | cond_syscall(sys_inotify_add_watch); | 87 | cond_syscall(sys_inotify_add_watch); |
88 | cond_syscall(sys_inotify_rm_watch); | 88 | cond_syscall(sys_inotify_rm_watch); |
89 | cond_syscall(sys_migrate_pages); | 89 | cond_syscall(sys_migrate_pages); |
90 | cond_syscall(sys_move_pages); | ||
90 | cond_syscall(sys_chown16); | 91 | cond_syscall(sys_chown16); |
91 | cond_syscall(sys_fchown16); | 92 | cond_syscall(sys_fchown16); |
92 | cond_syscall(sys_getegid16); | 93 | cond_syscall(sys_getegid16); |
@@ -132,3 +133,4 @@ cond_syscall(sys_mincore); | |||
132 | cond_syscall(sys_madvise); | 133 | cond_syscall(sys_madvise); |
133 | cond_syscall(sys_mremap); | 134 | cond_syscall(sys_mremap); |
134 | cond_syscall(sys_remap_file_pages); | 135 | cond_syscall(sys_remap_file_pages); |
136 | cond_syscall(compat_sys_move_pages); | ||
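cond_syscall() supplies a weak fallback that returns -ENOSYS whenever the real entry point (here sys_move_pages / compat_sys_move_pages) is configured out; the strong definition overrides it when page migration is built in. A conceptual sketch only, not the kernel's actual macro or prototype:

	#include <linux/errno.h>

	/* conceptual: a weak default that the real sys_move_pages() replaces at link time */
	long __attribute__((weak)) sys_move_pages(void)
	{
		return -ENOSYS;
	}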
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e82726faee..eb8bd214e7 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -59,6 +59,7 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp, | |||
59 | extern int C_A_D; | 59 | extern int C_A_D; |
60 | extern int sysctl_overcommit_memory; | 60 | extern int sysctl_overcommit_memory; |
61 | extern int sysctl_overcommit_ratio; | 61 | extern int sysctl_overcommit_ratio; |
62 | extern int sysctl_panic_on_oom; | ||
62 | extern int max_threads; | 63 | extern int max_threads; |
63 | extern int sysrq_enabled; | 64 | extern int sysrq_enabled; |
64 | extern int core_uses_pid; | 65 | extern int core_uses_pid; |
@@ -150,7 +151,7 @@ extern ctl_table random_table[]; | |||
150 | #ifdef CONFIG_UNIX98_PTYS | 151 | #ifdef CONFIG_UNIX98_PTYS |
151 | extern ctl_table pty_table[]; | 152 | extern ctl_table pty_table[]; |
152 | #endif | 153 | #endif |
153 | #ifdef CONFIG_INOTIFY | 154 | #ifdef CONFIG_INOTIFY_USER |
154 | extern ctl_table inotify_table[]; | 155 | extern ctl_table inotify_table[]; |
155 | #endif | 156 | #endif |
156 | 157 | ||
@@ -398,7 +399,7 @@ static ctl_table kern_table[] = { | |||
398 | .strategy = &sysctl_string, | 399 | .strategy = &sysctl_string, |
399 | }, | 400 | }, |
400 | #endif | 401 | #endif |
401 | #ifdef CONFIG_HOTPLUG | 402 | #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET) |
402 | { | 403 | { |
403 | .ctl_name = KERN_HOTPLUG, | 404 | .ctl_name = KERN_HOTPLUG, |
404 | .procname = "hotplug", | 405 | .procname = "hotplug", |
@@ -702,6 +703,14 @@ static ctl_table vm_table[] = { | |||
702 | .proc_handler = &proc_dointvec, | 703 | .proc_handler = &proc_dointvec, |
703 | }, | 704 | }, |
704 | { | 705 | { |
706 | .ctl_name = VM_PANIC_ON_OOM, | ||
707 | .procname = "panic_on_oom", | ||
708 | .data = &sysctl_panic_on_oom, | ||
709 | .maxlen = sizeof(sysctl_panic_on_oom), | ||
710 | .mode = 0644, | ||
711 | .proc_handler = &proc_dointvec, | ||
712 | }, | ||
713 | { | ||
705 | .ctl_name = VM_OVERCOMMIT_RATIO, | 714 | .ctl_name = VM_OVERCOMMIT_RATIO, |
706 | .procname = "overcommit_ratio", | 715 | .procname = "overcommit_ratio", |
707 | .data = &sysctl_overcommit_ratio, | 716 | .data = &sysctl_overcommit_ratio, |
@@ -1028,7 +1037,7 @@ static ctl_table fs_table[] = { | |||
1028 | .mode = 0644, | 1037 | .mode = 0644, |
1029 | .proc_handler = &proc_doulongvec_minmax, | 1038 | .proc_handler = &proc_doulongvec_minmax, |
1030 | }, | 1039 | }, |
1031 | #ifdef CONFIG_INOTIFY | 1040 | #ifdef CONFIG_INOTIFY_USER |
1032 | { | 1041 | { |
1033 | .ctl_name = FS_INOTIFY, | 1042 | .ctl_name = FS_INOTIFY, |
1034 | .procname = "inotify", | 1043 | .procname = "inotify", |
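The new VM_PANIC_ON_OOM entry lands in vm_table, so it surfaces as /proc/sys/vm/panic_on_oom (the inotify entries simply move under the renamed CONFIG_INOTIFY_USER option). A minimal userspace sketch of toggling the knob, with the path derived from the table entry above:

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/vm/panic_on_oom", "w");
		if (!f)
			return 1;
		fputs("1\n", f);	/* panic instead of killing a task on OOM */
		fclose(f);
		return 0;
	}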
diff --git a/kernel/timer.c b/kernel/timer.c index 9e49deed46..f35b3939e9 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -383,23 +383,19 @@ EXPORT_SYMBOL(del_timer_sync); | |||
383 | static int cascade(tvec_base_t *base, tvec_t *tv, int index) | 383 | static int cascade(tvec_base_t *base, tvec_t *tv, int index) |
384 | { | 384 | { |
385 | /* cascade all the timers from tv up one level */ | 385 | /* cascade all the timers from tv up one level */ |
386 | struct list_head *head, *curr; | 386 | struct timer_list *timer, *tmp; |
387 | struct list_head tv_list; | ||
388 | |||
389 | list_replace_init(tv->vec + index, &tv_list); | ||
387 | 390 | ||
388 | head = tv->vec + index; | ||
389 | curr = head->next; | ||
390 | /* | 391 | /* |
391 | * We are removing _all_ timers from the list, so we don't have to | 392 | * We are removing _all_ timers from the list, so we |
392 | * detach them individually, just clear the list afterwards. | 393 | * don't have to detach them individually. |
393 | */ | 394 | */ |
394 | while (curr != head) { | 395 | list_for_each_entry_safe(timer, tmp, &tv_list, entry) { |
395 | struct timer_list *tmp; | 396 | BUG_ON(timer->base != base); |
396 | 397 | internal_add_timer(base, timer); | |
397 | tmp = list_entry(curr, struct timer_list, entry); | ||
398 | BUG_ON(tmp->base != base); | ||
399 | curr = curr->next; | ||
400 | internal_add_timer(base, tmp); | ||
401 | } | 398 | } |
402 | INIT_LIST_HEAD(head); | ||
403 | 399 | ||
404 | return index; | 400 | return index; |
405 | } | 401 | } |
@@ -419,10 +415,10 @@ static inline void __run_timers(tvec_base_t *base) | |||
419 | 415 | ||
420 | spin_lock_irq(&base->lock); | 416 | spin_lock_irq(&base->lock); |
421 | while (time_after_eq(jiffies, base->timer_jiffies)) { | 417 | while (time_after_eq(jiffies, base->timer_jiffies)) { |
422 | struct list_head work_list = LIST_HEAD_INIT(work_list); | 418 | struct list_head work_list; |
423 | struct list_head *head = &work_list; | 419 | struct list_head *head = &work_list; |
424 | int index = base->timer_jiffies & TVR_MASK; | 420 | int index = base->timer_jiffies & TVR_MASK; |
425 | 421 | ||
426 | /* | 422 | /* |
427 | * Cascade timers: | 423 | * Cascade timers: |
428 | */ | 424 | */ |
@@ -431,8 +427,8 @@ static inline void __run_timers(tvec_base_t *base) | |||
431 | (!cascade(base, &base->tv3, INDEX(1))) && | 427 | (!cascade(base, &base->tv3, INDEX(1))) && |
432 | !cascade(base, &base->tv4, INDEX(2))) | 428 | !cascade(base, &base->tv4, INDEX(2))) |
433 | cascade(base, &base->tv5, INDEX(3)); | 429 | cascade(base, &base->tv5, INDEX(3)); |
434 | ++base->timer_jiffies; | 430 | ++base->timer_jiffies; |
435 | list_splice_init(base->tv1.vec + index, &work_list); | 431 | list_replace_init(base->tv1.vec + index, &work_list); |
436 | while (!list_empty(head)) { | 432 | while (!list_empty(head)) { |
437 | void (*fn)(unsigned long); | 433 | void (*fn)(unsigned long); |
438 | unsigned long data; | 434 | unsigned long data; |
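The rewritten cascade() here, like take_over_work() below, relies on the new list_replace_init(): the whole chain is re-pointed at a local (possibly uninitialised) head while the shared head is re-initialised, so the entries can then be walked off-list. A minimal sketch with a hypothetical work list; the item type, lock and consumer are assumptions:

	#include <linux/list.h>
	#include <linux/spinlock.h>

	struct example_item {				/* hypothetical item type */
		struct list_head node;
	};

	static LIST_HEAD(shared_list);			/* assumed shared list */
	static DEFINE_SPINLOCK(shared_lock);		/* assumed lock protecting it */

	static void example_process(struct example_item *item)	/* assumed consumer */
	{
		list_del(&item->node);
	}

	static void example_drain(void)
	{
		struct list_head local;			/* may stay uninitialised */
		struct example_item *item, *tmp;

		spin_lock(&shared_lock);
		list_replace_init(&shared_list, &local);	/* shared_list is empty again */
		spin_unlock(&shared_lock);

		list_for_each_entry_safe(item, tmp, &local, node)
			example_process(item);
	}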
diff --git a/kernel/user.c b/kernel/user.c index 2116642f42..6408c04242 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -140,7 +140,7 @@ struct user_struct * alloc_uid(uid_t uid) | |||
140 | atomic_set(&new->processes, 0); | 140 | atomic_set(&new->processes, 0); |
141 | atomic_set(&new->files, 0); | 141 | atomic_set(&new->files, 0); |
142 | atomic_set(&new->sigpending, 0); | 142 | atomic_set(&new->sigpending, 0); |
143 | #ifdef CONFIG_INOTIFY | 143 | #ifdef CONFIG_INOTIFY_USER |
144 | atomic_set(&new->inotify_watches, 0); | 144 | atomic_set(&new->inotify_watches, 0); |
145 | atomic_set(&new->inotify_devs, 0); | 145 | atomic_set(&new->inotify_devs, 0); |
146 | #endif | 146 | #endif |
@@ -148,7 +148,7 @@ struct user_struct * alloc_uid(uid_t uid) | |||
148 | new->mq_bytes = 0; | 148 | new->mq_bytes = 0; |
149 | new->locked_shm = 0; | 149 | new->locked_shm = 0; |
150 | 150 | ||
151 | if (alloc_uid_keyring(new) < 0) { | 151 | if (alloc_uid_keyring(new, current) < 0) { |
152 | kmem_cache_free(uid_cachep, new); | 152 | kmem_cache_free(uid_cachep, new); |
153 | return NULL; | 153 | return NULL; |
154 | } | 154 | } |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 880fb415a8..740c5abceb 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -531,11 +531,11 @@ int current_is_keventd(void) | |||
531 | static void take_over_work(struct workqueue_struct *wq, unsigned int cpu) | 531 | static void take_over_work(struct workqueue_struct *wq, unsigned int cpu) |
532 | { | 532 | { |
533 | struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu); | 533 | struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu); |
534 | LIST_HEAD(list); | 534 | struct list_head list; |
535 | struct work_struct *work; | 535 | struct work_struct *work; |
536 | 536 | ||
537 | spin_lock_irq(&cwq->lock); | 537 | spin_lock_irq(&cwq->lock); |
538 | list_splice_init(&cwq->worklist, &list); | 538 | list_replace_init(&cwq->worklist, &list); |
539 | 539 | ||
540 | while (!list_empty(&list)) { | 540 | while (!list_empty(&list)) { |
541 | printk("Taking work for %s\n", wq->name); | 541 | printk("Taking work for %s\n", wq->name); |