aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/acct.c2
-rw-r--r--kernel/audit.c205
-rw-r--r--kernel/audit.h61
-rw-r--r--kernel/auditfilter.c899
-rw-r--r--kernel/auditsc.c649
-rw-r--r--kernel/compat.c23
-rw-r--r--kernel/cpuset.c16
-rw-r--r--kernel/exit.c5
-rw-r--r--kernel/fork.c5
-rw-r--r--kernel/futex.c8
-rw-r--r--kernel/hrtimer.c4
-rw-r--r--kernel/intermodule.c184
-rw-r--r--kernel/irq/handle.c5
-rw-r--r--kernel/irq/migration.c4
-rw-r--r--kernel/irq/proc.c3
-rw-r--r--kernel/irq/spurious.c12
-rw-r--r--kernel/kexec.c6
-rw-r--r--kernel/ksysfs.c19
-rw-r--r--kernel/power/main.c4
-rw-r--r--kernel/power/power.h6
-rw-r--r--kernel/power/snapshot.c260
-rw-r--r--kernel/power/swsusp.c32
-rw-r--r--kernel/printk.c28
-rw-r--r--kernel/rcupdate.c13
-rw-r--r--kernel/sched.c12
-rw-r--r--kernel/signal.c2
-rw-r--r--kernel/sys.c70
-rw-r--r--kernel/sys_ni.c2
-rw-r--r--kernel/sysctl.c15
-rw-r--r--kernel/timer.c30
-rw-r--r--kernel/user.c4
-rw-r--r--kernel/workqueue.c4
33 files changed, 1964 insertions, 629 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 58908f9d15..f6ef00f4f9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -20,7 +20,6 @@ obj-$(CONFIG_SMP) += cpu.o spinlock.o
20obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o 20obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
21obj-$(CONFIG_UID16) += uid16.o 21obj-$(CONFIG_UID16) += uid16.o
22obj-$(CONFIG_MODULES) += module.o 22obj-$(CONFIG_MODULES) += module.o
23obj-$(CONFIG_OBSOLETE_INTERMODULE) += intermodule.o
24obj-$(CONFIG_KALLSYMS) += kallsyms.o 23obj-$(CONFIG_KALLSYMS) += kallsyms.o
25obj-$(CONFIG_PM) += power/ 24obj-$(CONFIG_PM) += power/
26obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o 25obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
diff --git a/kernel/acct.c b/kernel/acct.c
index b327f4d201..6802020e0c 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -118,7 +118,7 @@ static int check_free_space(struct file *file)
118 spin_unlock(&acct_globals.lock); 118 spin_unlock(&acct_globals.lock);
119 119
120 /* May block */ 120 /* May block */
121 if (vfs_statfs(file->f_dentry->d_inode->i_sb, &sbuf)) 121 if (vfs_statfs(file->f_dentry, &sbuf))
122 return res; 122 return res;
123 suspend = sbuf.f_blocks * SUSPEND; 123 suspend = sbuf.f_blocks * SUSPEND;
124 resume = sbuf.f_blocks * RESUME; 124 resume = sbuf.f_blocks * RESUME;
diff --git a/kernel/audit.c b/kernel/audit.c
index df57b493e1..7dfac7031b 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -56,6 +56,7 @@
56#include <linux/skbuff.h> 56#include <linux/skbuff.h>
57#include <linux/netlink.h> 57#include <linux/netlink.h>
58#include <linux/selinux.h> 58#include <linux/selinux.h>
59#include <linux/inotify.h>
59 60
60#include "audit.h" 61#include "audit.h"
61 62
@@ -89,6 +90,7 @@ static int audit_backlog_wait_overflow = 0;
89/* The identity of the user shutting down the audit system. */ 90/* The identity of the user shutting down the audit system. */
90uid_t audit_sig_uid = -1; 91uid_t audit_sig_uid = -1;
91pid_t audit_sig_pid = -1; 92pid_t audit_sig_pid = -1;
93u32 audit_sig_sid = 0;
92 94
93/* Records can be lost in several ways: 95/* Records can be lost in several ways:
94 0) [suppressed in audit_alloc] 96 0) [suppressed in audit_alloc]
@@ -102,6 +104,12 @@ static atomic_t audit_lost = ATOMIC_INIT(0);
102/* The netlink socket. */ 104/* The netlink socket. */
103static struct sock *audit_sock; 105static struct sock *audit_sock;
104 106
107/* Inotify handle. */
108struct inotify_handle *audit_ih;
109
110/* Hash for inode-based rules */
111struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
112
105/* The audit_freelist is a list of pre-allocated audit buffers (if more 113/* The audit_freelist is a list of pre-allocated audit buffers (if more
106 * than AUDIT_MAXFREE are in use, the audit buffer is freed instead of 114 * than AUDIT_MAXFREE are in use, the audit buffer is freed instead of
107 * being placed on the freelist). */ 115 * being placed on the freelist). */
@@ -114,10 +122,8 @@ static struct task_struct *kauditd_task;
114static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait); 122static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
115static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait); 123static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
116 124
117/* The netlink socket is only to be read by 1 CPU, which lets us assume 125/* Serialize requests from userspace. */
118 * that list additions and deletions never happen simultaneously in 126static DEFINE_MUTEX(audit_cmd_mutex);
119 * auditsc.c */
120DEFINE_MUTEX(audit_netlink_mutex);
121 127
122/* AUDIT_BUFSIZ is the size of the temporary buffer used for formatting 128/* AUDIT_BUFSIZ is the size of the temporary buffer used for formatting
123 * audit records. Since printk uses a 1024 byte buffer, this buffer 129 * audit records. Since printk uses a 1024 byte buffer, this buffer
@@ -250,7 +256,7 @@ static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid)
250 "audit_rate_limit=%d old=%d by auid=%u", 256 "audit_rate_limit=%d old=%d by auid=%u",
251 limit, old, loginuid); 257 limit, old, loginuid);
252 audit_rate_limit = limit; 258 audit_rate_limit = limit;
253 return old; 259 return 0;
254} 260}
255 261
256static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid) 262static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
@@ -273,7 +279,7 @@ static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
273 "audit_backlog_limit=%d old=%d by auid=%u", 279 "audit_backlog_limit=%d old=%d by auid=%u",
274 limit, old, loginuid); 280 limit, old, loginuid);
275 audit_backlog_limit = limit; 281 audit_backlog_limit = limit;
276 return old; 282 return 0;
277} 283}
278 284
279static int audit_set_enabled(int state, uid_t loginuid, u32 sid) 285static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
@@ -299,7 +305,7 @@ static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
299 "audit_enabled=%d old=%d by auid=%u", 305 "audit_enabled=%d old=%d by auid=%u",
300 state, old, loginuid); 306 state, old, loginuid);
301 audit_enabled = state; 307 audit_enabled = state;
302 return old; 308 return 0;
303} 309}
304 310
305static int audit_set_failure(int state, uid_t loginuid, u32 sid) 311static int audit_set_failure(int state, uid_t loginuid, u32 sid)
@@ -327,7 +333,7 @@ static int audit_set_failure(int state, uid_t loginuid, u32 sid)
327 "audit_failure=%d old=%d by auid=%u", 333 "audit_failure=%d old=%d by auid=%u",
328 state, old, loginuid); 334 state, old, loginuid);
329 audit_failure = state; 335 audit_failure = state;
330 return old; 336 return 0;
331} 337}
332 338
333static int kauditd_thread(void *dummy) 339static int kauditd_thread(void *dummy)
@@ -363,9 +369,52 @@ static int kauditd_thread(void *dummy)
363 remove_wait_queue(&kauditd_wait, &wait); 369 remove_wait_queue(&kauditd_wait, &wait);
364 } 370 }
365 } 371 }
372}
373
374int audit_send_list(void *_dest)
375{
376 struct audit_netlink_list *dest = _dest;
377 int pid = dest->pid;
378 struct sk_buff *skb;
379
380 /* wait for parent to finish and send an ACK */
381 mutex_lock(&audit_cmd_mutex);
382 mutex_unlock(&audit_cmd_mutex);
383
384 while ((skb = __skb_dequeue(&dest->q)) != NULL)
385 netlink_unicast(audit_sock, skb, pid, 0);
386
387 kfree(dest);
388
366 return 0; 389 return 0;
367} 390}
368 391
392struct sk_buff *audit_make_reply(int pid, int seq, int type, int done,
393 int multi, void *payload, int size)
394{
395 struct sk_buff *skb;
396 struct nlmsghdr *nlh;
397 int len = NLMSG_SPACE(size);
398 void *data;
399 int flags = multi ? NLM_F_MULTI : 0;
400 int t = done ? NLMSG_DONE : type;
401
402 skb = alloc_skb(len, GFP_KERNEL);
403 if (!skb)
404 return NULL;
405
406 nlh = NLMSG_PUT(skb, pid, seq, t, size);
407 nlh->nlmsg_flags = flags;
408 data = NLMSG_DATA(nlh);
409 memcpy(data, payload, size);
410 return skb;
411
412nlmsg_failure: /* Used by NLMSG_PUT */
413 if (skb)
414 kfree_skb(skb);
415 return NULL;
416}
417
369/** 418/**
370 * audit_send_reply - send an audit reply message via netlink 419 * audit_send_reply - send an audit reply message via netlink
371 * @pid: process id to send reply to 420 * @pid: process id to send reply to
@@ -383,29 +432,13 @@ void audit_send_reply(int pid, int seq, int type, int done, int multi,
383 void *payload, int size) 432 void *payload, int size)
384{ 433{
385 struct sk_buff *skb; 434 struct sk_buff *skb;
386 struct nlmsghdr *nlh; 435 skb = audit_make_reply(pid, seq, type, done, multi, payload, size);
387 int len = NLMSG_SPACE(size);
388 void *data;
389 int flags = multi ? NLM_F_MULTI : 0;
390 int t = done ? NLMSG_DONE : type;
391
392 skb = alloc_skb(len, GFP_KERNEL);
393 if (!skb) 436 if (!skb)
394 return; 437 return;
395
396 nlh = NLMSG_PUT(skb, pid, seq, t, size);
397 nlh->nlmsg_flags = flags;
398 data = NLMSG_DATA(nlh);
399 memcpy(data, payload, size);
400
401 /* Ignore failure. It'll only happen if the sender goes away, 438 /* Ignore failure. It'll only happen if the sender goes away,
402 because our timeout is set to infinite. */ 439 because our timeout is set to infinite. */
403 netlink_unicast(audit_sock, skb, pid, 0); 440 netlink_unicast(audit_sock, skb, pid, 0);
404 return; 441 return;
405
406nlmsg_failure: /* Used by NLMSG_PUT */
407 if (skb)
408 kfree_skb(skb);
409} 442}
410 443
411/* 444/*
@@ -451,7 +484,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
451 struct audit_buffer *ab; 484 struct audit_buffer *ab;
452 u16 msg_type = nlh->nlmsg_type; 485 u16 msg_type = nlh->nlmsg_type;
453 uid_t loginuid; /* loginuid of sender */ 486 uid_t loginuid; /* loginuid of sender */
454 struct audit_sig_info sig_data; 487 struct audit_sig_info *sig_data;
488 char *ctx;
489 u32 len;
455 490
456 err = audit_netlink_ok(NETLINK_CB(skb).eff_cap, msg_type); 491 err = audit_netlink_ok(NETLINK_CB(skb).eff_cap, msg_type);
457 if (err) 492 if (err)
@@ -503,12 +538,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
503 if (status_get->mask & AUDIT_STATUS_PID) { 538 if (status_get->mask & AUDIT_STATUS_PID) {
504 int old = audit_pid; 539 int old = audit_pid;
505 if (sid) { 540 if (sid) {
506 char *ctx = NULL; 541 if ((err = selinux_ctxid_to_string(
507 u32 len;
508 int rc;
509 if ((rc = selinux_ctxid_to_string(
510 sid, &ctx, &len))) 542 sid, &ctx, &len)))
511 return rc; 543 return err;
512 else 544 else
513 audit_log(NULL, GFP_KERNEL, 545 audit_log(NULL, GFP_KERNEL,
514 AUDIT_CONFIG_CHANGE, 546 AUDIT_CONFIG_CHANGE,
@@ -523,10 +555,10 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
523 audit_pid = status_get->pid; 555 audit_pid = status_get->pid;
524 } 556 }
525 if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) 557 if (status_get->mask & AUDIT_STATUS_RATE_LIMIT)
526 audit_set_rate_limit(status_get->rate_limit, 558 err = audit_set_rate_limit(status_get->rate_limit,
527 loginuid, sid); 559 loginuid, sid);
528 if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT) 560 if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
529 audit_set_backlog_limit(status_get->backlog_limit, 561 err = audit_set_backlog_limit(status_get->backlog_limit,
530 loginuid, sid); 562 loginuid, sid);
531 break; 563 break;
532 case AUDIT_USER: 564 case AUDIT_USER:
@@ -544,8 +576,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
544 "user pid=%d uid=%u auid=%u", 576 "user pid=%d uid=%u auid=%u",
545 pid, uid, loginuid); 577 pid, uid, loginuid);
546 if (sid) { 578 if (sid) {
547 char *ctx = NULL;
548 u32 len;
549 if (selinux_ctxid_to_string( 579 if (selinux_ctxid_to_string(
550 sid, &ctx, &len)) { 580 sid, &ctx, &len)) {
551 audit_log_format(ab, 581 audit_log_format(ab,
@@ -584,10 +614,21 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
584 loginuid, sid); 614 loginuid, sid);
585 break; 615 break;
586 case AUDIT_SIGNAL_INFO: 616 case AUDIT_SIGNAL_INFO:
587 sig_data.uid = audit_sig_uid; 617 err = selinux_ctxid_to_string(audit_sig_sid, &ctx, &len);
588 sig_data.pid = audit_sig_pid; 618 if (err)
619 return err;
620 sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL);
621 if (!sig_data) {
622 kfree(ctx);
623 return -ENOMEM;
624 }
625 sig_data->uid = audit_sig_uid;
626 sig_data->pid = audit_sig_pid;
627 memcpy(sig_data->ctx, ctx, len);
628 kfree(ctx);
589 audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO, 629 audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO,
590 0, 0, &sig_data, sizeof(sig_data)); 630 0, 0, sig_data, sizeof(*sig_data) + len);
631 kfree(sig_data);
591 break; 632 break;
592 default: 633 default:
593 err = -EINVAL; 634 err = -EINVAL;
@@ -629,20 +670,30 @@ static void audit_receive(struct sock *sk, int length)
629 struct sk_buff *skb; 670 struct sk_buff *skb;
630 unsigned int qlen; 671 unsigned int qlen;
631 672
632 mutex_lock(&audit_netlink_mutex); 673 mutex_lock(&audit_cmd_mutex);
633 674
634 for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) { 675 for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
635 skb = skb_dequeue(&sk->sk_receive_queue); 676 skb = skb_dequeue(&sk->sk_receive_queue);
636 audit_receive_skb(skb); 677 audit_receive_skb(skb);
637 kfree_skb(skb); 678 kfree_skb(skb);
638 } 679 }
639 mutex_unlock(&audit_netlink_mutex); 680 mutex_unlock(&audit_cmd_mutex);
640} 681}
641 682
683#ifdef CONFIG_AUDITSYSCALL
684static const struct inotify_operations audit_inotify_ops = {
685 .handle_event = audit_handle_ievent,
686 .destroy_watch = audit_free_parent,
687};
688#endif
642 689
643/* Initialize audit support at boot time. */ 690/* Initialize audit support at boot time. */
644static int __init audit_init(void) 691static int __init audit_init(void)
645{ 692{
693#ifdef CONFIG_AUDITSYSCALL
694 int i;
695#endif
696
646 printk(KERN_INFO "audit: initializing netlink socket (%s)\n", 697 printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
647 audit_default ? "enabled" : "disabled"); 698 audit_default ? "enabled" : "disabled");
648 audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive, 699 audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive,
@@ -661,6 +712,16 @@ static int __init audit_init(void)
661 selinux_audit_set_callback(&selinux_audit_rule_update); 712 selinux_audit_set_callback(&selinux_audit_rule_update);
662 713
663 audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized"); 714 audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
715
716#ifdef CONFIG_AUDITSYSCALL
717 audit_ih = inotify_init(&audit_inotify_ops);
718 if (IS_ERR(audit_ih))
719 audit_panic("cannot initialize inotify handle");
720
721 for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
722 INIT_LIST_HEAD(&audit_inode_hash[i]);
723#endif
724
664 return 0; 725 return 0;
665} 726}
666__initcall(audit_init); 727__initcall(audit_init);
@@ -690,10 +751,12 @@ static void audit_buffer_free(struct audit_buffer *ab)
690 kfree_skb(ab->skb); 751 kfree_skb(ab->skb);
691 752
692 spin_lock_irqsave(&audit_freelist_lock, flags); 753 spin_lock_irqsave(&audit_freelist_lock, flags);
693 if (++audit_freelist_count > AUDIT_MAXFREE) 754 if (audit_freelist_count > AUDIT_MAXFREE)
694 kfree(ab); 755 kfree(ab);
695 else 756 else {
757 audit_freelist_count++;
696 list_add(&ab->list, &audit_freelist); 758 list_add(&ab->list, &audit_freelist);
759 }
697 spin_unlock_irqrestore(&audit_freelist_lock, flags); 760 spin_unlock_irqrestore(&audit_freelist_lock, flags);
698} 761}
699 762
@@ -988,28 +1051,76 @@ void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf,
988 skb_put(skb, len << 1); /* new string is twice the old string */ 1051 skb_put(skb, len << 1); /* new string is twice the old string */
989} 1052}
990 1053
1054/*
1055 * Format a string of no more than slen characters into the audit buffer,
1056 * enclosed in quote marks.
1057 */
1058static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
1059 const char *string)
1060{
1061 int avail, new_len;
1062 unsigned char *ptr;
1063 struct sk_buff *skb;
1064
1065 BUG_ON(!ab->skb);
1066 skb = ab->skb;
1067 avail = skb_tailroom(skb);
1068 new_len = slen + 3; /* enclosing quotes + null terminator */
1069 if (new_len > avail) {
1070 avail = audit_expand(ab, new_len);
1071 if (!avail)
1072 return;
1073 }
1074 ptr = skb->tail;
1075 *ptr++ = '"';
1076 memcpy(ptr, string, slen);
1077 ptr += slen;
1078 *ptr++ = '"';
1079 *ptr = 0;
1080 skb_put(skb, slen + 2); /* don't include null terminator */
1081}
1082
991/** 1083/**
992 * audit_log_unstrustedstring - log a string that may contain random characters 1084 * audit_log_n_unstrustedstring - log a string that may contain random characters
993 * @ab: audit_buffer 1085 * @ab: audit_buffer
1086 * @len: lenth of string (not including trailing null)
994 * @string: string to be logged 1087 * @string: string to be logged
995 * 1088 *
996 * This code will escape a string that is passed to it if the string 1089 * This code will escape a string that is passed to it if the string
997 * contains a control character, unprintable character, double quote mark, 1090 * contains a control character, unprintable character, double quote mark,
998 * or a space. Unescaped strings will start and end with a double quote mark. 1091 * or a space. Unescaped strings will start and end with a double quote mark.
999 * Strings that are escaped are printed in hex (2 digits per char). 1092 * Strings that are escaped are printed in hex (2 digits per char).
1093 *
1094 * The caller specifies the number of characters in the string to log, which may
1095 * or may not be the entire string.
1000 */ 1096 */
1001void audit_log_untrustedstring(struct audit_buffer *ab, const char *string) 1097const char *audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len,
1098 const char *string)
1002{ 1099{
1003 const unsigned char *p = string; 1100 const unsigned char *p = string;
1004 1101
1005 while (*p) { 1102 while (*p) {
1006 if (*p == '"' || *p < 0x21 || *p > 0x7f) { 1103 if (*p == '"' || *p < 0x21 || *p > 0x7f) {
1007 audit_log_hex(ab, string, strlen(string)); 1104 audit_log_hex(ab, string, len);
1008 return; 1105 return string + len + 1;
1009 } 1106 }
1010 p++; 1107 p++;
1011 } 1108 }
1012 audit_log_format(ab, "\"%s\"", string); 1109 audit_log_n_string(ab, len, string);
1110 return p + 1;
1111}
1112
1113/**
1114 * audit_log_unstrustedstring - log a string that may contain random characters
1115 * @ab: audit_buffer
1116 * @string: string to be logged
1117 *
1118 * Same as audit_log_n_unstrustedstring(), except that strlen is used to
1119 * determine string length.
1120 */
1121const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
1122{
1123 return audit_log_n_untrustedstring(ab, strlen(string), string);
1013} 1124}
1014 1125
1015/* This is a helper-function to print the escaped d_path */ 1126/* This is a helper-function to print the escaped d_path */
diff --git a/kernel/audit.h b/kernel/audit.h
index 6f733920fd..8323e4132a 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -19,9 +19,9 @@
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */ 20 */
21 21
22#include <linux/mutex.h>
23#include <linux/fs.h> 22#include <linux/fs.h>
24#include <linux/audit.h> 23#include <linux/audit.h>
24#include <linux/skbuff.h>
25 25
26/* 0 = no checking 26/* 0 = no checking
27 1 = put_count checking 27 1 = put_count checking
@@ -53,6 +53,18 @@ enum audit_state {
53}; 53};
54 54
55/* Rule lists */ 55/* Rule lists */
56struct audit_parent;
57
58struct audit_watch {
59 atomic_t count; /* reference count */
60 char *path; /* insertion path */
61 dev_t dev; /* associated superblock device */
62 unsigned long ino; /* associated inode number */
63 struct audit_parent *parent; /* associated parent */
64 struct list_head wlist; /* entry in parent->watches list */
65 struct list_head rules; /* associated rules */
66};
67
56struct audit_field { 68struct audit_field {
57 u32 type; 69 u32 type;
58 u32 val; 70 u32 val;
@@ -70,6 +82,9 @@ struct audit_krule {
70 u32 buflen; /* for data alloc on list rules */ 82 u32 buflen; /* for data alloc on list rules */
71 u32 field_count; 83 u32 field_count;
72 struct audit_field *fields; 84 struct audit_field *fields;
85 struct audit_field *inode_f; /* quick access to an inode field */
86 struct audit_watch *watch; /* associated watch */
87 struct list_head rlist; /* entry in audit_watch.rules list */
73}; 88};
74 89
75struct audit_entry { 90struct audit_entry {
@@ -78,15 +93,53 @@ struct audit_entry {
78 struct audit_krule rule; 93 struct audit_krule rule;
79}; 94};
80 95
81
82extern int audit_pid; 96extern int audit_pid;
83extern int audit_comparator(const u32 left, const u32 op, const u32 right);
84 97
98#define AUDIT_INODE_BUCKETS 32
99extern struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
100
101static inline int audit_hash_ino(u32 ino)
102{
103 return (ino & (AUDIT_INODE_BUCKETS-1));
104}
105
106extern int audit_comparator(const u32 left, const u32 op, const u32 right);
107extern int audit_compare_dname_path(const char *dname, const char *path,
108 int *dirlen);
109extern struct sk_buff * audit_make_reply(int pid, int seq, int type,
110 int done, int multi,
111 void *payload, int size);
85extern void audit_send_reply(int pid, int seq, int type, 112extern void audit_send_reply(int pid, int seq, int type,
86 int done, int multi, 113 int done, int multi,
87 void *payload, int size); 114 void *payload, int size);
88extern void audit_log_lost(const char *message); 115extern void audit_log_lost(const char *message);
89extern void audit_panic(const char *message); 116extern void audit_panic(const char *message);
90extern struct mutex audit_netlink_mutex;
91 117
118struct audit_netlink_list {
119 int pid;
120 struct sk_buff_head q;
121};
122
123int audit_send_list(void *);
124
125struct inotify_watch;
126extern void audit_free_parent(struct inotify_watch *);
127extern void audit_handle_ievent(struct inotify_watch *, u32, u32, u32,
128 const char *, struct inode *);
92extern int selinux_audit_rule_update(void); 129extern int selinux_audit_rule_update(void);
130
131#ifdef CONFIG_AUDITSYSCALL
132extern void __audit_signal_info(int sig, struct task_struct *t);
133static inline void audit_signal_info(int sig, struct task_struct *t)
134{
135 if (unlikely(audit_pid && t->tgid == audit_pid))
136 __audit_signal_info(sig, t);
137}
138extern enum audit_state audit_filter_inodes(struct task_struct *,
139 struct audit_context *);
140extern void audit_set_auditable(struct audit_context *);
141#else
142#define audit_signal_info(s,t)
143#define audit_filter_inodes(t,c) AUDIT_DISABLED
144#define audit_set_auditable(c)
145#endif
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 7c134906d6..4c99d2c586 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -22,13 +22,59 @@
22#include <linux/kernel.h> 22#include <linux/kernel.h>
23#include <linux/audit.h> 23#include <linux/audit.h>
24#include <linux/kthread.h> 24#include <linux/kthread.h>
25#include <linux/mutex.h>
26#include <linux/fs.h>
27#include <linux/namei.h>
25#include <linux/netlink.h> 28#include <linux/netlink.h>
29#include <linux/sched.h>
30#include <linux/inotify.h>
26#include <linux/selinux.h> 31#include <linux/selinux.h>
27#include "audit.h" 32#include "audit.h"
28 33
29/* There are three lists of rules -- one to search at task creation 34/*
30 * time, one to search at syscall entry time, and another to search at 35 * Locking model:
31 * syscall exit time. */ 36 *
37 * audit_filter_mutex:
38 * Synchronizes writes and blocking reads of audit's filterlist
39 * data. Rcu is used to traverse the filterlist and access
40 * contents of structs audit_entry, audit_watch and opaque
41 * selinux rules during filtering. If modified, these structures
42 * must be copied and replace their counterparts in the filterlist.
43 * An audit_parent struct is not accessed during filtering, so may
44 * be written directly provided audit_filter_mutex is held.
45 */
46
47/*
48 * Reference counting:
49 *
50 * audit_parent: lifetime is from audit_init_parent() to receipt of an IN_IGNORED
51 * event. Each audit_watch holds a reference to its associated parent.
52 *
53 * audit_watch: if added to lists, lifetime is from audit_init_watch() to
54 * audit_remove_watch(). Additionally, an audit_watch may exist
55 * temporarily to assist in searching existing filter data. Each
56 * audit_krule holds a reference to its associated watch.
57 */
58
59struct audit_parent {
60 struct list_head ilist; /* entry in inotify registration list */
61 struct list_head watches; /* associated watches */
62 struct inotify_watch wdata; /* inotify watch data */
63 unsigned flags; /* status flags */
64};
65
66/*
67 * audit_parent status flags:
68 *
69 * AUDIT_PARENT_INVALID - set anytime rules/watches are auto-removed due to
70 * a filesystem event to ensure we're adding audit watches to a valid parent.
71 * Technically not needed for IN_DELETE_SELF or IN_UNMOUNT events, as we cannot
72 * receive them while we have nameidata, but must be used for IN_MOVE_SELF which
73 * we can receive while holding nameidata.
74 */
75#define AUDIT_PARENT_INVALID 0x001
76
77/* Audit filter lists, defined in <linux/audit.h> */
32struct list_head audit_filter_list[AUDIT_NR_FILTERS] = { 78struct list_head audit_filter_list[AUDIT_NR_FILTERS] = {
33 LIST_HEAD_INIT(audit_filter_list[0]), 79 LIST_HEAD_INIT(audit_filter_list[0]),
34 LIST_HEAD_INIT(audit_filter_list[1]), 80 LIST_HEAD_INIT(audit_filter_list[1]),
@@ -41,9 +87,53 @@ struct list_head audit_filter_list[AUDIT_NR_FILTERS] = {
41#endif 87#endif
42}; 88};
43 89
90static DEFINE_MUTEX(audit_filter_mutex);
91
92/* Inotify handle */
93extern struct inotify_handle *audit_ih;
94
95/* Inotify events we care about. */
96#define AUDIT_IN_WATCH IN_MOVE|IN_CREATE|IN_DELETE|IN_DELETE_SELF|IN_MOVE_SELF
97
98void audit_free_parent(struct inotify_watch *i_watch)
99{
100 struct audit_parent *parent;
101
102 parent = container_of(i_watch, struct audit_parent, wdata);
103 WARN_ON(!list_empty(&parent->watches));
104 kfree(parent);
105}
106
107static inline void audit_get_watch(struct audit_watch *watch)
108{
109 atomic_inc(&watch->count);
110}
111
112static void audit_put_watch(struct audit_watch *watch)
113{
114 if (atomic_dec_and_test(&watch->count)) {
115 WARN_ON(watch->parent);
116 WARN_ON(!list_empty(&watch->rules));
117 kfree(watch->path);
118 kfree(watch);
119 }
120}
121
122static void audit_remove_watch(struct audit_watch *watch)
123{
124 list_del(&watch->wlist);
125 put_inotify_watch(&watch->parent->wdata);
126 watch->parent = NULL;
127 audit_put_watch(watch); /* match initial get */
128}
129
44static inline void audit_free_rule(struct audit_entry *e) 130static inline void audit_free_rule(struct audit_entry *e)
45{ 131{
46 int i; 132 int i;
133
134 /* some rules don't have associated watches */
135 if (e->rule.watch)
136 audit_put_watch(e->rule.watch);
47 if (e->rule.fields) 137 if (e->rule.fields)
48 for (i = 0; i < e->rule.field_count; i++) { 138 for (i = 0; i < e->rule.field_count; i++) {
49 struct audit_field *f = &e->rule.fields[i]; 139 struct audit_field *f = &e->rule.fields[i];
@@ -60,6 +150,50 @@ static inline void audit_free_rule_rcu(struct rcu_head *head)
60 audit_free_rule(e); 150 audit_free_rule(e);
61} 151}
62 152
153/* Initialize a parent watch entry. */
154static struct audit_parent *audit_init_parent(struct nameidata *ndp)
155{
156 struct audit_parent *parent;
157 s32 wd;
158
159 parent = kzalloc(sizeof(*parent), GFP_KERNEL);
160 if (unlikely(!parent))
161 return ERR_PTR(-ENOMEM);
162
163 INIT_LIST_HEAD(&parent->watches);
164 parent->flags = 0;
165
166 inotify_init_watch(&parent->wdata);
167 /* grab a ref so inotify watch hangs around until we take audit_filter_mutex */
168 get_inotify_watch(&parent->wdata);
169 wd = inotify_add_watch(audit_ih, &parent->wdata, ndp->dentry->d_inode,
170 AUDIT_IN_WATCH);
171 if (wd < 0) {
172 audit_free_parent(&parent->wdata);
173 return ERR_PTR(wd);
174 }
175
176 return parent;
177}
178
179/* Initialize a watch entry. */
180static struct audit_watch *audit_init_watch(char *path)
181{
182 struct audit_watch *watch;
183
184 watch = kzalloc(sizeof(*watch), GFP_KERNEL);
185 if (unlikely(!watch))
186 return ERR_PTR(-ENOMEM);
187
188 INIT_LIST_HEAD(&watch->rules);
189 atomic_set(&watch->count, 1);
190 watch->path = path;
191 watch->dev = (dev_t)-1;
192 watch->ino = (unsigned long)-1;
193
194 return watch;
195}
196
63/* Initialize an audit filterlist entry. */ 197/* Initialize an audit filterlist entry. */
64static inline struct audit_entry *audit_init_entry(u32 field_count) 198static inline struct audit_entry *audit_init_entry(u32 field_count)
65{ 199{
@@ -107,6 +241,43 @@ static char *audit_unpack_string(void **bufp, size_t *remain, size_t len)
107 return str; 241 return str;
108} 242}
109 243
244/* Translate an inode field to kernel respresentation. */
245static inline int audit_to_inode(struct audit_krule *krule,
246 struct audit_field *f)
247{
248 if (krule->listnr != AUDIT_FILTER_EXIT ||
249 krule->watch || krule->inode_f)
250 return -EINVAL;
251
252 krule->inode_f = f;
253 return 0;
254}
255
256/* Translate a watch string to kernel respresentation. */
257static int audit_to_watch(struct audit_krule *krule, char *path, int len,
258 u32 op)
259{
260 struct audit_watch *watch;
261
262 if (!audit_ih)
263 return -EOPNOTSUPP;
264
265 if (path[0] != '/' || path[len-1] == '/' ||
266 krule->listnr != AUDIT_FILTER_EXIT ||
267 op & ~AUDIT_EQUAL ||
268 krule->inode_f || krule->watch) /* 1 inode # per rule, for hash */
269 return -EINVAL;
270
271 watch = audit_init_watch(path);
272 if (unlikely(IS_ERR(watch)))
273 return PTR_ERR(watch);
274
275 audit_get_watch(watch);
276 krule->watch = watch;
277
278 return 0;
279}
280
110/* Common user-space to kernel rule translation. */ 281/* Common user-space to kernel rule translation. */
111static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule) 282static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
112{ 283{
@@ -128,8 +299,11 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
128#endif 299#endif
129 ; 300 ;
130 } 301 }
131 if (rule->action != AUDIT_NEVER && rule->action != AUDIT_POSSIBLE && 302 if (unlikely(rule->action == AUDIT_POSSIBLE)) {
132 rule->action != AUDIT_ALWAYS) 303 printk(KERN_ERR "AUDIT_POSSIBLE is deprecated\n");
304 goto exit_err;
305 }
306 if (rule->action != AUDIT_NEVER && rule->action != AUDIT_ALWAYS)
133 goto exit_err; 307 goto exit_err;
134 if (rule->field_count > AUDIT_MAX_FIELDS) 308 if (rule->field_count > AUDIT_MAX_FIELDS)
135 goto exit_err; 309 goto exit_err;
@@ -158,6 +332,7 @@ exit_err:
158static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule) 332static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
159{ 333{
160 struct audit_entry *entry; 334 struct audit_entry *entry;
335 struct audit_field *f;
161 int err = 0; 336 int err = 0;
162 int i; 337 int i;
163 338
@@ -172,14 +347,37 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
172 f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS); 347 f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS);
173 f->val = rule->values[i]; 348 f->val = rule->values[i];
174 349
175 if (f->type & AUDIT_UNUSED_BITS || 350 err = -EINVAL;
176 f->type == AUDIT_SE_USER || 351 switch(f->type) {
177 f->type == AUDIT_SE_ROLE || 352 default:
178 f->type == AUDIT_SE_TYPE ||
179 f->type == AUDIT_SE_SEN ||
180 f->type == AUDIT_SE_CLR) {
181 err = -EINVAL;
182 goto exit_free; 353 goto exit_free;
354 case AUDIT_PID:
355 case AUDIT_UID:
356 case AUDIT_EUID:
357 case AUDIT_SUID:
358 case AUDIT_FSUID:
359 case AUDIT_GID:
360 case AUDIT_EGID:
361 case AUDIT_SGID:
362 case AUDIT_FSGID:
363 case AUDIT_LOGINUID:
364 case AUDIT_PERS:
365 case AUDIT_ARCH:
366 case AUDIT_MSGTYPE:
367 case AUDIT_DEVMAJOR:
368 case AUDIT_DEVMINOR:
369 case AUDIT_EXIT:
370 case AUDIT_SUCCESS:
371 case AUDIT_ARG0:
372 case AUDIT_ARG1:
373 case AUDIT_ARG2:
374 case AUDIT_ARG3:
375 break;
376 case AUDIT_INODE:
377 err = audit_to_inode(&entry->rule, f);
378 if (err)
379 goto exit_free;
380 break;
183 } 381 }
184 382
185 entry->rule.vers_ops = (f->op & AUDIT_OPERATORS) ? 2 : 1; 383 entry->rule.vers_ops = (f->op & AUDIT_OPERATORS) ? 2 : 1;
@@ -196,6 +394,18 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
196 } 394 }
197 } 395 }
198 396
397 f = entry->rule.inode_f;
398 if (f) {
399 switch(f->op) {
400 case AUDIT_NOT_EQUAL:
401 entry->rule.inode_f = NULL;
402 case AUDIT_EQUAL:
403 break;
404 default:
405 goto exit_free;
406 }
407 }
408
199exit_nofree: 409exit_nofree:
200 return entry; 410 return entry;
201 411
@@ -210,6 +420,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
210{ 420{
211 int err = 0; 421 int err = 0;
212 struct audit_entry *entry; 422 struct audit_entry *entry;
423 struct audit_field *f;
213 void *bufp; 424 void *bufp;
214 size_t remain = datasz - sizeof(struct audit_rule_data); 425 size_t remain = datasz - sizeof(struct audit_rule_data);
215 int i; 426 int i;
@@ -235,6 +446,29 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
235 f->se_str = NULL; 446 f->se_str = NULL;
236 f->se_rule = NULL; 447 f->se_rule = NULL;
237 switch(f->type) { 448 switch(f->type) {
449 case AUDIT_PID:
450 case AUDIT_UID:
451 case AUDIT_EUID:
452 case AUDIT_SUID:
453 case AUDIT_FSUID:
454 case AUDIT_GID:
455 case AUDIT_EGID:
456 case AUDIT_SGID:
457 case AUDIT_FSGID:
458 case AUDIT_LOGINUID:
459 case AUDIT_PERS:
460 case AUDIT_ARCH:
461 case AUDIT_MSGTYPE:
462 case AUDIT_PPID:
463 case AUDIT_DEVMAJOR:
464 case AUDIT_DEVMINOR:
465 case AUDIT_EXIT:
466 case AUDIT_SUCCESS:
467 case AUDIT_ARG0:
468 case AUDIT_ARG1:
469 case AUDIT_ARG2:
470 case AUDIT_ARG3:
471 break;
238 case AUDIT_SE_USER: 472 case AUDIT_SE_USER:
239 case AUDIT_SE_ROLE: 473 case AUDIT_SE_ROLE:
240 case AUDIT_SE_TYPE: 474 case AUDIT_SE_TYPE:
@@ -260,6 +494,37 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
260 } else 494 } else
261 f->se_str = str; 495 f->se_str = str;
262 break; 496 break;
497 case AUDIT_WATCH:
498 str = audit_unpack_string(&bufp, &remain, f->val);
499 if (IS_ERR(str))
500 goto exit_free;
501 entry->rule.buflen += f->val;
502
503 err = audit_to_watch(&entry->rule, str, f->val, f->op);
504 if (err) {
505 kfree(str);
506 goto exit_free;
507 }
508 break;
509 case AUDIT_INODE:
510 err = audit_to_inode(&entry->rule, f);
511 if (err)
512 goto exit_free;
513 break;
514 default:
515 goto exit_free;
516 }
517 }
518
519 f = entry->rule.inode_f;
520 if (f) {
521 switch(f->op) {
522 case AUDIT_NOT_EQUAL:
523 entry->rule.inode_f = NULL;
524 case AUDIT_EQUAL:
525 break;
526 default:
527 goto exit_free;
263 } 528 }
264 } 529 }
265 530
@@ -291,7 +556,7 @@ static struct audit_rule *audit_krule_to_rule(struct audit_krule *krule)
291 556
292 rule = kmalloc(sizeof(*rule), GFP_KERNEL); 557 rule = kmalloc(sizeof(*rule), GFP_KERNEL);
293 if (unlikely(!rule)) 558 if (unlikely(!rule))
294 return ERR_PTR(-ENOMEM); 559 return NULL;
295 memset(rule, 0, sizeof(*rule)); 560 memset(rule, 0, sizeof(*rule));
296 561
297 rule->flags = krule->flags | krule->listnr; 562 rule->flags = krule->flags | krule->listnr;
@@ -322,7 +587,7 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
322 587
323 data = kmalloc(sizeof(*data) + krule->buflen, GFP_KERNEL); 588 data = kmalloc(sizeof(*data) + krule->buflen, GFP_KERNEL);
324 if (unlikely(!data)) 589 if (unlikely(!data))
325 return ERR_PTR(-ENOMEM); 590 return NULL;
326 memset(data, 0, sizeof(*data)); 591 memset(data, 0, sizeof(*data));
327 592
328 data->flags = krule->flags | krule->listnr; 593 data->flags = krule->flags | krule->listnr;
@@ -343,6 +608,10 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
343 data->buflen += data->values[i] = 608 data->buflen += data->values[i] =
344 audit_pack_string(&bufp, f->se_str); 609 audit_pack_string(&bufp, f->se_str);
345 break; 610 break;
611 case AUDIT_WATCH:
612 data->buflen += data->values[i] =
613 audit_pack_string(&bufp, krule->watch->path);
614 break;
346 default: 615 default:
347 data->values[i] = f->val; 616 data->values[i] = f->val;
348 } 617 }
@@ -378,6 +647,10 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
378 if (strcmp(a->fields[i].se_str, b->fields[i].se_str)) 647 if (strcmp(a->fields[i].se_str, b->fields[i].se_str))
379 return 1; 648 return 1;
380 break; 649 break;
650 case AUDIT_WATCH:
651 if (strcmp(a->watch->path, b->watch->path))
652 return 1;
653 break;
381 default: 654 default:
382 if (a->fields[i].val != b->fields[i].val) 655 if (a->fields[i].val != b->fields[i].val)
383 return 1; 656 return 1;
@@ -391,6 +664,32 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
391 return 0; 664 return 0;
392} 665}
393 666
667/* Duplicate the given audit watch. The new watch's rules list is initialized
668 * to an empty list and wlist is undefined. */
669static struct audit_watch *audit_dupe_watch(struct audit_watch *old)
670{
671 char *path;
672 struct audit_watch *new;
673
674 path = kstrdup(old->path, GFP_KERNEL);
675 if (unlikely(!path))
676 return ERR_PTR(-ENOMEM);
677
678 new = audit_init_watch(path);
679 if (unlikely(IS_ERR(new))) {
680 kfree(path);
681 goto out;
682 }
683
684 new->dev = old->dev;
685 new->ino = old->ino;
686 get_inotify_watch(&old->parent->wdata);
687 new->parent = old->parent;
688
689out:
690 return new;
691}
692
394/* Duplicate selinux field information. The se_rule is opaque, so must be 693/* Duplicate selinux field information. The se_rule is opaque, so must be
395 * re-initialized. */ 694 * re-initialized. */
396static inline int audit_dupe_selinux_field(struct audit_field *df, 695static inline int audit_dupe_selinux_field(struct audit_field *df,
@@ -422,8 +721,11 @@ static inline int audit_dupe_selinux_field(struct audit_field *df,
422/* Duplicate an audit rule. This will be a deep copy with the exception 721/* Duplicate an audit rule. This will be a deep copy with the exception
423 * of the watch - that pointer is carried over. The selinux specific fields 722 * of the watch - that pointer is carried over. The selinux specific fields
424 * will be updated in the copy. The point is to be able to replace the old 723 * will be updated in the copy. The point is to be able to replace the old
425 * rule with the new rule in the filterlist, then free the old rule. */ 724 * rule with the new rule in the filterlist, then free the old rule.
426static struct audit_entry *audit_dupe_rule(struct audit_krule *old) 725 * The rlist element is undefined; list manipulations are handled apart from
726 * the initial copy. */
727static struct audit_entry *audit_dupe_rule(struct audit_krule *old,
728 struct audit_watch *watch)
427{ 729{
428 u32 fcount = old->field_count; 730 u32 fcount = old->field_count;
429 struct audit_entry *entry; 731 struct audit_entry *entry;
@@ -442,6 +744,8 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old)
442 for (i = 0; i < AUDIT_BITMASK_SIZE; i++) 744 for (i = 0; i < AUDIT_BITMASK_SIZE; i++)
443 new->mask[i] = old->mask[i]; 745 new->mask[i] = old->mask[i];
444 new->buflen = old->buflen; 746 new->buflen = old->buflen;
747 new->inode_f = old->inode_f;
748 new->watch = NULL;
445 new->field_count = old->field_count; 749 new->field_count = old->field_count;
446 memcpy(new->fields, old->fields, sizeof(struct audit_field) * fcount); 750 memcpy(new->fields, old->fields, sizeof(struct audit_field) * fcount);
447 751
@@ -463,68 +767,409 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old)
463 } 767 }
464 } 768 }
465 769
770 if (watch) {
771 audit_get_watch(watch);
772 new->watch = watch;
773 }
774
466 return entry; 775 return entry;
467} 776}
468 777
469/* Add rule to given filterlist if not a duplicate. Protected by 778/* Update inode info in audit rules based on filesystem event. */
470 * audit_netlink_mutex. */ 779static void audit_update_watch(struct audit_parent *parent,
780 const char *dname, dev_t dev,
781 unsigned long ino, unsigned invalidating)
782{
783 struct audit_watch *owatch, *nwatch, *nextw;
784 struct audit_krule *r, *nextr;
785 struct audit_entry *oentry, *nentry;
786 struct audit_buffer *ab;
787
788 mutex_lock(&audit_filter_mutex);
789 list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) {
790 if (audit_compare_dname_path(dname, owatch->path, NULL))
791 continue;
792
793 /* If the update involves invalidating rules, do the inode-based
794 * filtering now, so we don't omit records. */
795 if (invalidating &&
796 audit_filter_inodes(current, current->audit_context) == AUDIT_RECORD_CONTEXT)
797 audit_set_auditable(current->audit_context);
798
799 nwatch = audit_dupe_watch(owatch);
800 if (unlikely(IS_ERR(nwatch))) {
801 mutex_unlock(&audit_filter_mutex);
802 audit_panic("error updating watch, skipping");
803 return;
804 }
805 nwatch->dev = dev;
806 nwatch->ino = ino;
807
808 list_for_each_entry_safe(r, nextr, &owatch->rules, rlist) {
809
810 oentry = container_of(r, struct audit_entry, rule);
811 list_del(&oentry->rule.rlist);
812 list_del_rcu(&oentry->list);
813
814 nentry = audit_dupe_rule(&oentry->rule, nwatch);
815 if (unlikely(IS_ERR(nentry)))
816 audit_panic("error updating watch, removing");
817 else {
818 int h = audit_hash_ino((u32)ino);
819 list_add(&nentry->rule.rlist, &nwatch->rules);
820 list_add_rcu(&nentry->list, &audit_inode_hash[h]);
821 }
822
823 call_rcu(&oentry->rcu, audit_free_rule_rcu);
824 }
825
826 ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
827 audit_log_format(ab, "audit updated rules specifying watch=");
828 audit_log_untrustedstring(ab, owatch->path);
829 audit_log_format(ab, " with dev=%u ino=%lu\n", dev, ino);
830 audit_log_end(ab);
831
832 audit_remove_watch(owatch);
833 goto add_watch_to_parent; /* event applies to a single watch */
834 }
835 mutex_unlock(&audit_filter_mutex);
836 return;
837
838add_watch_to_parent:
839 list_add(&nwatch->wlist, &parent->watches);
840 mutex_unlock(&audit_filter_mutex);
841 return;
842}
843
844/* Remove all watches & rules associated with a parent that is going away. */
845static void audit_remove_parent_watches(struct audit_parent *parent)
846{
847 struct audit_watch *w, *nextw;
848 struct audit_krule *r, *nextr;
849 struct audit_entry *e;
850
851 mutex_lock(&audit_filter_mutex);
852 parent->flags |= AUDIT_PARENT_INVALID;
853 list_for_each_entry_safe(w, nextw, &parent->watches, wlist) {
854 list_for_each_entry_safe(r, nextr, &w->rules, rlist) {
855 e = container_of(r, struct audit_entry, rule);
856 list_del(&r->rlist);
857 list_del_rcu(&e->list);
858 call_rcu(&e->rcu, audit_free_rule_rcu);
859
860 audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
861 "audit implicitly removed rule from list=%d\n",
862 AUDIT_FILTER_EXIT);
863 }
864 audit_remove_watch(w);
865 }
866 mutex_unlock(&audit_filter_mutex);
867}
868
869/* Unregister inotify watches for parents on in_list.
870 * Generates an IN_IGNORED event. */
871static void audit_inotify_unregister(struct list_head *in_list)
872{
873 struct audit_parent *p, *n;
874
875 list_for_each_entry_safe(p, n, in_list, ilist) {
876 list_del(&p->ilist);
877 inotify_rm_watch(audit_ih, &p->wdata);
878 /* the put matching the get in audit_do_del_rule() */
879 put_inotify_watch(&p->wdata);
880 }
881}
882
883/* Find an existing audit rule.
884 * Caller must hold audit_filter_mutex to prevent stale rule data. */
885static struct audit_entry *audit_find_rule(struct audit_entry *entry,
886 struct list_head *list)
887{
888 struct audit_entry *e, *found = NULL;
889 int h;
890
891 if (entry->rule.watch) {
892 /* we don't know the inode number, so must walk entire hash */
893 for (h = 0; h < AUDIT_INODE_BUCKETS; h++) {
894 list = &audit_inode_hash[h];
895 list_for_each_entry(e, list, list)
896 if (!audit_compare_rule(&entry->rule, &e->rule)) {
897 found = e;
898 goto out;
899 }
900 }
901 goto out;
902 }
903
904 list_for_each_entry(e, list, list)
905 if (!audit_compare_rule(&entry->rule, &e->rule)) {
906 found = e;
907 goto out;
908 }
909
910out:
911 return found;
912}
913
914/* Get path information necessary for adding watches. */
915static int audit_get_nd(char *path, struct nameidata **ndp,
916 struct nameidata **ndw)
917{
918 struct nameidata *ndparent, *ndwatch;
919 int err;
920
921 ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL);
922 if (unlikely(!ndparent))
923 return -ENOMEM;
924
925 ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL);
926 if (unlikely(!ndwatch)) {
927 kfree(ndparent);
928 return -ENOMEM;
929 }
930
931 err = path_lookup(path, LOOKUP_PARENT, ndparent);
932 if (err) {
933 kfree(ndparent);
934 kfree(ndwatch);
935 return err;
936 }
937
938 err = path_lookup(path, 0, ndwatch);
939 if (err) {
940 kfree(ndwatch);
941 ndwatch = NULL;
942 }
943
944 *ndp = ndparent;
945 *ndw = ndwatch;
946
947 return 0;
948}
949
950/* Release resources used for watch path information. */
951static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw)
952{
953 if (ndp) {
954 path_release(ndp);
955 kfree(ndp);
956 }
957 if (ndw) {
958 path_release(ndw);
959 kfree(ndw);
960 }
961}
962
963/* Associate the given rule with an existing parent inotify_watch.
964 * Caller must hold audit_filter_mutex. */
965static void audit_add_to_parent(struct audit_krule *krule,
966 struct audit_parent *parent)
967{
968 struct audit_watch *w, *watch = krule->watch;
969 int watch_found = 0;
970
971 list_for_each_entry(w, &parent->watches, wlist) {
972 if (strcmp(watch->path, w->path))
973 continue;
974
975 watch_found = 1;
976
977 /* put krule's and initial refs to temporary watch */
978 audit_put_watch(watch);
979 audit_put_watch(watch);
980
981 audit_get_watch(w);
982 krule->watch = watch = w;
983 break;
984 }
985
986 if (!watch_found) {
987 get_inotify_watch(&parent->wdata);
988 watch->parent = parent;
989
990 list_add(&watch->wlist, &parent->watches);
991 }
992 list_add(&krule->rlist, &watch->rules);
993}
994
995/* Find a matching watch entry, or add this one.
996 * Caller must hold audit_filter_mutex. */
997static int audit_add_watch(struct audit_krule *krule, struct nameidata *ndp,
998 struct nameidata *ndw)
999{
1000 struct audit_watch *watch = krule->watch;
1001 struct inotify_watch *i_watch;
1002 struct audit_parent *parent;
1003 int ret = 0;
1004
1005 /* update watch filter fields */
1006 if (ndw) {
1007 watch->dev = ndw->dentry->d_inode->i_sb->s_dev;
1008 watch->ino = ndw->dentry->d_inode->i_ino;
1009 }
1010
1011 /* The audit_filter_mutex must not be held during inotify calls because
1012 * we hold it during inotify event callback processing. If an existing
1013 * inotify watch is found, inotify_find_watch() grabs a reference before
1014 * returning.
1015 */
1016 mutex_unlock(&audit_filter_mutex);
1017
1018 if (inotify_find_watch(audit_ih, ndp->dentry->d_inode, &i_watch) < 0) {
1019 parent = audit_init_parent(ndp);
1020 if (IS_ERR(parent)) {
1021 /* caller expects mutex locked */
1022 mutex_lock(&audit_filter_mutex);
1023 return PTR_ERR(parent);
1024 }
1025 } else
1026 parent = container_of(i_watch, struct audit_parent, wdata);
1027
1028 mutex_lock(&audit_filter_mutex);
1029
1030 /* parent was moved before we took audit_filter_mutex */
1031 if (parent->flags & AUDIT_PARENT_INVALID)
1032 ret = -ENOENT;
1033 else
1034 audit_add_to_parent(krule, parent);
1035
1036 /* match get in audit_init_parent or inotify_find_watch */
1037 put_inotify_watch(&parent->wdata);
1038 return ret;
1039}
1040
1041/* Add rule to given filterlist if not a duplicate. */
471static inline int audit_add_rule(struct audit_entry *entry, 1042static inline int audit_add_rule(struct audit_entry *entry,
472 struct list_head *list) 1043 struct list_head *list)
473{ 1044{
474 struct audit_entry *e; 1045 struct audit_entry *e;
1046 struct audit_field *inode_f = entry->rule.inode_f;
1047 struct audit_watch *watch = entry->rule.watch;
1048 struct nameidata *ndp, *ndw;
1049 int h, err, putnd_needed = 0;
1050
1051 if (inode_f) {
1052 h = audit_hash_ino(inode_f->val);
1053 list = &audit_inode_hash[h];
1054 }
475 1055
476 /* Do not use the _rcu iterator here, since this is the only 1056 mutex_lock(&audit_filter_mutex);
477 * addition routine. */ 1057 e = audit_find_rule(entry, list);
478 list_for_each_entry(e, list, list) { 1058 mutex_unlock(&audit_filter_mutex);
479 if (!audit_compare_rule(&entry->rule, &e->rule)) 1059 if (e) {
480 return -EEXIST; 1060 err = -EEXIST;
1061 goto error;
1062 }
1063
1064 /* Avoid calling path_lookup under audit_filter_mutex. */
1065 if (watch) {
1066 err = audit_get_nd(watch->path, &ndp, &ndw);
1067 if (err)
1068 goto error;
1069 putnd_needed = 1;
1070 }
1071
1072 mutex_lock(&audit_filter_mutex);
1073 if (watch) {
1074 /* audit_filter_mutex is dropped and re-taken during this call */
1075 err = audit_add_watch(&entry->rule, ndp, ndw);
1076 if (err) {
1077 mutex_unlock(&audit_filter_mutex);
1078 goto error;
1079 }
1080 h = audit_hash_ino((u32)watch->ino);
1081 list = &audit_inode_hash[h];
481 } 1082 }
482 1083
483 if (entry->rule.flags & AUDIT_FILTER_PREPEND) { 1084 if (entry->rule.flags & AUDIT_FILTER_PREPEND) {
484 list_add_rcu(&entry->list, list); 1085 list_add_rcu(&entry->list, list);
1086 entry->rule.flags &= ~AUDIT_FILTER_PREPEND;
485 } else { 1087 } else {
486 list_add_tail_rcu(&entry->list, list); 1088 list_add_tail_rcu(&entry->list, list);
487 } 1089 }
1090 mutex_unlock(&audit_filter_mutex);
488 1091
489 return 0; 1092 if (putnd_needed)
1093 audit_put_nd(ndp, ndw);
1094
1095 return 0;
1096
1097error:
1098 if (putnd_needed)
1099 audit_put_nd(ndp, ndw);
1100 if (watch)
1101 audit_put_watch(watch); /* tmp watch, matches initial get */
1102 return err;
490} 1103}
491 1104
492/* Remove an existing rule from filterlist. Protected by 1105/* Remove an existing rule from filterlist. */
493 * audit_netlink_mutex. */
494static inline int audit_del_rule(struct audit_entry *entry, 1106static inline int audit_del_rule(struct audit_entry *entry,
495 struct list_head *list) 1107 struct list_head *list)
496{ 1108{
497 struct audit_entry *e; 1109 struct audit_entry *e;
1110 struct audit_field *inode_f = entry->rule.inode_f;
1111 struct audit_watch *watch, *tmp_watch = entry->rule.watch;
1112 LIST_HEAD(inotify_list);
1113 int h, ret = 0;
1114
1115 if (inode_f) {
1116 h = audit_hash_ino(inode_f->val);
1117 list = &audit_inode_hash[h];
1118 }
498 1119
499 /* Do not use the _rcu iterator here, since this is the only 1120 mutex_lock(&audit_filter_mutex);
500 * deletion routine. */ 1121 e = audit_find_rule(entry, list);
501 list_for_each_entry(e, list, list) { 1122 if (!e) {
502 if (!audit_compare_rule(&entry->rule, &e->rule)) { 1123 mutex_unlock(&audit_filter_mutex);
503 list_del_rcu(&e->list); 1124 ret = -ENOENT;
504 call_rcu(&e->rcu, audit_free_rule_rcu); 1125 goto out;
505 return 0; 1126 }
1127
1128 watch = e->rule.watch;
1129 if (watch) {
1130 struct audit_parent *parent = watch->parent;
1131
1132 list_del(&e->rule.rlist);
1133
1134 if (list_empty(&watch->rules)) {
1135 audit_remove_watch(watch);
1136
1137 if (list_empty(&parent->watches)) {
1138 /* Put parent on the inotify un-registration
1139 * list. Grab a reference before releasing
1140 * audit_filter_mutex, to be released in
1141 * audit_inotify_unregister(). */
1142 list_add(&parent->ilist, &inotify_list);
1143 get_inotify_watch(&parent->wdata);
1144 }
506 } 1145 }
507 } 1146 }
508 return -ENOENT; /* No matching rule */ 1147
1148 list_del_rcu(&e->list);
1149 call_rcu(&e->rcu, audit_free_rule_rcu);
1150
1151 mutex_unlock(&audit_filter_mutex);
1152
1153 if (!list_empty(&inotify_list))
1154 audit_inotify_unregister(&inotify_list);
1155
1156out:
1157 if (tmp_watch)
1158 audit_put_watch(tmp_watch); /* match initial get */
1159
1160 return ret;
509} 1161}
510 1162
511/* List rules using struct audit_rule. Exists for backward 1163/* List rules using struct audit_rule. Exists for backward
512 * compatibility with userspace. */ 1164 * compatibility with userspace. */
513static int audit_list(void *_dest) 1165static void audit_list(int pid, int seq, struct sk_buff_head *q)
514{ 1166{
515 int pid, seq; 1167 struct sk_buff *skb;
516 int *dest = _dest;
517 struct audit_entry *entry; 1168 struct audit_entry *entry;
518 int i; 1169 int i;
519 1170
520 pid = dest[0]; 1171 /* This is a blocking read, so use audit_filter_mutex instead of rcu
521 seq = dest[1]; 1172 * iterator to sync with list writers. */
522 kfree(dest);
523
524 mutex_lock(&audit_netlink_mutex);
525
526 /* The *_rcu iterators not needed here because we are
527 always called with audit_netlink_mutex held. */
528 for (i=0; i<AUDIT_NR_FILTERS; i++) { 1173 for (i=0; i<AUDIT_NR_FILTERS; i++) {
529 list_for_each_entry(entry, &audit_filter_list[i], list) { 1174 list_for_each_entry(entry, &audit_filter_list[i], list) {
530 struct audit_rule *rule; 1175 struct audit_rule *rule;
@@ -532,33 +1177,41 @@ static int audit_list(void *_dest)
532 rule = audit_krule_to_rule(&entry->rule); 1177 rule = audit_krule_to_rule(&entry->rule);
533 if (unlikely(!rule)) 1178 if (unlikely(!rule))
534 break; 1179 break;
535 audit_send_reply(pid, seq, AUDIT_LIST, 0, 1, 1180 skb = audit_make_reply(pid, seq, AUDIT_LIST, 0, 1,
536 rule, sizeof(*rule)); 1181 rule, sizeof(*rule));
1182 if (skb)
1183 skb_queue_tail(q, skb);
537 kfree(rule); 1184 kfree(rule);
538 } 1185 }
539 } 1186 }
540 audit_send_reply(pid, seq, AUDIT_LIST, 1, 1, NULL, 0); 1187 for (i = 0; i < AUDIT_INODE_BUCKETS; i++) {
541 1188 list_for_each_entry(entry, &audit_inode_hash[i], list) {
542 mutex_unlock(&audit_netlink_mutex); 1189 struct audit_rule *rule;
543 return 0; 1190
1191 rule = audit_krule_to_rule(&entry->rule);
1192 if (unlikely(!rule))
1193 break;
1194 skb = audit_make_reply(pid, seq, AUDIT_LIST, 0, 1,
1195 rule, sizeof(*rule));
1196 if (skb)
1197 skb_queue_tail(q, skb);
1198 kfree(rule);
1199 }
1200 }
1201 skb = audit_make_reply(pid, seq, AUDIT_LIST, 1, 1, NULL, 0);
1202 if (skb)
1203 skb_queue_tail(q, skb);
544} 1204}
545 1205
546/* List rules using struct audit_rule_data. */ 1206/* List rules using struct audit_rule_data. */
547static int audit_list_rules(void *_dest) 1207static void audit_list_rules(int pid, int seq, struct sk_buff_head *q)
548{ 1208{
549 int pid, seq; 1209 struct sk_buff *skb;
550 int *dest = _dest;
551 struct audit_entry *e; 1210 struct audit_entry *e;
552 int i; 1211 int i;
553 1212
554 pid = dest[0]; 1213 /* This is a blocking read, so use audit_filter_mutex instead of rcu
555 seq = dest[1]; 1214 * iterator to sync with list writers. */
556 kfree(dest);
557
558 mutex_lock(&audit_netlink_mutex);
559
560 /* The *_rcu iterators not needed here because we are
561 always called with audit_netlink_mutex held. */
562 for (i=0; i<AUDIT_NR_FILTERS; i++) { 1215 for (i=0; i<AUDIT_NR_FILTERS; i++) {
563 list_for_each_entry(e, &audit_filter_list[i], list) { 1216 list_for_each_entry(e, &audit_filter_list[i], list) {
564 struct audit_rule_data *data; 1217 struct audit_rule_data *data;
@@ -566,15 +1219,30 @@ static int audit_list_rules(void *_dest)
566 data = audit_krule_to_data(&e->rule); 1219 data = audit_krule_to_data(&e->rule);
567 if (unlikely(!data)) 1220 if (unlikely(!data))
568 break; 1221 break;
569 audit_send_reply(pid, seq, AUDIT_LIST_RULES, 0, 1, 1222 skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 0, 1,
570 data, sizeof(*data)); 1223 data, sizeof(*data) + data->buflen);
1224 if (skb)
1225 skb_queue_tail(q, skb);
571 kfree(data); 1226 kfree(data);
572 } 1227 }
573 } 1228 }
574 audit_send_reply(pid, seq, AUDIT_LIST_RULES, 1, 1, NULL, 0); 1229 for (i=0; i< AUDIT_INODE_BUCKETS; i++) {
1230 list_for_each_entry(e, &audit_inode_hash[i], list) {
1231 struct audit_rule_data *data;
575 1232
576 mutex_unlock(&audit_netlink_mutex); 1233 data = audit_krule_to_data(&e->rule);
577 return 0; 1234 if (unlikely(!data))
1235 break;
1236 skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 0, 1,
1237 data, sizeof(*data) + data->buflen);
1238 if (skb)
1239 skb_queue_tail(q, skb);
1240 kfree(data);
1241 }
1242 }
1243 skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 1, 1, NULL, 0);
1244 if (skb)
1245 skb_queue_tail(q, skb);
578} 1246}
579 1247
580/** 1248/**
@@ -592,7 +1260,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
592 size_t datasz, uid_t loginuid, u32 sid) 1260 size_t datasz, uid_t loginuid, u32 sid)
593{ 1261{
594 struct task_struct *tsk; 1262 struct task_struct *tsk;
595 int *dest; 1263 struct audit_netlink_list *dest;
596 int err = 0; 1264 int err = 0;
597 struct audit_entry *entry; 1265 struct audit_entry *entry;
598 1266
@@ -605,18 +1273,22 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
605 * happen if we're actually running in the context of auditctl 1273 * happen if we're actually running in the context of auditctl
606 * trying to _send_ the stuff */ 1274 * trying to _send_ the stuff */
607 1275
608 dest = kmalloc(2 * sizeof(int), GFP_KERNEL); 1276 dest = kmalloc(sizeof(struct audit_netlink_list), GFP_KERNEL);
609 if (!dest) 1277 if (!dest)
610 return -ENOMEM; 1278 return -ENOMEM;
611 dest[0] = pid; 1279 dest->pid = pid;
612 dest[1] = seq; 1280 skb_queue_head_init(&dest->q);
613 1281
1282 mutex_lock(&audit_filter_mutex);
614 if (type == AUDIT_LIST) 1283 if (type == AUDIT_LIST)
615 tsk = kthread_run(audit_list, dest, "audit_list"); 1284 audit_list(pid, seq, &dest->q);
616 else 1285 else
617 tsk = kthread_run(audit_list_rules, dest, 1286 audit_list_rules(pid, seq, &dest->q);
618 "audit_list_rules"); 1287 mutex_unlock(&audit_filter_mutex);
1288
1289 tsk = kthread_run(audit_send_list, dest, "audit_send_list");
619 if (IS_ERR(tsk)) { 1290 if (IS_ERR(tsk)) {
1291 skb_queue_purge(&dest->q);
620 kfree(dest); 1292 kfree(dest);
621 err = PTR_ERR(tsk); 1293 err = PTR_ERR(tsk);
622 } 1294 }
@@ -632,6 +1304,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
632 1304
633 err = audit_add_rule(entry, 1305 err = audit_add_rule(entry,
634 &audit_filter_list[entry->rule.listnr]); 1306 &audit_filter_list[entry->rule.listnr]);
1307
635 if (sid) { 1308 if (sid) {
636 char *ctx = NULL; 1309 char *ctx = NULL;
637 u32 len; 1310 u32 len;
@@ -712,7 +1385,43 @@ int audit_comparator(const u32 left, const u32 op, const u32 right)
712 return 0; 1385 return 0;
713} 1386}
714 1387
1388/* Compare given dentry name with last component in given path,
1389 * return of 0 indicates a match. */
1390int audit_compare_dname_path(const char *dname, const char *path,
1391 int *dirlen)
1392{
1393 int dlen, plen;
1394 const char *p;
715 1395
1396 if (!dname || !path)
1397 return 1;
1398
1399 dlen = strlen(dname);
1400 plen = strlen(path);
1401 if (plen < dlen)
1402 return 1;
1403
1404 /* disregard trailing slashes */
1405 p = path + plen - 1;
1406 while ((*p == '/') && (p > path))
1407 p--;
1408
1409 /* find last path component */
1410 p = p - dlen + 1;
1411 if (p < path)
1412 return 1;
1413 else if (p > path) {
1414 if (*--p != '/')
1415 return 1;
1416 else
1417 p++;
1418 }
1419
1420 /* return length of path's directory component */
1421 if (dirlen)
1422 *dirlen = p - path;
1423 return strncmp(p, dname, dlen);
1424}
716 1425
717static int audit_filter_user_rules(struct netlink_skb_parms *cb, 1426static int audit_filter_user_rules(struct netlink_skb_parms *cb,
718 struct audit_krule *rule, 1427 struct audit_krule *rule,
@@ -744,7 +1453,6 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb,
744 } 1453 }
745 switch (rule->action) { 1454 switch (rule->action) {
746 case AUDIT_NEVER: *state = AUDIT_DISABLED; break; 1455 case AUDIT_NEVER: *state = AUDIT_DISABLED; break;
747 case AUDIT_POSSIBLE: *state = AUDIT_BUILD_CONTEXT; break;
748 case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; 1456 case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break;
749 } 1457 }
750 return 1; 1458 return 1;
@@ -826,32 +1534,65 @@ static inline int audit_rule_has_selinux(struct audit_krule *rule)
826int selinux_audit_rule_update(void) 1534int selinux_audit_rule_update(void)
827{ 1535{
828 struct audit_entry *entry, *n, *nentry; 1536 struct audit_entry *entry, *n, *nentry;
1537 struct audit_watch *watch;
829 int i, err = 0; 1538 int i, err = 0;
830 1539
831 /* audit_netlink_mutex synchronizes the writers */ 1540 /* audit_filter_mutex synchronizes the writers */
832 mutex_lock(&audit_netlink_mutex); 1541 mutex_lock(&audit_filter_mutex);
833 1542
834 for (i = 0; i < AUDIT_NR_FILTERS; i++) { 1543 for (i = 0; i < AUDIT_NR_FILTERS; i++) {
835 list_for_each_entry_safe(entry, n, &audit_filter_list[i], list) { 1544 list_for_each_entry_safe(entry, n, &audit_filter_list[i], list) {
836 if (!audit_rule_has_selinux(&entry->rule)) 1545 if (!audit_rule_has_selinux(&entry->rule))
837 continue; 1546 continue;
838 1547
839 nentry = audit_dupe_rule(&entry->rule); 1548 watch = entry->rule.watch;
1549 nentry = audit_dupe_rule(&entry->rule, watch);
840 if (unlikely(IS_ERR(nentry))) { 1550 if (unlikely(IS_ERR(nentry))) {
841 /* save the first error encountered for the 1551 /* save the first error encountered for the
842 * return value */ 1552 * return value */
843 if (!err) 1553 if (!err)
844 err = PTR_ERR(nentry); 1554 err = PTR_ERR(nentry);
845 audit_panic("error updating selinux filters"); 1555 audit_panic("error updating selinux filters");
1556 if (watch)
1557 list_del(&entry->rule.rlist);
846 list_del_rcu(&entry->list); 1558 list_del_rcu(&entry->list);
847 } else { 1559 } else {
1560 if (watch) {
1561 list_add(&nentry->rule.rlist,
1562 &watch->rules);
1563 list_del(&entry->rule.rlist);
1564 }
848 list_replace_rcu(&entry->list, &nentry->list); 1565 list_replace_rcu(&entry->list, &nentry->list);
849 } 1566 }
850 call_rcu(&entry->rcu, audit_free_rule_rcu); 1567 call_rcu(&entry->rcu, audit_free_rule_rcu);
851 } 1568 }
852 } 1569 }
853 1570
854 mutex_unlock(&audit_netlink_mutex); 1571 mutex_unlock(&audit_filter_mutex);
855 1572
856 return err; 1573 return err;
857} 1574}
1575
1576/* Update watch data in audit rules based on inotify events. */
1577void audit_handle_ievent(struct inotify_watch *i_watch, u32 wd, u32 mask,
1578 u32 cookie, const char *dname, struct inode *inode)
1579{
1580 struct audit_parent *parent;
1581
1582 parent = container_of(i_watch, struct audit_parent, wdata);
1583
1584 if (mask & (IN_CREATE|IN_MOVED_TO) && inode)
1585 audit_update_watch(parent, dname, inode->i_sb->s_dev,
1586 inode->i_ino, 0);
1587 else if (mask & (IN_DELETE|IN_MOVED_FROM))
1588 audit_update_watch(parent, dname, (dev_t)-1, (unsigned long)-1, 1);
1589 /* inotify automatically removes the watch and sends IN_IGNORED */
1590 else if (mask & (IN_DELETE_SELF|IN_UNMOUNT))
1591 audit_remove_parent_watches(parent);
1592 /* inotify does not remove the watch, so remove it manually */
1593 else if(mask & IN_MOVE_SELF) {
1594 audit_remove_parent_watches(parent);
1595 inotify_remove_watch_locked(audit_ih, i_watch);
1596 } else if (mask & IN_IGNORED)
1597 put_inotify_watch(i_watch);
1598}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 1c03a4ed1b..9ebd96fda2 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. 4 * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
5 * Copyright 2005 Hewlett-Packard Development Company, L.P. 5 * Copyright 2005 Hewlett-Packard Development Company, L.P.
6 * Copyright (C) 2005 IBM Corporation 6 * Copyright (C) 2005, 2006 IBM Corporation
7 * All Rights Reserved. 7 * All Rights Reserved.
8 * 8 *
9 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
@@ -29,6 +29,9 @@
29 * this file -- see entry.S) is based on a GPL'd patch written by 29 * this file -- see entry.S) is based on a GPL'd patch written by
30 * okir@suse.de and Copyright 2003 SuSE Linux AG. 30 * okir@suse.de and Copyright 2003 SuSE Linux AG.
31 * 31 *
32 * POSIX message queue support added by George Wilson <ltcgcw@us.ibm.com>,
33 * 2006.
34 *
32 * The support of additional filter rules compares (>, <, >=, <=) was 35 * The support of additional filter rules compares (>, <, >=, <=) was
33 * added by Dustin Kirkland <dustin.kirkland@us.ibm.com>, 2005. 36 * added by Dustin Kirkland <dustin.kirkland@us.ibm.com>, 2005.
34 * 37 *
@@ -49,6 +52,7 @@
49#include <linux/module.h> 52#include <linux/module.h>
50#include <linux/mount.h> 53#include <linux/mount.h>
51#include <linux/socket.h> 54#include <linux/socket.h>
55#include <linux/mqueue.h>
52#include <linux/audit.h> 56#include <linux/audit.h>
53#include <linux/personality.h> 57#include <linux/personality.h>
54#include <linux/time.h> 58#include <linux/time.h>
@@ -59,6 +63,8 @@
59#include <linux/list.h> 63#include <linux/list.h>
60#include <linux/tty.h> 64#include <linux/tty.h>
61#include <linux/selinux.h> 65#include <linux/selinux.h>
66#include <linux/binfmts.h>
67#include <linux/syscalls.h>
62 68
63#include "audit.h" 69#include "audit.h"
64 70
@@ -76,6 +82,9 @@ extern int audit_enabled;
76 * path_lookup. */ 82 * path_lookup. */
77#define AUDIT_NAMES_RESERVED 7 83#define AUDIT_NAMES_RESERVED 7
78 84
85/* Indicates that audit should log the full pathname. */
86#define AUDIT_NAME_FULL -1
87
79/* When fs/namei.c:getname() is called, we store the pointer in name and 88/* When fs/namei.c:getname() is called, we store the pointer in name and
80 * we don't let putname() free it (instead we free all of the saved 89 * we don't let putname() free it (instead we free all of the saved
81 * pointers at syscall exit time). 90 * pointers at syscall exit time).
@@ -83,8 +92,9 @@ extern int audit_enabled;
83 * Further, in fs/namei.c:path_lookup() we store the inode and device. */ 92 * Further, in fs/namei.c:path_lookup() we store the inode and device. */
84struct audit_names { 93struct audit_names {
85 const char *name; 94 const char *name;
95 int name_len; /* number of name's characters to log */
96 unsigned name_put; /* call __putname() for this name */
86 unsigned long ino; 97 unsigned long ino;
87 unsigned long pino;
88 dev_t dev; 98 dev_t dev;
89 umode_t mode; 99 umode_t mode;
90 uid_t uid; 100 uid_t uid;
@@ -100,6 +110,33 @@ struct audit_aux_data {
100 110
101#define AUDIT_AUX_IPCPERM 0 111#define AUDIT_AUX_IPCPERM 0
102 112
113struct audit_aux_data_mq_open {
114 struct audit_aux_data d;
115 int oflag;
116 mode_t mode;
117 struct mq_attr attr;
118};
119
120struct audit_aux_data_mq_sendrecv {
121 struct audit_aux_data d;
122 mqd_t mqdes;
123 size_t msg_len;
124 unsigned int msg_prio;
125 struct timespec abs_timeout;
126};
127
128struct audit_aux_data_mq_notify {
129 struct audit_aux_data d;
130 mqd_t mqdes;
131 struct sigevent notification;
132};
133
134struct audit_aux_data_mq_getsetattr {
135 struct audit_aux_data d;
136 mqd_t mqdes;
137 struct mq_attr mqstat;
138};
139
103struct audit_aux_data_ipcctl { 140struct audit_aux_data_ipcctl {
104 struct audit_aux_data d; 141 struct audit_aux_data d;
105 struct ipc_perm p; 142 struct ipc_perm p;
@@ -110,6 +147,13 @@ struct audit_aux_data_ipcctl {
110 u32 osid; 147 u32 osid;
111}; 148};
112 149
150struct audit_aux_data_execve {
151 struct audit_aux_data d;
152 int argc;
153 int envc;
154 char mem[0];
155};
156
113struct audit_aux_data_socketcall { 157struct audit_aux_data_socketcall {
114 struct audit_aux_data d; 158 struct audit_aux_data d;
115 int nargs; 159 int nargs;
@@ -148,7 +192,7 @@ struct audit_context {
148 struct audit_aux_data *aux; 192 struct audit_aux_data *aux;
149 193
150 /* Save things to print about task_struct */ 194 /* Save things to print about task_struct */
151 pid_t pid; 195 pid_t pid, ppid;
152 uid_t uid, euid, suid, fsuid; 196 uid_t uid, euid, suid, fsuid;
153 gid_t gid, egid, sgid, fsgid; 197 gid_t gid, egid, sgid, fsgid;
154 unsigned long personality; 198 unsigned long personality;
@@ -160,12 +204,13 @@ struct audit_context {
160#endif 204#endif
161}; 205};
162 206
163 207/* Determine if any context name data matches a rule's watch data */
164/* Compare a task_struct with an audit_rule. Return 1 on match, 0 208/* Compare a task_struct with an audit_rule. Return 1 on match, 0
165 * otherwise. */ 209 * otherwise. */
166static int audit_filter_rules(struct task_struct *tsk, 210static int audit_filter_rules(struct task_struct *tsk,
167 struct audit_krule *rule, 211 struct audit_krule *rule,
168 struct audit_context *ctx, 212 struct audit_context *ctx,
213 struct audit_names *name,
169 enum audit_state *state) 214 enum audit_state *state)
170{ 215{
171 int i, j, need_sid = 1; 216 int i, j, need_sid = 1;
@@ -179,6 +224,10 @@ static int audit_filter_rules(struct task_struct *tsk,
179 case AUDIT_PID: 224 case AUDIT_PID:
180 result = audit_comparator(tsk->pid, f->op, f->val); 225 result = audit_comparator(tsk->pid, f->op, f->val);
181 break; 226 break;
227 case AUDIT_PPID:
228 if (ctx)
229 result = audit_comparator(ctx->ppid, f->op, f->val);
230 break;
182 case AUDIT_UID: 231 case AUDIT_UID:
183 result = audit_comparator(tsk->uid, f->op, f->val); 232 result = audit_comparator(tsk->uid, f->op, f->val);
184 break; 233 break;
@@ -224,7 +273,10 @@ static int audit_filter_rules(struct task_struct *tsk,
224 } 273 }
225 break; 274 break;
226 case AUDIT_DEVMAJOR: 275 case AUDIT_DEVMAJOR:
227 if (ctx) { 276 if (name)
277 result = audit_comparator(MAJOR(name->dev),
278 f->op, f->val);
279 else if (ctx) {
228 for (j = 0; j < ctx->name_count; j++) { 280 for (j = 0; j < ctx->name_count; j++) {
229 if (audit_comparator(MAJOR(ctx->names[j].dev), f->op, f->val)) { 281 if (audit_comparator(MAJOR(ctx->names[j].dev), f->op, f->val)) {
230 ++result; 282 ++result;
@@ -234,7 +286,10 @@ static int audit_filter_rules(struct task_struct *tsk,
234 } 286 }
235 break; 287 break;
236 case AUDIT_DEVMINOR: 288 case AUDIT_DEVMINOR:
237 if (ctx) { 289 if (name)
290 result = audit_comparator(MINOR(name->dev),
291 f->op, f->val);
292 else if (ctx) {
238 for (j = 0; j < ctx->name_count; j++) { 293 for (j = 0; j < ctx->name_count; j++) {
239 if (audit_comparator(MINOR(ctx->names[j].dev), f->op, f->val)) { 294 if (audit_comparator(MINOR(ctx->names[j].dev), f->op, f->val)) {
240 ++result; 295 ++result;
@@ -244,16 +299,22 @@ static int audit_filter_rules(struct task_struct *tsk,
244 } 299 }
245 break; 300 break;
246 case AUDIT_INODE: 301 case AUDIT_INODE:
247 if (ctx) { 302 if (name)
303 result = (name->ino == f->val);
304 else if (ctx) {
248 for (j = 0; j < ctx->name_count; j++) { 305 for (j = 0; j < ctx->name_count; j++) {
249 if (audit_comparator(ctx->names[j].ino, f->op, f->val) || 306 if (audit_comparator(ctx->names[j].ino, f->op, f->val)) {
250 audit_comparator(ctx->names[j].pino, f->op, f->val)) {
251 ++result; 307 ++result;
252 break; 308 break;
253 } 309 }
254 } 310 }
255 } 311 }
256 break; 312 break;
313 case AUDIT_WATCH:
314 if (name && rule->watch->ino != (unsigned long)-1)
315 result = (name->dev == rule->watch->dev &&
316 name->ino == rule->watch->ino);
317 break;
257 case AUDIT_LOGINUID: 318 case AUDIT_LOGINUID:
258 result = 0; 319 result = 0;
259 if (ctx) 320 if (ctx)
@@ -294,7 +355,6 @@ static int audit_filter_rules(struct task_struct *tsk,
294 } 355 }
295 switch (rule->action) { 356 switch (rule->action) {
296 case AUDIT_NEVER: *state = AUDIT_DISABLED; break; 357 case AUDIT_NEVER: *state = AUDIT_DISABLED; break;
297 case AUDIT_POSSIBLE: *state = AUDIT_BUILD_CONTEXT; break;
298 case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; 358 case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break;
299 } 359 }
300 return 1; 360 return 1;
@@ -311,7 +371,7 @@ static enum audit_state audit_filter_task(struct task_struct *tsk)
311 371
312 rcu_read_lock(); 372 rcu_read_lock();
313 list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_TASK], list) { 373 list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_TASK], list) {
314 if (audit_filter_rules(tsk, &e->rule, NULL, &state)) { 374 if (audit_filter_rules(tsk, &e->rule, NULL, NULL, &state)) {
315 rcu_read_unlock(); 375 rcu_read_unlock();
316 return state; 376 return state;
317 } 377 }
@@ -341,8 +401,47 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
341 int bit = AUDIT_BIT(ctx->major); 401 int bit = AUDIT_BIT(ctx->major);
342 402
343 list_for_each_entry_rcu(e, list, list) { 403 list_for_each_entry_rcu(e, list, list) {
344 if ((e->rule.mask[word] & bit) == bit 404 if ((e->rule.mask[word] & bit) == bit &&
345 && audit_filter_rules(tsk, &e->rule, ctx, &state)) { 405 audit_filter_rules(tsk, &e->rule, ctx, NULL,
406 &state)) {
407 rcu_read_unlock();
408 return state;
409 }
410 }
411 }
412 rcu_read_unlock();
413 return AUDIT_BUILD_CONTEXT;
414}
415
416/* At syscall exit time, this filter is called if any audit_names[] have been
417 * collected during syscall processing. We only check rules in sublists at hash
418 * buckets applicable to the inode numbers in audit_names[].
419 * Regarding audit_state, same rules apply as for audit_filter_syscall().
420 */
421enum audit_state audit_filter_inodes(struct task_struct *tsk,
422 struct audit_context *ctx)
423{
424 int i;
425 struct audit_entry *e;
426 enum audit_state state;
427
428 if (audit_pid && tsk->tgid == audit_pid)
429 return AUDIT_DISABLED;
430
431 rcu_read_lock();
432 for (i = 0; i < ctx->name_count; i++) {
433 int word = AUDIT_WORD(ctx->major);
434 int bit = AUDIT_BIT(ctx->major);
435 struct audit_names *n = &ctx->names[i];
436 int h = audit_hash_ino((u32)n->ino);
437 struct list_head *list = &audit_inode_hash[h];
438
439 if (list_empty(list))
440 continue;
441
442 list_for_each_entry_rcu(e, list, list) {
443 if ((e->rule.mask[word] & bit) == bit &&
444 audit_filter_rules(tsk, &e->rule, ctx, n, &state)) {
346 rcu_read_unlock(); 445 rcu_read_unlock();
347 return state; 446 return state;
348 } 447 }
@@ -352,6 +451,11 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
352 return AUDIT_BUILD_CONTEXT; 451 return AUDIT_BUILD_CONTEXT;
353} 452}
354 453
454void audit_set_auditable(struct audit_context *ctx)
455{
456 ctx->auditable = 1;
457}
458
355static inline struct audit_context *audit_get_context(struct task_struct *tsk, 459static inline struct audit_context *audit_get_context(struct task_struct *tsk,
356 int return_valid, 460 int return_valid,
357 int return_code) 461 int return_code)
@@ -365,12 +469,22 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk,
365 469
366 if (context->in_syscall && !context->auditable) { 470 if (context->in_syscall && !context->auditable) {
367 enum audit_state state; 471 enum audit_state state;
472
368 state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]); 473 state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]);
474 if (state == AUDIT_RECORD_CONTEXT) {
475 context->auditable = 1;
476 goto get_context;
477 }
478
479 state = audit_filter_inodes(tsk, context);
369 if (state == AUDIT_RECORD_CONTEXT) 480 if (state == AUDIT_RECORD_CONTEXT)
370 context->auditable = 1; 481 context->auditable = 1;
482
371 } 483 }
372 484
485get_context:
373 context->pid = tsk->pid; 486 context->pid = tsk->pid;
487 context->ppid = sys_getppid(); /* sic. tsk == current in all cases */
374 context->uid = tsk->uid; 488 context->uid = tsk->uid;
375 context->gid = tsk->gid; 489 context->gid = tsk->gid;
376 context->euid = tsk->euid; 490 context->euid = tsk->euid;
@@ -413,7 +527,7 @@ static inline void audit_free_names(struct audit_context *context)
413#endif 527#endif
414 528
415 for (i = 0; i < context->name_count; i++) { 529 for (i = 0; i < context->name_count; i++) {
416 if (context->names[i].name) 530 if (context->names[i].name && context->names[i].name_put)
417 __putname(context->names[i].name); 531 __putname(context->names[i].name);
418 } 532 }
419 context->name_count = 0; 533 context->name_count = 0;
@@ -606,7 +720,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
606 tty = "(none)"; 720 tty = "(none)";
607 audit_log_format(ab, 721 audit_log_format(ab,
608 " a0=%lx a1=%lx a2=%lx a3=%lx items=%d" 722 " a0=%lx a1=%lx a2=%lx a3=%lx items=%d"
609 " pid=%d auid=%u uid=%u gid=%u" 723 " ppid=%d pid=%d auid=%u uid=%u gid=%u"
610 " euid=%u suid=%u fsuid=%u" 724 " euid=%u suid=%u fsuid=%u"
611 " egid=%u sgid=%u fsgid=%u tty=%s", 725 " egid=%u sgid=%u fsgid=%u tty=%s",
612 context->argv[0], 726 context->argv[0],
@@ -614,6 +728,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
614 context->argv[2], 728 context->argv[2],
615 context->argv[3], 729 context->argv[3],
616 context->name_count, 730 context->name_count,
731 context->ppid,
617 context->pid, 732 context->pid,
618 context->loginuid, 733 context->loginuid,
619 context->uid, 734 context->uid,
@@ -630,11 +745,48 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
630 continue; /* audit_panic has been called */ 745 continue; /* audit_panic has been called */
631 746
632 switch (aux->type) { 747 switch (aux->type) {
748 case AUDIT_MQ_OPEN: {
749 struct audit_aux_data_mq_open *axi = (void *)aux;
750 audit_log_format(ab,
751 "oflag=0x%x mode=%#o mq_flags=0x%lx mq_maxmsg=%ld "
752 "mq_msgsize=%ld mq_curmsgs=%ld",
753 axi->oflag, axi->mode, axi->attr.mq_flags,
754 axi->attr.mq_maxmsg, axi->attr.mq_msgsize,
755 axi->attr.mq_curmsgs);
756 break; }
757
758 case AUDIT_MQ_SENDRECV: {
759 struct audit_aux_data_mq_sendrecv *axi = (void *)aux;
760 audit_log_format(ab,
761 "mqdes=%d msg_len=%zd msg_prio=%u "
762 "abs_timeout_sec=%ld abs_timeout_nsec=%ld",
763 axi->mqdes, axi->msg_len, axi->msg_prio,
764 axi->abs_timeout.tv_sec, axi->abs_timeout.tv_nsec);
765 break; }
766
767 case AUDIT_MQ_NOTIFY: {
768 struct audit_aux_data_mq_notify *axi = (void *)aux;
769 audit_log_format(ab,
770 "mqdes=%d sigev_signo=%d",
771 axi->mqdes,
772 axi->notification.sigev_signo);
773 break; }
774
775 case AUDIT_MQ_GETSETATTR: {
776 struct audit_aux_data_mq_getsetattr *axi = (void *)aux;
777 audit_log_format(ab,
778 "mqdes=%d mq_flags=0x%lx mq_maxmsg=%ld mq_msgsize=%ld "
779 "mq_curmsgs=%ld ",
780 axi->mqdes,
781 axi->mqstat.mq_flags, axi->mqstat.mq_maxmsg,
782 axi->mqstat.mq_msgsize, axi->mqstat.mq_curmsgs);
783 break; }
784
633 case AUDIT_IPC: { 785 case AUDIT_IPC: {
634 struct audit_aux_data_ipcctl *axi = (void *)aux; 786 struct audit_aux_data_ipcctl *axi = (void *)aux;
635 audit_log_format(ab, 787 audit_log_format(ab,
636 " qbytes=%lx iuid=%u igid=%u mode=%x", 788 "ouid=%u ogid=%u mode=%x",
637 axi->qbytes, axi->uid, axi->gid, axi->mode); 789 axi->uid, axi->gid, axi->mode);
638 if (axi->osid != 0) { 790 if (axi->osid != 0) {
639 char *ctx = NULL; 791 char *ctx = NULL;
640 u32 len; 792 u32 len;
@@ -652,19 +804,18 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
652 case AUDIT_IPC_SET_PERM: { 804 case AUDIT_IPC_SET_PERM: {
653 struct audit_aux_data_ipcctl *axi = (void *)aux; 805 struct audit_aux_data_ipcctl *axi = (void *)aux;
654 audit_log_format(ab, 806 audit_log_format(ab,
655 " new qbytes=%lx new iuid=%u new igid=%u new mode=%x", 807 "qbytes=%lx ouid=%u ogid=%u mode=%x",
656 axi->qbytes, axi->uid, axi->gid, axi->mode); 808 axi->qbytes, axi->uid, axi->gid, axi->mode);
657 if (axi->osid != 0) { 809 break; }
658 char *ctx = NULL; 810
659 u32 len; 811 case AUDIT_EXECVE: {
660 if (selinux_ctxid_to_string( 812 struct audit_aux_data_execve *axi = (void *)aux;
661 axi->osid, &ctx, &len)) { 813 int i;
662 audit_log_format(ab, " osid=%u", 814 const char *p;
663 axi->osid); 815 for (i = 0, p = axi->mem; i < axi->argc; i++) {
664 call_panic = 1; 816 audit_log_format(ab, "a%d=", i);
665 } else 817 p = audit_log_untrustedstring(ab, p);
666 audit_log_format(ab, " obj=%s", ctx); 818 audit_log_format(ab, "\n");
667 kfree(ctx);
668 } 819 }
669 break; } 820 break; }
670 821
@@ -700,8 +851,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
700 } 851 }
701 } 852 }
702 for (i = 0; i < context->name_count; i++) { 853 for (i = 0; i < context->name_count; i++) {
703 unsigned long ino = context->names[i].ino; 854 struct audit_names *n = &context->names[i];
704 unsigned long pino = context->names[i].pino;
705 855
706 ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH); 856 ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
707 if (!ab) 857 if (!ab)
@@ -709,33 +859,47 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
709 859
710 audit_log_format(ab, "item=%d", i); 860 audit_log_format(ab, "item=%d", i);
711 861
712 audit_log_format(ab, " name="); 862 if (n->name) {
713 if (context->names[i].name) 863 switch(n->name_len) {
714 audit_log_untrustedstring(ab, context->names[i].name); 864 case AUDIT_NAME_FULL:
715 else 865 /* log the full path */
716 audit_log_format(ab, "(null)"); 866 audit_log_format(ab, " name=");
717 867 audit_log_untrustedstring(ab, n->name);
718 if (pino != (unsigned long)-1) 868 break;
719 audit_log_format(ab, " parent=%lu", pino); 869 case 0:
720 if (ino != (unsigned long)-1) 870 /* name was specified as a relative path and the
721 audit_log_format(ab, " inode=%lu", ino); 871 * directory component is the cwd */
722 if ((pino != (unsigned long)-1) || (ino != (unsigned long)-1)) 872 audit_log_d_path(ab, " name=", context->pwd,
723 audit_log_format(ab, " dev=%02x:%02x mode=%#o" 873 context->pwdmnt);
724 " ouid=%u ogid=%u rdev=%02x:%02x", 874 break;
725 MAJOR(context->names[i].dev), 875 default:
726 MINOR(context->names[i].dev), 876 /* log the name's directory component */
727 context->names[i].mode, 877 audit_log_format(ab, " name=");
728 context->names[i].uid, 878 audit_log_n_untrustedstring(ab, n->name_len,
729 context->names[i].gid, 879 n->name);
730 MAJOR(context->names[i].rdev), 880 }
731 MINOR(context->names[i].rdev)); 881 } else
732 if (context->names[i].osid != 0) { 882 audit_log_format(ab, " name=(null)");
883
884 if (n->ino != (unsigned long)-1) {
885 audit_log_format(ab, " inode=%lu"
886 " dev=%02x:%02x mode=%#o"
887 " ouid=%u ogid=%u rdev=%02x:%02x",
888 n->ino,
889 MAJOR(n->dev),
890 MINOR(n->dev),
891 n->mode,
892 n->uid,
893 n->gid,
894 MAJOR(n->rdev),
895 MINOR(n->rdev));
896 }
897 if (n->osid != 0) {
733 char *ctx = NULL; 898 char *ctx = NULL;
734 u32 len; 899 u32 len;
735 if (selinux_ctxid_to_string( 900 if (selinux_ctxid_to_string(
736 context->names[i].osid, &ctx, &len)) { 901 n->osid, &ctx, &len)) {
737 audit_log_format(ab, " osid=%u", 902 audit_log_format(ab, " osid=%u", n->osid);
738 context->names[i].osid);
739 call_panic = 2; 903 call_panic = 2;
740 } else 904 } else
741 audit_log_format(ab, " obj=%s", ctx); 905 audit_log_format(ab, " obj=%s", ctx);
@@ -908,11 +1072,11 @@ void audit_syscall_exit(int valid, long return_code)
908 * Add a name to the list of audit names for this context. 1072 * Add a name to the list of audit names for this context.
909 * Called from fs/namei.c:getname(). 1073 * Called from fs/namei.c:getname().
910 */ 1074 */
911void audit_getname(const char *name) 1075void __audit_getname(const char *name)
912{ 1076{
913 struct audit_context *context = current->audit_context; 1077 struct audit_context *context = current->audit_context;
914 1078
915 if (!context || IS_ERR(name) || !name) 1079 if (IS_ERR(name) || !name)
916 return; 1080 return;
917 1081
918 if (!context->in_syscall) { 1082 if (!context->in_syscall) {
@@ -925,6 +1089,8 @@ void audit_getname(const char *name)
925 } 1089 }
926 BUG_ON(context->name_count >= AUDIT_NAMES); 1090 BUG_ON(context->name_count >= AUDIT_NAMES);
927 context->names[context->name_count].name = name; 1091 context->names[context->name_count].name = name;
1092 context->names[context->name_count].name_len = AUDIT_NAME_FULL;
1093 context->names[context->name_count].name_put = 1;
928 context->names[context->name_count].ino = (unsigned long)-1; 1094 context->names[context->name_count].ino = (unsigned long)-1;
929 ++context->name_count; 1095 ++context->name_count;
930 if (!context->pwd) { 1096 if (!context->pwd) {
@@ -991,11 +1157,10 @@ static void audit_inode_context(int idx, const struct inode *inode)
991 * audit_inode - store the inode and device from a lookup 1157 * audit_inode - store the inode and device from a lookup
992 * @name: name being audited 1158 * @name: name being audited
993 * @inode: inode being audited 1159 * @inode: inode being audited
994 * @flags: lookup flags (as used in path_lookup())
995 * 1160 *
996 * Called from fs/namei.c:path_lookup(). 1161 * Called from fs/namei.c:path_lookup().
997 */ 1162 */
998void __audit_inode(const char *name, const struct inode *inode, unsigned flags) 1163void __audit_inode(const char *name, const struct inode *inode)
999{ 1164{
1000 int idx; 1165 int idx;
1001 struct audit_context *context = current->audit_context; 1166 struct audit_context *context = current->audit_context;
@@ -1021,20 +1186,13 @@ void __audit_inode(const char *name, const struct inode *inode, unsigned flags)
1021 ++context->ino_count; 1186 ++context->ino_count;
1022#endif 1187#endif
1023 } 1188 }
1189 context->names[idx].ino = inode->i_ino;
1024 context->names[idx].dev = inode->i_sb->s_dev; 1190 context->names[idx].dev = inode->i_sb->s_dev;
1025 context->names[idx].mode = inode->i_mode; 1191 context->names[idx].mode = inode->i_mode;
1026 context->names[idx].uid = inode->i_uid; 1192 context->names[idx].uid = inode->i_uid;
1027 context->names[idx].gid = inode->i_gid; 1193 context->names[idx].gid = inode->i_gid;
1028 context->names[idx].rdev = inode->i_rdev; 1194 context->names[idx].rdev = inode->i_rdev;
1029 audit_inode_context(idx, inode); 1195 audit_inode_context(idx, inode);
1030 if ((flags & LOOKUP_PARENT) && (strcmp(name, "/") != 0) &&
1031 (strcmp(name, ".") != 0)) {
1032 context->names[idx].ino = (unsigned long)-1;
1033 context->names[idx].pino = inode->i_ino;
1034 } else {
1035 context->names[idx].ino = inode->i_ino;
1036 context->names[idx].pino = (unsigned long)-1;
1037 }
1038} 1196}
1039 1197
1040/** 1198/**
@@ -1056,51 +1214,40 @@ void __audit_inode_child(const char *dname, const struct inode *inode,
1056{ 1214{
1057 int idx; 1215 int idx;
1058 struct audit_context *context = current->audit_context; 1216 struct audit_context *context = current->audit_context;
1217 const char *found_name = NULL;
1218 int dirlen = 0;
1059 1219
1060 if (!context->in_syscall) 1220 if (!context->in_syscall)
1061 return; 1221 return;
1062 1222
1063 /* determine matching parent */ 1223 /* determine matching parent */
1064 if (dname) 1224 if (!dname)
1065 for (idx = 0; idx < context->name_count; idx++) 1225 goto update_context;
1066 if (context->names[idx].pino == pino) { 1226 for (idx = 0; idx < context->name_count; idx++)
1067 const char *n; 1227 if (context->names[idx].ino == pino) {
1068 const char *name = context->names[idx].name; 1228 const char *name = context->names[idx].name;
1069 int dlen = strlen(dname); 1229
1070 int nlen = name ? strlen(name) : 0; 1230 if (!name)
1071 1231 continue;
1072 if (nlen < dlen) 1232
1073 continue; 1233 if (audit_compare_dname_path(dname, name, &dirlen) == 0) {
1074 1234 context->names[idx].name_len = dirlen;
1075 /* disregard trailing slashes */ 1235 found_name = name;
1076 n = name + nlen - 1; 1236 break;
1077 while ((*n == '/') && (n > name))
1078 n--;
1079
1080 /* find last path component */
1081 n = n - dlen + 1;
1082 if (n < name)
1083 continue;
1084 else if (n > name) {
1085 if (*--n != '/')
1086 continue;
1087 else
1088 n++;
1089 }
1090
1091 if (strncmp(n, dname, dlen) == 0)
1092 goto update_context;
1093 } 1237 }
1238 }
1094 1239
1095 /* catch-all in case match not found */ 1240update_context:
1096 idx = context->name_count++; 1241 idx = context->name_count++;
1097 context->names[idx].name = NULL;
1098 context->names[idx].pino = pino;
1099#if AUDIT_DEBUG 1242#if AUDIT_DEBUG
1100 context->ino_count++; 1243 context->ino_count++;
1101#endif 1244#endif
1245 /* Re-use the name belonging to the slot for a matching parent directory.
1246 * All names for this context are relinquished in audit_free_names() */
1247 context->names[idx].name = found_name;
1248 context->names[idx].name_len = AUDIT_NAME_FULL;
1249 context->names[idx].name_put = 0; /* don't call __putname() */
1102 1250
1103update_context:
1104 if (inode) { 1251 if (inode) {
1105 context->names[idx].ino = inode->i_ino; 1252 context->names[idx].ino = inode->i_ino;
1106 context->names[idx].dev = inode->i_sb->s_dev; 1253 context->names[idx].dev = inode->i_sb->s_dev;
@@ -1109,7 +1256,8 @@ update_context:
1109 context->names[idx].gid = inode->i_gid; 1256 context->names[idx].gid = inode->i_gid;
1110 context->names[idx].rdev = inode->i_rdev; 1257 context->names[idx].rdev = inode->i_rdev;
1111 audit_inode_context(idx, inode); 1258 audit_inode_context(idx, inode);
1112 } 1259 } else
1260 context->names[idx].ino = (unsigned long)-1;
1113} 1261}
1114 1262
1115/** 1263/**
@@ -1142,18 +1290,23 @@ void auditsc_get_stamp(struct audit_context *ctx,
1142 */ 1290 */
1143int audit_set_loginuid(struct task_struct *task, uid_t loginuid) 1291int audit_set_loginuid(struct task_struct *task, uid_t loginuid)
1144{ 1292{
1145 if (task->audit_context) { 1293 struct audit_context *context = task->audit_context;
1146 struct audit_buffer *ab; 1294
1147 1295 if (context) {
1148 ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); 1296 /* Only log if audit is enabled */
1149 if (ab) { 1297 if (context->in_syscall) {
1150 audit_log_format(ab, "login pid=%d uid=%u " 1298 struct audit_buffer *ab;
1151 "old auid=%u new auid=%u", 1299
1152 task->pid, task->uid, 1300 ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
1153 task->audit_context->loginuid, loginuid); 1301 if (ab) {
1154 audit_log_end(ab); 1302 audit_log_format(ab, "login pid=%d uid=%u "
1303 "old auid=%u new auid=%u",
1304 task->pid, task->uid,
1305 context->loginuid, loginuid);
1306 audit_log_end(ab);
1307 }
1155 } 1308 }
1156 task->audit_context->loginuid = loginuid; 1309 context->loginuid = loginuid;
1157 } 1310 }
1158 return 0; 1311 return 0;
1159} 1312}
@@ -1170,16 +1323,193 @@ uid_t audit_get_loginuid(struct audit_context *ctx)
1170} 1323}
1171 1324
1172/** 1325/**
1173 * audit_ipc_obj - record audit data for ipc object 1326 * __audit_mq_open - record audit data for a POSIX MQ open
1174 * @ipcp: ipc permissions 1327 * @oflag: open flag
1328 * @mode: mode bits
1329 * @u_attr: queue attributes
1175 * 1330 *
1176 * Returns 0 for success or NULL context or < 0 on error. 1331 * Returns 0 for success or NULL context or < 0 on error.
1177 */ 1332 */
1178int audit_ipc_obj(struct kern_ipc_perm *ipcp) 1333int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr)
1179{ 1334{
1180 struct audit_aux_data_ipcctl *ax; 1335 struct audit_aux_data_mq_open *ax;
1336 struct audit_context *context = current->audit_context;
1337
1338 if (!audit_enabled)
1339 return 0;
1340
1341 if (likely(!context))
1342 return 0;
1343
1344 ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
1345 if (!ax)
1346 return -ENOMEM;
1347
1348 if (u_attr != NULL) {
1349 if (copy_from_user(&ax->attr, u_attr, sizeof(ax->attr))) {
1350 kfree(ax);
1351 return -EFAULT;
1352 }
1353 } else
1354 memset(&ax->attr, 0, sizeof(ax->attr));
1355
1356 ax->oflag = oflag;
1357 ax->mode = mode;
1358
1359 ax->d.type = AUDIT_MQ_OPEN;
1360 ax->d.next = context->aux;
1361 context->aux = (void *)ax;
1362 return 0;
1363}
1364
1365/**
1366 * __audit_mq_timedsend - record audit data for a POSIX MQ timed send
1367 * @mqdes: MQ descriptor
1368 * @msg_len: Message length
1369 * @msg_prio: Message priority
1370 * @abs_timeout: Message timeout in absolute time
1371 *
1372 * Returns 0 for success or NULL context or < 0 on error.
1373 */
1374int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio,
1375 const struct timespec __user *u_abs_timeout)
1376{
1377 struct audit_aux_data_mq_sendrecv *ax;
1378 struct audit_context *context = current->audit_context;
1379
1380 if (!audit_enabled)
1381 return 0;
1382
1383 if (likely(!context))
1384 return 0;
1385
1386 ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
1387 if (!ax)
1388 return -ENOMEM;
1389
1390 if (u_abs_timeout != NULL) {
1391 if (copy_from_user(&ax->abs_timeout, u_abs_timeout, sizeof(ax->abs_timeout))) {
1392 kfree(ax);
1393 return -EFAULT;
1394 }
1395 } else
1396 memset(&ax->abs_timeout, 0, sizeof(ax->abs_timeout));
1397
1398 ax->mqdes = mqdes;
1399 ax->msg_len = msg_len;
1400 ax->msg_prio = msg_prio;
1401
1402 ax->d.type = AUDIT_MQ_SENDRECV;
1403 ax->d.next = context->aux;
1404 context->aux = (void *)ax;
1405 return 0;
1406}
1407
1408/**
1409 * __audit_mq_timedreceive - record audit data for a POSIX MQ timed receive
1410 * @mqdes: MQ descriptor
1411 * @msg_len: Message length
1412 * @msg_prio: Message priority
1413 * @abs_timeout: Message timeout in absolute time
1414 *
1415 * Returns 0 for success or NULL context or < 0 on error.
1416 */
1417int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len,
1418 unsigned int __user *u_msg_prio,
1419 const struct timespec __user *u_abs_timeout)
1420{
1421 struct audit_aux_data_mq_sendrecv *ax;
1422 struct audit_context *context = current->audit_context;
1423
1424 if (!audit_enabled)
1425 return 0;
1426
1427 if (likely(!context))
1428 return 0;
1429
1430 ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
1431 if (!ax)
1432 return -ENOMEM;
1433
1434 if (u_msg_prio != NULL) {
1435 if (get_user(ax->msg_prio, u_msg_prio)) {
1436 kfree(ax);
1437 return -EFAULT;
1438 }
1439 } else
1440 ax->msg_prio = 0;
1441
1442 if (u_abs_timeout != NULL) {
1443 if (copy_from_user(&ax->abs_timeout, u_abs_timeout, sizeof(ax->abs_timeout))) {
1444 kfree(ax);
1445 return -EFAULT;
1446 }
1447 } else
1448 memset(&ax->abs_timeout, 0, sizeof(ax->abs_timeout));
1449
1450 ax->mqdes = mqdes;
1451 ax->msg_len = msg_len;
1452
1453 ax->d.type = AUDIT_MQ_SENDRECV;
1454 ax->d.next = context->aux;
1455 context->aux = (void *)ax;
1456 return 0;
1457}
1458
1459/**
1460 * __audit_mq_notify - record audit data for a POSIX MQ notify
1461 * @mqdes: MQ descriptor
1462 * @u_notification: Notification event
1463 *
1464 * Returns 0 for success or NULL context or < 0 on error.
1465 */
1466
1467int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification)
1468{
1469 struct audit_aux_data_mq_notify *ax;
1470 struct audit_context *context = current->audit_context;
1471
1472 if (!audit_enabled)
1473 return 0;
1474
1475 if (likely(!context))
1476 return 0;
1477
1478 ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
1479 if (!ax)
1480 return -ENOMEM;
1481
1482 if (u_notification != NULL) {
1483 if (copy_from_user(&ax->notification, u_notification, sizeof(ax->notification))) {
1484 kfree(ax);
1485 return -EFAULT;
1486 }
1487 } else
1488 memset(&ax->notification, 0, sizeof(ax->notification));
1489
1490 ax->mqdes = mqdes;
1491
1492 ax->d.type = AUDIT_MQ_NOTIFY;
1493 ax->d.next = context->aux;
1494 context->aux = (void *)ax;
1495 return 0;
1496}
1497
1498/**
1499 * __audit_mq_getsetattr - record audit data for a POSIX MQ get/set attribute
1500 * @mqdes: MQ descriptor
1501 * @mqstat: MQ flags
1502 *
1503 * Returns 0 for success or NULL context or < 0 on error.
1504 */
1505int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
1506{
1507 struct audit_aux_data_mq_getsetattr *ax;
1181 struct audit_context *context = current->audit_context; 1508 struct audit_context *context = current->audit_context;
1182 1509
1510 if (!audit_enabled)
1511 return 0;
1512
1183 if (likely(!context)) 1513 if (likely(!context))
1184 return 0; 1514 return 0;
1185 1515
@@ -1187,6 +1517,30 @@ int audit_ipc_obj(struct kern_ipc_perm *ipcp)
1187 if (!ax) 1517 if (!ax)
1188 return -ENOMEM; 1518 return -ENOMEM;
1189 1519
1520 ax->mqdes = mqdes;
1521 ax->mqstat = *mqstat;
1522
1523 ax->d.type = AUDIT_MQ_GETSETATTR;
1524 ax->d.next = context->aux;
1525 context->aux = (void *)ax;
1526 return 0;
1527}
1528
1529/**
1530 * audit_ipc_obj - record audit data for ipc object
1531 * @ipcp: ipc permissions
1532 *
1533 * Returns 0 for success or NULL context or < 0 on error.
1534 */
1535int __audit_ipc_obj(struct kern_ipc_perm *ipcp)
1536{
1537 struct audit_aux_data_ipcctl *ax;
1538 struct audit_context *context = current->audit_context;
1539
1540 ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
1541 if (!ax)
1542 return -ENOMEM;
1543
1190 ax->uid = ipcp->uid; 1544 ax->uid = ipcp->uid;
1191 ax->gid = ipcp->gid; 1545 ax->gid = ipcp->gid;
1192 ax->mode = ipcp->mode; 1546 ax->mode = ipcp->mode;
@@ -1204,17 +1558,15 @@ int audit_ipc_obj(struct kern_ipc_perm *ipcp)
1204 * @uid: msgq user id 1558 * @uid: msgq user id
1205 * @gid: msgq group id 1559 * @gid: msgq group id
1206 * @mode: msgq mode (permissions) 1560 * @mode: msgq mode (permissions)
1561 * @ipcp: in-kernel IPC permissions
1207 * 1562 *
1208 * Returns 0 for success or NULL context or < 0 on error. 1563 * Returns 0 for success or NULL context or < 0 on error.
1209 */ 1564 */
1210int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp) 1565int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode)
1211{ 1566{
1212 struct audit_aux_data_ipcctl *ax; 1567 struct audit_aux_data_ipcctl *ax;
1213 struct audit_context *context = current->audit_context; 1568 struct audit_context *context = current->audit_context;
1214 1569
1215 if (likely(!context))
1216 return 0;
1217
1218 ax = kmalloc(sizeof(*ax), GFP_ATOMIC); 1570 ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
1219 if (!ax) 1571 if (!ax)
1220 return -ENOMEM; 1572 return -ENOMEM;
@@ -1223,7 +1575,6 @@ int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode,
1223 ax->uid = uid; 1575 ax->uid = uid;
1224 ax->gid = gid; 1576 ax->gid = gid;
1225 ax->mode = mode; 1577 ax->mode = mode;
1226 selinux_get_ipc_sid(ipcp, &ax->osid);
1227 1578
1228 ax->d.type = AUDIT_IPC_SET_PERM; 1579 ax->d.type = AUDIT_IPC_SET_PERM;
1229 ax->d.next = context->aux; 1580 ax->d.next = context->aux;
@@ -1231,6 +1582,39 @@ int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode,
1231 return 0; 1582 return 0;
1232} 1583}
1233 1584
1585int audit_bprm(struct linux_binprm *bprm)
1586{
1587 struct audit_aux_data_execve *ax;
1588 struct audit_context *context = current->audit_context;
1589 unsigned long p, next;
1590 void *to;
1591
1592 if (likely(!audit_enabled || !context))
1593 return 0;
1594
1595 ax = kmalloc(sizeof(*ax) + PAGE_SIZE * MAX_ARG_PAGES - bprm->p,
1596 GFP_KERNEL);
1597 if (!ax)
1598 return -ENOMEM;
1599
1600 ax->argc = bprm->argc;
1601 ax->envc = bprm->envc;
1602 for (p = bprm->p, to = ax->mem; p < MAX_ARG_PAGES*PAGE_SIZE; p = next) {
1603 struct page *page = bprm->page[p / PAGE_SIZE];
1604 void *kaddr = kmap(page);
1605 next = (p + PAGE_SIZE) & ~(PAGE_SIZE - 1);
1606 memcpy(to, kaddr + (p & (PAGE_SIZE - 1)), next - p);
1607 to += next - p;
1608 kunmap(page);
1609 }
1610
1611 ax->d.type = AUDIT_EXECVE;
1612 ax->d.next = context->aux;
1613 context->aux = (void *)ax;
1614 return 0;
1615}
1616
1617
1234/** 1618/**
1235 * audit_socketcall - record audit data for sys_socketcall 1619 * audit_socketcall - record audit data for sys_socketcall
1236 * @nargs: number of args 1620 * @nargs: number of args
@@ -1325,19 +1709,20 @@ int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt)
1325 * If the audit subsystem is being terminated, record the task (pid) 1709 * If the audit subsystem is being terminated, record the task (pid)
1326 * and uid that is doing that. 1710 * and uid that is doing that.
1327 */ 1711 */
1328void audit_signal_info(int sig, struct task_struct *t) 1712void __audit_signal_info(int sig, struct task_struct *t)
1329{ 1713{
1330 extern pid_t audit_sig_pid; 1714 extern pid_t audit_sig_pid;
1331 extern uid_t audit_sig_uid; 1715 extern uid_t audit_sig_uid;
1332 1716 extern u32 audit_sig_sid;
1333 if (unlikely(audit_pid && t->tgid == audit_pid)) { 1717
1334 if (sig == SIGTERM || sig == SIGHUP) { 1718 if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1) {
1335 struct audit_context *ctx = current->audit_context; 1719 struct task_struct *tsk = current;
1336 audit_sig_pid = current->pid; 1720 struct audit_context *ctx = tsk->audit_context;
1337 if (ctx) 1721 audit_sig_pid = tsk->pid;
1338 audit_sig_uid = ctx->loginuid; 1722 if (ctx)
1339 else 1723 audit_sig_uid = ctx->loginuid;
1340 audit_sig_uid = current->uid; 1724 else
1341 } 1725 audit_sig_uid = tsk->uid;
1726 selinux_get_task_sid(tsk, &audit_sig_sid);
1342 } 1727 }
1343} 1728}
diff --git a/kernel/compat.c b/kernel/compat.c
index c1601a84f8..2f67233243 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -21,6 +21,7 @@
21#include <linux/unistd.h> 21#include <linux/unistd.h>
22#include <linux/security.h> 22#include <linux/security.h>
23#include <linux/timex.h> 23#include <linux/timex.h>
24#include <linux/migrate.h>
24 25
25#include <asm/uaccess.h> 26#include <asm/uaccess.h>
26 27
@@ -934,3 +935,25 @@ asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp)
934 935
935 return ret; 936 return ret;
936} 937}
938
939#ifdef CONFIG_NUMA
940asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_pages,
941 compat_uptr_t __user *pages32,
942 const int __user *nodes,
943 int __user *status,
944 int flags)
945{
946 const void __user * __user *pages;
947 int i;
948
949 pages = compat_alloc_user_space(nr_pages * sizeof(void *));
950 for (i = 0; i < nr_pages; i++) {
951 compat_uptr_t p;
952
953 if (get_user(p, pages32 + i) ||
954 put_user(compat_ptr(p), pages + i))
955 return -EFAULT;
956 }
957 return sys_move_pages(pid, nr_pages, pages, nodes, status, flags);
958}
959#endif
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index ab81fdd457..b602f73fb3 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -41,6 +41,7 @@
41#include <linux/rcupdate.h> 41#include <linux/rcupdate.h>
42#include <linux/sched.h> 42#include <linux/sched.h>
43#include <linux/seq_file.h> 43#include <linux/seq_file.h>
44#include <linux/security.h>
44#include <linux/slab.h> 45#include <linux/slab.h>
45#include <linux/smp_lock.h> 46#include <linux/smp_lock.h>
46#include <linux/spinlock.h> 47#include <linux/spinlock.h>
@@ -392,11 +393,11 @@ static int cpuset_fill_super(struct super_block *sb, void *unused_data,
392 return 0; 393 return 0;
393} 394}
394 395
395static struct super_block *cpuset_get_sb(struct file_system_type *fs_type, 396static int cpuset_get_sb(struct file_system_type *fs_type,
396 int flags, const char *unused_dev_name, 397 int flags, const char *unused_dev_name,
397 void *data) 398 void *data, struct vfsmount *mnt)
398{ 399{
399 return get_sb_single(fs_type, flags, data, cpuset_fill_super); 400 return get_sb_single(fs_type, flags, data, cpuset_fill_super, mnt);
400} 401}
401 402
402static struct file_system_type cpuset_fs_type = { 403static struct file_system_type cpuset_fs_type = {
@@ -1177,6 +1178,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
1177 cpumask_t cpus; 1178 cpumask_t cpus;
1178 nodemask_t from, to; 1179 nodemask_t from, to;
1179 struct mm_struct *mm; 1180 struct mm_struct *mm;
1181 int retval;
1180 1182
1181 if (sscanf(pidbuf, "%d", &pid) != 1) 1183 if (sscanf(pidbuf, "%d", &pid) != 1)
1182 return -EIO; 1184 return -EIO;
@@ -1205,6 +1207,12 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
1205 get_task_struct(tsk); 1207 get_task_struct(tsk);
1206 } 1208 }
1207 1209
1210 retval = security_task_setscheduler(tsk, 0, NULL);
1211 if (retval) {
1212 put_task_struct(tsk);
1213 return retval;
1214 }
1215
1208 mutex_lock(&callback_mutex); 1216 mutex_lock(&callback_mutex);
1209 1217
1210 task_lock(tsk); 1218 task_lock(tsk);
diff --git a/kernel/exit.c b/kernel/exit.c
index e06d0c10a2..a3baf92462 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -579,7 +579,7 @@ static void exit_mm(struct task_struct * tsk)
579 down_read(&mm->mmap_sem); 579 down_read(&mm->mmap_sem);
580 } 580 }
581 atomic_inc(&mm->mm_count); 581 atomic_inc(&mm->mm_count);
582 if (mm != tsk->active_mm) BUG(); 582 BUG_ON(mm != tsk->active_mm);
583 /* more a memory barrier than a real lock */ 583 /* more a memory barrier than a real lock */
584 task_lock(tsk); 584 task_lock(tsk);
585 tsk->mm = NULL; 585 tsk->mm = NULL;
@@ -1530,8 +1530,7 @@ check_continued:
1530 if (options & __WNOTHREAD) 1530 if (options & __WNOTHREAD)
1531 break; 1531 break;
1532 tsk = next_thread(tsk); 1532 tsk = next_thread(tsk);
1533 if (tsk->signal != current->signal) 1533 BUG_ON(tsk->signal != current->signal);
1534 BUG();
1535 } while (tsk != current); 1534 } while (tsk != current);
1536 1535
1537 read_unlock(&tasklist_lock); 1536 read_unlock(&tasklist_lock);
diff --git a/kernel/fork.c b/kernel/fork.c
index ac8100e308..49adc0e8d4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -368,6 +368,8 @@ void fastcall __mmdrop(struct mm_struct *mm)
368 */ 368 */
369void mmput(struct mm_struct *mm) 369void mmput(struct mm_struct *mm)
370{ 370{
371 might_sleep();
372
371 if (atomic_dec_and_test(&mm->mm_users)) { 373 if (atomic_dec_and_test(&mm->mm_users)) {
372 exit_aio(mm); 374 exit_aio(mm);
373 exit_mmap(mm); 375 exit_mmap(mm);
@@ -623,6 +625,7 @@ out:
623/* 625/*
624 * Allocate a new files structure and copy contents from the 626 * Allocate a new files structure and copy contents from the
625 * passed in files structure. 627 * passed in files structure.
628 * errorp will be valid only when the returned files_struct is NULL.
626 */ 629 */
627static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) 630static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
628{ 631{
@@ -631,6 +634,7 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
631 int open_files, size, i, expand; 634 int open_files, size, i, expand;
632 struct fdtable *old_fdt, *new_fdt; 635 struct fdtable *old_fdt, *new_fdt;
633 636
637 *errorp = -ENOMEM;
634 newf = alloc_files(); 638 newf = alloc_files();
635 if (!newf) 639 if (!newf)
636 goto out; 640 goto out;
@@ -744,7 +748,6 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
744 * break this. 748 * break this.
745 */ 749 */
746 tsk->files = NULL; 750 tsk->files = NULL;
747 error = -ENOMEM;
748 newf = dup_fd(oldf, &error); 751 newf = dup_fd(oldf, &error);
749 if (!newf) 752 if (!newf)
750 goto out; 753 goto out;
diff --git a/kernel/futex.c b/kernel/futex.c
index 5699c51205..e1a380c77a 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1056,11 +1056,11 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, int val,
1056 (unsigned long)uaddr2, val2, val3); 1056 (unsigned long)uaddr2, val2, val3);
1057} 1057}
1058 1058
1059static struct super_block * 1059static int futexfs_get_sb(struct file_system_type *fs_type,
1060futexfs_get_sb(struct file_system_type *fs_type, 1060 int flags, const char *dev_name, void *data,
1061 int flags, const char *dev_name, void *data) 1061 struct vfsmount *mnt)
1062{ 1062{
1063 return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA); 1063 return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA, mnt);
1064} 1064}
1065 1065
1066static struct file_system_type futex_fs_type = { 1066static struct file_system_type futex_fs_type = {
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 01fa2ae98a..1832430572 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -393,7 +393,7 @@ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base)
393 if (base->first == &timer->node) 393 if (base->first == &timer->node)
394 base->first = rb_next(&timer->node); 394 base->first = rb_next(&timer->node);
395 rb_erase(&timer->node, &base->active); 395 rb_erase(&timer->node, &base->active);
396 timer->node.rb_parent = HRTIMER_INACTIVE; 396 rb_set_parent(&timer->node, &timer->node);
397} 397}
398 398
399/* 399/*
@@ -582,7 +582,7 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
582 clock_id = CLOCK_MONOTONIC; 582 clock_id = CLOCK_MONOTONIC;
583 583
584 timer->base = &bases[clock_id]; 584 timer->base = &bases[clock_id];
585 timer->node.rb_parent = HRTIMER_INACTIVE; 585 rb_set_parent(&timer->node, &timer->node);
586} 586}
587EXPORT_SYMBOL_GPL(hrtimer_init); 587EXPORT_SYMBOL_GPL(hrtimer_init);
588 588
diff --git a/kernel/intermodule.c b/kernel/intermodule.c
deleted file mode 100644
index 55b1e5b85d..0000000000
--- a/kernel/intermodule.c
+++ /dev/null
@@ -1,184 +0,0 @@
1/* Deprecated, do not use. Moved from module.c to here. --RR */
2
3/* Written by Keith Owens <kaos@ocs.com.au> Oct 2000 */
4#include <linux/module.h>
5#include <linux/kmod.h>
6#include <linux/spinlock.h>
7#include <linux/list.h>
8#include <linux/slab.h>
9
10/* inter_module functions are always available, even when the kernel is
11 * compiled without modules. Consumers of inter_module_xxx routines
12 * will always work, even when both are built into the kernel, this
13 * approach removes lots of #ifdefs in mainline code.
14 */
15
16static struct list_head ime_list = LIST_HEAD_INIT(ime_list);
17static DEFINE_SPINLOCK(ime_lock);
18static int kmalloc_failed;
19
20struct inter_module_entry {
21 struct list_head list;
22 const char *im_name;
23 struct module *owner;
24 const void *userdata;
25};
26
27/**
28 * inter_module_register - register a new set of inter module data.
29 * @im_name: an arbitrary string to identify the data, must be unique
30 * @owner: module that is registering the data, always use THIS_MODULE
31 * @userdata: pointer to arbitrary userdata to be registered
32 *
33 * Description: Check that the im_name has not already been registered,
34 * complain if it has. For new data, add it to the inter_module_entry
35 * list.
36 */
37void inter_module_register(const char *im_name, struct module *owner, const void *userdata)
38{
39 struct list_head *tmp;
40 struct inter_module_entry *ime, *ime_new;
41
42 if (!(ime_new = kzalloc(sizeof(*ime), GFP_KERNEL))) {
43 /* Overloaded kernel, not fatal */
44 printk(KERN_ERR
45 "Aiee, inter_module_register: cannot kmalloc entry for '%s'\n",
46 im_name);
47 kmalloc_failed = 1;
48 return;
49 }
50 ime_new->im_name = im_name;
51 ime_new->owner = owner;
52 ime_new->userdata = userdata;
53
54 spin_lock(&ime_lock);
55 list_for_each(tmp, &ime_list) {
56 ime = list_entry(tmp, struct inter_module_entry, list);
57 if (strcmp(ime->im_name, im_name) == 0) {
58 spin_unlock(&ime_lock);
59 kfree(ime_new);
60 /* Program logic error, fatal */
61 printk(KERN_ERR "inter_module_register: duplicate im_name '%s'", im_name);
62 BUG();
63 }
64 }
65 list_add(&(ime_new->list), &ime_list);
66 spin_unlock(&ime_lock);
67}
68
69/**
70 * inter_module_unregister - unregister a set of inter module data.
71 * @im_name: an arbitrary string to identify the data, must be unique
72 *
73 * Description: Check that the im_name has been registered, complain if
74 * it has not. For existing data, remove it from the
75 * inter_module_entry list.
76 */
77void inter_module_unregister(const char *im_name)
78{
79 struct list_head *tmp;
80 struct inter_module_entry *ime;
81
82 spin_lock(&ime_lock);
83 list_for_each(tmp, &ime_list) {
84 ime = list_entry(tmp, struct inter_module_entry, list);
85 if (strcmp(ime->im_name, im_name) == 0) {
86 list_del(&(ime->list));
87 spin_unlock(&ime_lock);
88 kfree(ime);
89 return;
90 }
91 }
92 spin_unlock(&ime_lock);
93 if (kmalloc_failed) {
94 printk(KERN_ERR
95 "inter_module_unregister: no entry for '%s', "
96 "probably caused by previous kmalloc failure\n",
97 im_name);
98 return;
99 }
100 else {
101 /* Program logic error, fatal */
102 printk(KERN_ERR "inter_module_unregister: no entry for '%s'", im_name);
103 BUG();
104 }
105}
106
107/**
108 * inter_module_get - return arbitrary userdata from another module.
109 * @im_name: an arbitrary string to identify the data, must be unique
110 *
111 * Description: If the im_name has not been registered, return NULL.
112 * Try to increment the use count on the owning module, if that fails
113 * then return NULL. Otherwise return the userdata.
114 */
115static const void *inter_module_get(const char *im_name)
116{
117 struct list_head *tmp;
118 struct inter_module_entry *ime;
119 const void *result = NULL;
120
121 spin_lock(&ime_lock);
122 list_for_each(tmp, &ime_list) {
123 ime = list_entry(tmp, struct inter_module_entry, list);
124 if (strcmp(ime->im_name, im_name) == 0) {
125 if (try_module_get(ime->owner))
126 result = ime->userdata;
127 break;
128 }
129 }
130 spin_unlock(&ime_lock);
131 return(result);
132}
133
134/**
135 * inter_module_get_request - im get with automatic request_module.
136 * @im_name: an arbitrary string to identify the data, must be unique
137 * @modname: module that is expected to register im_name
138 *
139 * Description: If inter_module_get fails, do request_module then retry.
140 */
141const void *inter_module_get_request(const char *im_name, const char *modname)
142{
143 const void *result = inter_module_get(im_name);
144 if (!result) {
145 request_module("%s", modname);
146 result = inter_module_get(im_name);
147 }
148 return(result);
149}
150
151/**
152 * inter_module_put - release use of data from another module.
153 * @im_name: an arbitrary string to identify the data, must be unique
154 *
155 * Description: If the im_name has not been registered, complain,
156 * otherwise decrement the use count on the owning module.
157 */
158void inter_module_put(const char *im_name)
159{
160 struct list_head *tmp;
161 struct inter_module_entry *ime;
162
163 spin_lock(&ime_lock);
164 list_for_each(tmp, &ime_list) {
165 ime = list_entry(tmp, struct inter_module_entry, list);
166 if (strcmp(ime->im_name, im_name) == 0) {
167 if (ime->owner)
168 module_put(ime->owner);
169 spin_unlock(&ime_lock);
170 return;
171 }
172 }
173 spin_unlock(&ime_lock);
174 printk(KERN_ERR "inter_module_put: no entry for '%s'", im_name);
175 BUG();
176}
177
178EXPORT_SYMBOL(inter_module_register);
179EXPORT_SYMBOL(inter_module_unregister);
180EXPORT_SYMBOL(inter_module_get_request);
181EXPORT_SYMBOL(inter_module_put);
182
183MODULE_LICENSE("GPL");
184
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 51df337b37..0f65301171 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -76,10 +76,11 @@ irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs)
76/* 76/*
77 * Have got an event to handle: 77 * Have got an event to handle:
78 */ 78 */
79fastcall int handle_IRQ_event(unsigned int irq, struct pt_regs *regs, 79fastcall irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
80 struct irqaction *action) 80 struct irqaction *action)
81{ 81{
82 int ret, retval = 0, status = 0; 82 irqreturn_t ret, retval = IRQ_NONE;
83 unsigned int status = 0;
83 84
84 if (!(action->flags & SA_INTERRUPT)) 85 if (!(action->flags & SA_INTERRUPT))
85 local_irq_enable(); 86 local_irq_enable();
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 134f9f2e0e..a12d00eb5e 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -30,7 +30,7 @@ void move_native_irq(int irq)
30 30
31 desc->move_irq = 0; 31 desc->move_irq = 0;
32 32
33 if (likely(cpus_empty(pending_irq_cpumask[irq]))) 33 if (unlikely(cpus_empty(pending_irq_cpumask[irq])))
34 return; 34 return;
35 35
36 if (!desc->handler->set_affinity) 36 if (!desc->handler->set_affinity)
@@ -49,7 +49,7 @@ void move_native_irq(int irq)
49 * cause some ioapics to mal-function. 49 * cause some ioapics to mal-function.
50 * Being paranoid i guess! 50 * Being paranoid i guess!
51 */ 51 */
52 if (unlikely(!cpus_empty(tmp))) { 52 if (likely(!cpus_empty(tmp))) {
53 if (likely(!(desc->status & IRQ_DISABLED))) 53 if (likely(!(desc->status & IRQ_DISABLED)))
54 desc->handler->disable(irq); 54 desc->handler->disable(irq);
55 55
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index d03b5eef8c..afacd6f585 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -24,6 +24,8 @@ static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
24#ifdef CONFIG_GENERIC_PENDING_IRQ 24#ifdef CONFIG_GENERIC_PENDING_IRQ
25void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) 25void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
26{ 26{
27 set_balance_irq_affinity(irq, mask_val);
28
27 /* 29 /*
28 * Save these away for later use. Re-progam when the 30 * Save these away for later use. Re-progam when the
29 * interrupt is pending 31 * interrupt is pending
@@ -33,6 +35,7 @@ void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
33#else 35#else
34void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) 36void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
35{ 37{
38 set_balance_irq_affinity(irq, mask_val);
36 irq_affinity[irq] = mask_val; 39 irq_affinity[irq] = mask_val;
37 irq_desc[irq].handler->set_affinity(irq, mask_val); 40 irq_desc[irq].handler->set_affinity(irq, mask_val);
38} 41}
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 7df9abd5ec..b2fb3c18d0 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -11,7 +11,7 @@
11#include <linux/kallsyms.h> 11#include <linux/kallsyms.h>
12#include <linux/interrupt.h> 12#include <linux/interrupt.h>
13 13
14static int irqfixup; 14static int irqfixup __read_mostly;
15 15
16/* 16/*
17 * Recovery handler for misrouted interrupts. 17 * Recovery handler for misrouted interrupts.
@@ -136,9 +136,9 @@ static void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t actio
136void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret, 136void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret,
137 struct pt_regs *regs) 137 struct pt_regs *regs)
138{ 138{
139 if (action_ret != IRQ_HANDLED) { 139 if (unlikely(action_ret != IRQ_HANDLED)) {
140 desc->irqs_unhandled++; 140 desc->irqs_unhandled++;
141 if (action_ret != IRQ_NONE) 141 if (unlikely(action_ret != IRQ_NONE))
142 report_bad_irq(irq, desc, action_ret); 142 report_bad_irq(irq, desc, action_ret);
143 } 143 }
144 144
@@ -152,11 +152,11 @@ void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret,
152 } 152 }
153 153
154 desc->irq_count++; 154 desc->irq_count++;
155 if (desc->irq_count < 100000) 155 if (likely(desc->irq_count < 100000))
156 return; 156 return;
157 157
158 desc->irq_count = 0; 158 desc->irq_count = 0;
159 if (desc->irqs_unhandled > 99900) { 159 if (unlikely(desc->irqs_unhandled > 99900)) {
160 /* 160 /*
161 * The interrupt is stuck 161 * The interrupt is stuck
162 */ 162 */
@@ -171,7 +171,7 @@ void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret,
171 desc->irqs_unhandled = 0; 171 desc->irqs_unhandled = 0;
172} 172}
173 173
174int noirqdebug; 174int noirqdebug __read_mostly;
175 175
176int __init noirqdebug_setup(char *str) 176int __init noirqdebug_setup(char *str)
177{ 177{
diff --git a/kernel/kexec.c b/kernel/kexec.c
index bf39d28e4c..58f0f38259 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -902,14 +902,14 @@ static int kimage_load_segment(struct kimage *image,
902 * kexec does not sync, or unmount filesystems so if you need 902 * kexec does not sync, or unmount filesystems so if you need
903 * that to happen you need to do that yourself. 903 * that to happen you need to do that yourself.
904 */ 904 */
905struct kimage *kexec_image = NULL; 905struct kimage *kexec_image;
906static struct kimage *kexec_crash_image = NULL; 906struct kimage *kexec_crash_image;
907/* 907/*
908 * A home grown binary mutex. 908 * A home grown binary mutex.
909 * Nothing can wait so this mutex is safe to use 909 * Nothing can wait so this mutex is safe to use
910 * in interrupt context :) 910 * in interrupt context :)
911 */ 911 */
912static int kexec_lock = 0; 912static int kexec_lock;
913 913
914asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, 914asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
915 struct kexec_segment __user *segments, 915 struct kexec_segment __user *segments,
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index f119e098e6..9e28478a17 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -14,6 +14,7 @@
14#include <linux/sysfs.h> 14#include <linux/sysfs.h>
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/kexec.h>
17 18
18#define KERNEL_ATTR_RO(_name) \ 19#define KERNEL_ATTR_RO(_name) \
19static struct subsys_attribute _name##_attr = __ATTR_RO(_name) 20static struct subsys_attribute _name##_attr = __ATTR_RO(_name)
@@ -48,6 +49,20 @@ static ssize_t uevent_helper_store(struct subsystem *subsys, const char *page, s
48KERNEL_ATTR_RW(uevent_helper); 49KERNEL_ATTR_RW(uevent_helper);
49#endif 50#endif
50 51
52#ifdef CONFIG_KEXEC
53static ssize_t kexec_loaded_show(struct subsystem *subsys, char *page)
54{
55 return sprintf(page, "%d\n", !!kexec_image);
56}
57KERNEL_ATTR_RO(kexec_loaded);
58
59static ssize_t kexec_crash_loaded_show(struct subsystem *subsys, char *page)
60{
61 return sprintf(page, "%d\n", !!kexec_crash_image);
62}
63KERNEL_ATTR_RO(kexec_crash_loaded);
64#endif /* CONFIG_KEXEC */
65
51decl_subsys(kernel, NULL, NULL); 66decl_subsys(kernel, NULL, NULL);
52EXPORT_SYMBOL_GPL(kernel_subsys); 67EXPORT_SYMBOL_GPL(kernel_subsys);
53 68
@@ -56,6 +71,10 @@ static struct attribute * kernel_attrs[] = {
56 &uevent_seqnum_attr.attr, 71 &uevent_seqnum_attr.attr,
57 &uevent_helper_attr.attr, 72 &uevent_helper_attr.attr,
58#endif 73#endif
74#ifdef CONFIG_KEXEC
75 &kexec_loaded_attr.attr,
76 &kexec_crash_loaded_attr.attr,
77#endif
59 NULL 78 NULL
60}; 79};
61 80
diff --git a/kernel/power/main.c b/kernel/power/main.c
index a6d9ef4600..cdf0f07af9 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -15,7 +15,7 @@
15#include <linux/errno.h> 15#include <linux/errno.h>
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/pm.h> 17#include <linux/pm.h>
18 18#include <linux/console.h>
19 19
20#include "power.h" 20#include "power.h"
21 21
@@ -86,6 +86,7 @@ static int suspend_prepare(suspend_state_t state)
86 goto Thaw; 86 goto Thaw;
87 } 87 }
88 88
89 suspend_console();
89 if ((error = device_suspend(PMSG_SUSPEND))) { 90 if ((error = device_suspend(PMSG_SUSPEND))) {
90 printk(KERN_ERR "Some devices failed to suspend\n"); 91 printk(KERN_ERR "Some devices failed to suspend\n");
91 goto Finish; 92 goto Finish;
@@ -133,6 +134,7 @@ int suspend_enter(suspend_state_t state)
133static void suspend_finish(suspend_state_t state) 134static void suspend_finish(suspend_state_t state)
134{ 135{
135 device_resume(); 136 device_resume();
137 resume_console();
136 thaw_processes(); 138 thaw_processes();
137 enable_nonboot_cpus(); 139 enable_nonboot_cpus();
138 if (pm_ops && pm_ops->finish) 140 if (pm_ops && pm_ops->finish)
diff --git a/kernel/power/power.h b/kernel/power/power.h
index f06f12f217..98c41423f3 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -55,7 +55,7 @@ struct snapshot_handle {
55 unsigned int page; 55 unsigned int page;
56 unsigned int page_offset; 56 unsigned int page_offset;
57 unsigned int prev; 57 unsigned int prev;
58 struct pbe *pbe; 58 struct pbe *pbe, *last_pbe;
59 void *buffer; 59 void *buffer;
60 unsigned int buf_offset; 60 unsigned int buf_offset;
61}; 61};
@@ -105,6 +105,10 @@ extern struct bitmap_page *alloc_bitmap(unsigned int nr_bits);
105extern unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap); 105extern unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap);
106extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap); 106extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap);
107 107
108extern unsigned int count_special_pages(void);
109extern int save_special_mem(void);
110extern int restore_special_mem(void);
111
108extern int swsusp_check(void); 112extern int swsusp_check(void);
109extern int swsusp_shrink_memory(void); 113extern int swsusp_shrink_memory(void);
110extern void swsusp_free(void); 114extern void swsusp_free(void);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 3eeedbb13b..3d9284100b 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -39,8 +39,90 @@ static unsigned int nr_copy_pages;
39static unsigned int nr_meta_pages; 39static unsigned int nr_meta_pages;
40static unsigned long *buffer; 40static unsigned long *buffer;
41 41
42struct arch_saveable_page {
43 unsigned long start;
44 unsigned long end;
45 char *data;
46 struct arch_saveable_page *next;
47};
48static struct arch_saveable_page *arch_pages;
49
50int swsusp_add_arch_pages(unsigned long start, unsigned long end)
51{
52 struct arch_saveable_page *tmp;
53
54 while (start < end) {
55 tmp = kzalloc(sizeof(struct arch_saveable_page), GFP_KERNEL);
56 if (!tmp)
57 return -ENOMEM;
58 tmp->start = start;
59 tmp->end = ((start >> PAGE_SHIFT) + 1) << PAGE_SHIFT;
60 if (tmp->end > end)
61 tmp->end = end;
62 tmp->next = arch_pages;
63 start = tmp->end;
64 arch_pages = tmp;
65 }
66 return 0;
67}
68
69static unsigned int count_arch_pages(void)
70{
71 unsigned int count = 0;
72 struct arch_saveable_page *tmp = arch_pages;
73 while (tmp) {
74 count++;
75 tmp = tmp->next;
76 }
77 return count;
78}
79
80static int save_arch_mem(void)
81{
82 char *kaddr;
83 struct arch_saveable_page *tmp = arch_pages;
84 int offset;
85
86 pr_debug("swsusp: Saving arch specific memory");
87 while (tmp) {
88 tmp->data = (char *)__get_free_page(GFP_ATOMIC);
89 if (!tmp->data)
90 return -ENOMEM;
91 offset = tmp->start - (tmp->start & PAGE_MASK);
92 /* arch pages might haven't a 'struct page' */
93 kaddr = kmap_atomic_pfn(tmp->start >> PAGE_SHIFT, KM_USER0);
94 memcpy(tmp->data + offset, kaddr + offset,
95 tmp->end - tmp->start);
96 kunmap_atomic(kaddr, KM_USER0);
97
98 tmp = tmp->next;
99 }
100 return 0;
101}
102
103static int restore_arch_mem(void)
104{
105 char *kaddr;
106 struct arch_saveable_page *tmp = arch_pages;
107 int offset;
108
109 while (tmp) {
110 if (!tmp->data)
111 continue;
112 offset = tmp->start - (tmp->start & PAGE_MASK);
113 kaddr = kmap_atomic_pfn(tmp->start >> PAGE_SHIFT, KM_USER0);
114 memcpy(kaddr + offset, tmp->data + offset,
115 tmp->end - tmp->start);
116 kunmap_atomic(kaddr, KM_USER0);
117 free_page((long)tmp->data);
118 tmp->data = NULL;
119 tmp = tmp->next;
120 }
121 return 0;
122}
123
42#ifdef CONFIG_HIGHMEM 124#ifdef CONFIG_HIGHMEM
43unsigned int count_highmem_pages(void) 125static unsigned int count_highmem_pages(void)
44{ 126{
45 struct zone *zone; 127 struct zone *zone;
46 unsigned long zone_pfn; 128 unsigned long zone_pfn;
@@ -117,7 +199,7 @@ static int save_highmem_zone(struct zone *zone)
117 return 0; 199 return 0;
118} 200}
119 201
120int save_highmem(void) 202static int save_highmem(void)
121{ 203{
122 struct zone *zone; 204 struct zone *zone;
123 int res = 0; 205 int res = 0;
@@ -134,7 +216,7 @@ int save_highmem(void)
134 return 0; 216 return 0;
135} 217}
136 218
137int restore_highmem(void) 219static int restore_highmem(void)
138{ 220{
139 printk("swsusp: Restoring Highmem\n"); 221 printk("swsusp: Restoring Highmem\n");
140 while (highmem_copy) { 222 while (highmem_copy) {
@@ -150,8 +232,35 @@ int restore_highmem(void)
150 } 232 }
151 return 0; 233 return 0;
152} 234}
235#else
236static inline unsigned int count_highmem_pages(void) {return 0;}
237static inline int save_highmem(void) {return 0;}
238static inline int restore_highmem(void) {return 0;}
153#endif 239#endif
154 240
241unsigned int count_special_pages(void)
242{
243 return count_arch_pages() + count_highmem_pages();
244}
245
246int save_special_mem(void)
247{
248 int ret;
249 ret = save_arch_mem();
250 if (!ret)
251 ret = save_highmem();
252 return ret;
253}
254
255int restore_special_mem(void)
256{
257 int ret;
258 ret = restore_arch_mem();
259 if (!ret)
260 ret = restore_highmem();
261 return ret;
262}
263
155static int pfn_is_nosave(unsigned long pfn) 264static int pfn_is_nosave(unsigned long pfn)
156{ 265{
157 unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; 266 unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
@@ -177,7 +286,6 @@ static int saveable(struct zone *zone, unsigned long *zone_pfn)
177 return 0; 286 return 0;
178 287
179 page = pfn_to_page(pfn); 288 page = pfn_to_page(pfn);
180 BUG_ON(PageReserved(page) && PageNosave(page));
181 if (PageNosave(page)) 289 if (PageNosave(page))
182 return 0; 290 return 0;
183 if (PageReserved(page) && pfn_is_nosave(pfn)) 291 if (PageReserved(page) && pfn_is_nosave(pfn))
@@ -293,62 +401,29 @@ static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages)
293 } 401 }
294} 402}
295 403
296/** 404static unsigned int unsafe_pages;
297 * On resume it is necessary to trace and eventually free the unsafe
298 * pages that have been allocated, because they are needed for I/O
299 * (on x86-64 we likely will "eat" these pages once again while
300 * creating the temporary page translation tables)
301 */
302
303struct eaten_page {
304 struct eaten_page *next;
305 char padding[PAGE_SIZE - sizeof(void *)];
306};
307
308static struct eaten_page *eaten_pages = NULL;
309
310static void release_eaten_pages(void)
311{
312 struct eaten_page *p, *q;
313
314 p = eaten_pages;
315 while (p) {
316 q = p->next;
317 /* We don't want swsusp_free() to free this page again */
318 ClearPageNosave(virt_to_page(p));
319 free_page((unsigned long)p);
320 p = q;
321 }
322 eaten_pages = NULL;
323}
324 405
325/** 406/**
326 * @safe_needed - on resume, for storing the PBE list and the image, 407 * @safe_needed - on resume, for storing the PBE list and the image,
327 * we can only use memory pages that do not conflict with the pages 408 * we can only use memory pages that do not conflict with the pages
328 * which had been used before suspend. 409 * used before suspend.
329 * 410 *
330 * The unsafe pages are marked with the PG_nosave_free flag 411 * The unsafe pages are marked with the PG_nosave_free flag
331 * 412 * and we count them using unsafe_pages
332 * Allocated but unusable (ie eaten) memory pages should be marked
333 * so that swsusp_free() can release them
334 */ 413 */
335 414
336static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed) 415static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
337{ 416{
338 void *res; 417 void *res;
339 418
419 res = (void *)get_zeroed_page(gfp_mask);
340 if (safe_needed) 420 if (safe_needed)
341 do { 421 while (res && PageNosaveFree(virt_to_page(res))) {
422 /* The page is unsafe, mark it for swsusp_free() */
423 SetPageNosave(virt_to_page(res));
424 unsafe_pages++;
342 res = (void *)get_zeroed_page(gfp_mask); 425 res = (void *)get_zeroed_page(gfp_mask);
343 if (res && PageNosaveFree(virt_to_page(res))) { 426 }
344 /* This is for swsusp_free() */
345 SetPageNosave(virt_to_page(res));
346 ((struct eaten_page *)res)->next = eaten_pages;
347 eaten_pages = res;
348 }
349 } while (res && PageNosaveFree(virt_to_page(res)));
350 else
351 res = (void *)get_zeroed_page(gfp_mask);
352 if (res) { 427 if (res) {
353 SetPageNosave(virt_to_page(res)); 428 SetPageNosave(virt_to_page(res));
354 SetPageNosaveFree(virt_to_page(res)); 429 SetPageNosaveFree(virt_to_page(res));
@@ -374,7 +449,8 @@ unsigned long get_safe_page(gfp_t gfp_mask)
374 * On each page we set up a list of struct_pbe elements. 449 * On each page we set up a list of struct_pbe elements.
375 */ 450 */
376 451
377struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed) 452static struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask,
453 int safe_needed)
378{ 454{
379 unsigned int num; 455 unsigned int num;
380 struct pbe *pblist, *pbe; 456 struct pbe *pblist, *pbe;
@@ -642,6 +718,8 @@ static int mark_unsafe_pages(struct pbe *pblist)
642 return -EFAULT; 718 return -EFAULT;
643 } 719 }
644 720
721 unsafe_pages = 0;
722
645 return 0; 723 return 0;
646} 724}
647 725
@@ -719,42 +797,99 @@ static inline struct pbe *unpack_orig_addresses(unsigned long *buf,
719} 797}
720 798
721/** 799/**
722 * create_image - use metadata contained in the PBE list 800 * prepare_image - use metadata contained in the PBE list
723 * pointed to by pagedir_nosave to mark the pages that will 801 * pointed to by pagedir_nosave to mark the pages that will
724 * be overwritten in the process of restoring the system 802 * be overwritten in the process of restoring the system
725 * memory state from the image and allocate memory for 803 * memory state from the image ("unsafe" pages) and allocate
726 * the image avoiding these pages 804 * memory for the image
805 *
806 * The idea is to allocate the PBE list first and then
807 * allocate as many pages as it's needed for the image data,
808 * but not to assign these pages to the PBEs initially.
809 * Instead, we just mark them as allocated and create a list
 810 * of "safe" pages which will be used later
727 */ 811 */
728 812
729static int create_image(struct snapshot_handle *handle) 813struct safe_page {
814 struct safe_page *next;
815 char padding[PAGE_SIZE - sizeof(void *)];
816};
817
818static struct safe_page *safe_pages;
819
820static int prepare_image(struct snapshot_handle *handle)
730{ 821{
731 int error = 0; 822 int error = 0;
732 struct pbe *p, *pblist; 823 unsigned int nr_pages = nr_copy_pages;
824 struct pbe *p, *pblist = NULL;
733 825
734 p = pagedir_nosave; 826 p = pagedir_nosave;
735 error = mark_unsafe_pages(p); 827 error = mark_unsafe_pages(p);
736 if (!error) { 828 if (!error) {
737 pblist = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1); 829 pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1);
738 if (pblist) 830 if (pblist)
739 copy_page_backup_list(pblist, p); 831 copy_page_backup_list(pblist, p);
740 free_pagedir(p, 0); 832 free_pagedir(p, 0);
741 if (!pblist) 833 if (!pblist)
742 error = -ENOMEM; 834 error = -ENOMEM;
743 } 835 }
744 if (!error) 836 safe_pages = NULL;
745 error = alloc_data_pages(pblist, GFP_ATOMIC, 1); 837 if (!error && nr_pages > unsafe_pages) {
838 nr_pages -= unsafe_pages;
839 while (nr_pages--) {
840 struct safe_page *ptr;
841
842 ptr = (struct safe_page *)get_zeroed_page(GFP_ATOMIC);
843 if (!ptr) {
844 error = -ENOMEM;
845 break;
846 }
847 if (!PageNosaveFree(virt_to_page(ptr))) {
848 /* The page is "safe", add it to the list */
849 ptr->next = safe_pages;
850 safe_pages = ptr;
851 }
852 /* Mark the page as allocated */
853 SetPageNosave(virt_to_page(ptr));
854 SetPageNosaveFree(virt_to_page(ptr));
855 }
856 }
746 if (!error) { 857 if (!error) {
747 release_eaten_pages();
748 pagedir_nosave = pblist; 858 pagedir_nosave = pblist;
749 } else { 859 } else {
750 pagedir_nosave = NULL;
751 handle->pbe = NULL; 860 handle->pbe = NULL;
752 nr_copy_pages = 0; 861 swsusp_free();
753 nr_meta_pages = 0;
754 } 862 }
755 return error; 863 return error;
756} 864}
757 865
866static void *get_buffer(struct snapshot_handle *handle)
867{
868 struct pbe *pbe = handle->pbe, *last = handle->last_pbe;
869 struct page *page = virt_to_page(pbe->orig_address);
870
871 if (PageNosave(page) && PageNosaveFree(page)) {
872 /*
873 * We have allocated the "original" page frame and we can
874 * use it directly to store the read page
875 */
876 pbe->address = 0;
877 if (last && last->next)
878 last->next = NULL;
879 return (void *)pbe->orig_address;
880 }
881 /*
882 * The "original" page frame has not been allocated and we have to
883 * use a "safe" page frame to store the read page
884 */
885 pbe->address = (unsigned long)safe_pages;
886 safe_pages = safe_pages->next;
887 if (last)
888 last->next = pbe;
889 handle->last_pbe = pbe;
890 return (void *)pbe->address;
891}
892
758/** 893/**
759 * snapshot_write_next - used for writing the system memory snapshot. 894 * snapshot_write_next - used for writing the system memory snapshot.
760 * 895 *
@@ -799,15 +934,16 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
799 } else if (handle->prev <= nr_meta_pages) { 934 } else if (handle->prev <= nr_meta_pages) {
800 handle->pbe = unpack_orig_addresses(buffer, handle->pbe); 935 handle->pbe = unpack_orig_addresses(buffer, handle->pbe);
801 if (!handle->pbe) { 936 if (!handle->pbe) {
802 error = create_image(handle); 937 error = prepare_image(handle);
803 if (error) 938 if (error)
804 return error; 939 return error;
805 handle->pbe = pagedir_nosave; 940 handle->pbe = pagedir_nosave;
806 handle->buffer = (void *)handle->pbe->address; 941 handle->last_pbe = NULL;
942 handle->buffer = get_buffer(handle);
807 } 943 }
808 } else { 944 } else {
809 handle->pbe = handle->pbe->next; 945 handle->pbe = handle->pbe->next;
810 handle->buffer = (void *)handle->pbe->address; 946 handle->buffer = get_buffer(handle);
811 } 947 }
812 handle->prev = handle->page; 948 handle->prev = handle->page;
813 } 949 }
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index c4016cbbd3..f0ee4e7780 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -62,16 +62,6 @@ unsigned long image_size = 500 * 1024 * 1024;
62 62
63int in_suspend __nosavedata = 0; 63int in_suspend __nosavedata = 0;
64 64
65#ifdef CONFIG_HIGHMEM
66unsigned int count_highmem_pages(void);
67int save_highmem(void);
68int restore_highmem(void);
69#else
70static int save_highmem(void) { return 0; }
71static int restore_highmem(void) { return 0; }
72static unsigned int count_highmem_pages(void) { return 0; }
73#endif
74
75/** 65/**
76 * The following functions are used for tracing the allocated 66 * The following functions are used for tracing the allocated
77 * swap pages, so that they can be freed in case of an error. 67 * swap pages, so that they can be freed in case of an error.
@@ -175,6 +165,12 @@ void free_all_swap_pages(int swap, struct bitmap_page *bitmap)
175 */ 165 */
176 166
177#define SHRINK_BITE 10000 167#define SHRINK_BITE 10000
168static inline unsigned long __shrink_memory(long tmp)
169{
170 if (tmp > SHRINK_BITE)
171 tmp = SHRINK_BITE;
172 return shrink_all_memory(tmp);
173}
178 174
179int swsusp_shrink_memory(void) 175int swsusp_shrink_memory(void)
180{ 176{
@@ -186,21 +182,23 @@ int swsusp_shrink_memory(void)
186 182
187 printk("Shrinking memory... "); 183 printk("Shrinking memory... ");
188 do { 184 do {
189 size = 2 * count_highmem_pages(); 185 size = 2 * count_special_pages();
190 size += size / 50 + count_data_pages(); 186 size += size / 50 + count_data_pages();
191 size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE + 187 size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE +
192 PAGES_FOR_IO; 188 PAGES_FOR_IO;
193 tmp = size; 189 tmp = size;
194 for_each_zone (zone) 190 for_each_zone (zone)
195 if (!is_highmem(zone)) 191 if (!is_highmem(zone) && populated_zone(zone)) {
196 tmp -= zone->free_pages; 192 tmp -= zone->free_pages;
193 tmp += zone->lowmem_reserve[ZONE_NORMAL];
194 }
197 if (tmp > 0) { 195 if (tmp > 0) {
198 tmp = shrink_all_memory(SHRINK_BITE); 196 tmp = __shrink_memory(tmp);
199 if (!tmp) 197 if (!tmp)
200 return -ENOMEM; 198 return -ENOMEM;
201 pages += tmp; 199 pages += tmp;
202 } else if (size > image_size / PAGE_SIZE) { 200 } else if (size > image_size / PAGE_SIZE) {
203 tmp = shrink_all_memory(SHRINK_BITE); 201 tmp = __shrink_memory(size - (image_size / PAGE_SIZE));
204 pages += tmp; 202 pages += tmp;
205 } 203 }
206 printk("\b%c", p[i++%4]); 204 printk("\b%c", p[i++%4]);
@@ -228,7 +226,7 @@ int swsusp_suspend(void)
228 goto Enable_irqs; 226 goto Enable_irqs;
229 } 227 }
230 228
231 if ((error = save_highmem())) { 229 if ((error = save_special_mem())) {
232 printk(KERN_ERR "swsusp: Not enough free pages for highmem\n"); 230 printk(KERN_ERR "swsusp: Not enough free pages for highmem\n");
233 goto Restore_highmem; 231 goto Restore_highmem;
234 } 232 }
@@ -239,7 +237,7 @@ int swsusp_suspend(void)
239 /* Restore control flow magically appears here */ 237 /* Restore control flow magically appears here */
240 restore_processor_state(); 238 restore_processor_state();
241Restore_highmem: 239Restore_highmem:
242 restore_highmem(); 240 restore_special_mem();
243 device_power_up(); 241 device_power_up();
244Enable_irqs: 242Enable_irqs:
245 local_irq_enable(); 243 local_irq_enable();
@@ -265,7 +263,7 @@ int swsusp_resume(void)
265 */ 263 */
266 swsusp_free(); 264 swsusp_free();
267 restore_processor_state(); 265 restore_processor_state();
268 restore_highmem(); 266 restore_special_mem();
269 touch_softlockup_watchdog(); 267 touch_softlockup_watchdog();
270 device_power_up(); 268 device_power_up();
271 local_irq_enable(); 269 local_irq_enable();
diff --git a/kernel/printk.c b/kernel/printk.c
index c056f33244..19a9556192 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -67,6 +67,7 @@ EXPORT_SYMBOL(oops_in_progress);
67 * driver system. 67 * driver system.
68 */ 68 */
69static DECLARE_MUTEX(console_sem); 69static DECLARE_MUTEX(console_sem);
70static DECLARE_MUTEX(secondary_console_sem);
70struct console *console_drivers; 71struct console *console_drivers;
71/* 72/*
72 * This is used for debugging the mess that is the VT code by 73 * This is used for debugging the mess that is the VT code by
@@ -76,7 +77,7 @@ struct console *console_drivers;
76 * path in the console code where we end up in places I want 77 * path in the console code where we end up in places I want
 77 * locked without the console semaphore held 78
78 */ 79 */
79static int console_locked; 80static int console_locked, console_suspended;
80 81
81/* 82/*
82 * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars 83 * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars
@@ -698,6 +699,23 @@ int __init add_preferred_console(char *name, int idx, char *options)
698} 699}
699 700
700/** 701/**
702 * suspend_console - suspend the console subsystem
703 *
704 * This disables printk() while we go into suspend states
705 */
706void suspend_console(void)
707{
708 acquire_console_sem();
709 console_suspended = 1;
710}
711
712void resume_console(void)
713{
714 console_suspended = 0;
715 release_console_sem();
716}
717
718/**
701 * acquire_console_sem - lock the console system for exclusive use. 719 * acquire_console_sem - lock the console system for exclusive use.
702 * 720 *
703 * Acquires a semaphore which guarantees that the caller has 721 * Acquires a semaphore which guarantees that the caller has
@@ -708,6 +726,10 @@ int __init add_preferred_console(char *name, int idx, char *options)
708void acquire_console_sem(void) 726void acquire_console_sem(void)
709{ 727{
710 BUG_ON(in_interrupt()); 728 BUG_ON(in_interrupt());
729 if (console_suspended) {
730 down(&secondary_console_sem);
731 return;
732 }
711 down(&console_sem); 733 down(&console_sem);
712 console_locked = 1; 734 console_locked = 1;
713 console_may_schedule = 1; 735 console_may_schedule = 1;
@@ -750,6 +772,10 @@ void release_console_sem(void)
750 unsigned long _con_start, _log_end; 772 unsigned long _con_start, _log_end;
751 unsigned long wake_klogd = 0; 773 unsigned long wake_klogd = 0;
752 774
775 if (console_suspended) {
776 up(&secondary_console_sem);
777 return;
778 }
753 for ( ; ; ) { 779 for ( ; ; ) {
754 spin_lock_irqsave(&logbuf_lock, flags); 780 spin_lock_irqsave(&logbuf_lock, flags);
755 wake_klogd |= log_start - log_end; 781 wake_klogd |= log_start - log_end;
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 2058f88c7b..20e9710fc2 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -612,14 +612,6 @@ void synchronize_rcu(void)
612 wait_for_completion(&rcu.completion); 612 wait_for_completion(&rcu.completion);
613} 613}
614 614
615/*
616 * Deprecated, use synchronize_rcu() or synchronize_sched() instead.
617 */
618void synchronize_kernel(void)
619{
620 synchronize_rcu();
621}
622
623module_param(blimit, int, 0); 615module_param(blimit, int, 0);
624module_param(qhimark, int, 0); 616module_param(qhimark, int, 0);
625module_param(qlowmark, int, 0); 617module_param(qlowmark, int, 0);
@@ -627,7 +619,6 @@ module_param(qlowmark, int, 0);
627module_param(rsinterval, int, 0); 619module_param(rsinterval, int, 0);
628#endif 620#endif
629EXPORT_SYMBOL_GPL(rcu_batches_completed); 621EXPORT_SYMBOL_GPL(rcu_batches_completed);
630EXPORT_SYMBOL_GPL_FUTURE(call_rcu); /* WARNING: GPL-only in April 2006. */ 622EXPORT_SYMBOL_GPL(call_rcu);
631EXPORT_SYMBOL_GPL_FUTURE(call_rcu_bh); /* WARNING: GPL-only in April 2006. */ 623EXPORT_SYMBOL_GPL(call_rcu_bh);
632EXPORT_SYMBOL_GPL(synchronize_rcu); 624EXPORT_SYMBOL_GPL(synchronize_rcu);
633EXPORT_SYMBOL_GPL_FUTURE(synchronize_kernel); /* WARNING: GPL-only in April 2006. */
diff --git a/kernel/sched.c b/kernel/sched.c
index c13f1bd2df..5dbc426944 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3886,6 +3886,10 @@ long sched_setaffinity(pid_t pid, cpumask_t new_mask)
3886 !capable(CAP_SYS_NICE)) 3886 !capable(CAP_SYS_NICE))
3887 goto out_unlock; 3887 goto out_unlock;
3888 3888
3889 retval = security_task_setscheduler(p, 0, NULL);
3890 if (retval)
3891 goto out_unlock;
3892
3889 cpus_allowed = cpuset_cpus_allowed(p); 3893 cpus_allowed = cpuset_cpus_allowed(p);
3890 cpus_and(new_mask, new_mask, cpus_allowed); 3894 cpus_and(new_mask, new_mask, cpus_allowed);
3891 retval = set_cpus_allowed(p, new_mask); 3895 retval = set_cpus_allowed(p, new_mask);
@@ -3954,7 +3958,10 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask)
3954 if (!p) 3958 if (!p)
3955 goto out_unlock; 3959 goto out_unlock;
3956 3960
3957 retval = 0; 3961 retval = security_task_getscheduler(p);
3962 if (retval)
3963 goto out_unlock;
3964
3958 cpus_and(*mask, p->cpus_allowed, cpu_online_map); 3965 cpus_and(*mask, p->cpus_allowed, cpu_online_map);
3959 3966
3960out_unlock: 3967out_unlock:
@@ -4046,6 +4053,9 @@ asmlinkage long sys_sched_yield(void)
4046 4053
4047static inline void __cond_resched(void) 4054static inline void __cond_resched(void)
4048{ 4055{
4056#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
4057 __might_sleep(__FILE__, __LINE__);
4058#endif
4049 /* 4059 /*
4050 * The BKS might be reacquired before we have dropped 4060 * The BKS might be reacquired before we have dropped
4051 * PREEMPT_ACTIVE, which could trigger a second 4061 * PREEMPT_ACTIVE, which could trigger a second
diff --git a/kernel/signal.c b/kernel/signal.c
index e5f8aea78f..1b3c921737 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -23,12 +23,12 @@
23#include <linux/syscalls.h> 23#include <linux/syscalls.h>
24#include <linux/ptrace.h> 24#include <linux/ptrace.h>
25#include <linux/signal.h> 25#include <linux/signal.h>
26#include <linux/audit.h>
27#include <linux/capability.h> 26#include <linux/capability.h>
28#include <asm/param.h> 27#include <asm/param.h>
29#include <asm/uaccess.h> 28#include <asm/uaccess.h>
30#include <asm/unistd.h> 29#include <asm/unistd.h>
31#include <asm/siginfo.h> 30#include <asm/siginfo.h>
31#include "audit.h" /* audit_signal_info() */
32 32
33/* 33/*
34 * SLAB caches for signal bits. 34 * SLAB caches for signal bits.
diff --git a/kernel/sys.c b/kernel/sys.c
index 0b6ec0e793..90930b28d2 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -13,7 +13,6 @@
13#include <linux/notifier.h> 13#include <linux/notifier.h>
14#include <linux/reboot.h> 14#include <linux/reboot.h>
15#include <linux/prctl.h> 15#include <linux/prctl.h>
16#include <linux/init.h>
17#include <linux/highuid.h> 16#include <linux/highuid.h>
18#include <linux/fs.h> 17#include <linux/fs.h>
19#include <linux/kernel.h> 18#include <linux/kernel.h>
@@ -57,6 +56,12 @@
57#ifndef GET_FPEXC_CTL 56#ifndef GET_FPEXC_CTL
58# define GET_FPEXC_CTL(a,b) (-EINVAL) 57# define GET_FPEXC_CTL(a,b) (-EINVAL)
59#endif 58#endif
59#ifndef GET_ENDIAN
60# define GET_ENDIAN(a,b) (-EINVAL)
61#endif
62#ifndef SET_ENDIAN
63# define SET_ENDIAN(a,b) (-EINVAL)
64#endif
60 65
61/* 66/*
62 * this is where the system-wide overflow UID and GID are defined, for 67 * this is where the system-wide overflow UID and GID are defined, for
@@ -1860,23 +1865,20 @@ out:
1860 * fields when reaping, so a sample either gets all the additions of a 1865 * fields when reaping, so a sample either gets all the additions of a
1861 * given child after it's reaped, or none so this sample is before reaping. 1866 * given child after it's reaped, or none so this sample is before reaping.
1862 * 1867 *
1863 * tasklist_lock locking optimisation: 1868 * Locking:
 1864 * If we are current and single threaded, we do not need to take the tasklist 1869 * We need to take the siglock for CHILDREN, SELF and BOTH
1865 * lock or the siglock. No one else can take our signal_struct away, 1870 * for the cases current multithreaded, non-current single threaded
1866 * no one else can reap the children to update signal->c* counters, and 1871 * non-current multithreaded. Thread traversal is now safe with
1867 * no one else can race with the signal-> fields. 1872 * the siglock held.
 1868 * If we do not take the tasklist_lock, the signal-> fields could be read 1873 * Strictly speaking, we do not need to take the siglock if we are current and
1869 * out of order while another thread was just exiting. So we place a 1874 * single threaded, as no one else can take our signal_struct away, no one
1870 * read memory barrier when we avoid the lock. On the writer side, 1875 * else can reap the children to update signal->c* counters, and no one else
1871 * write memory barrier is implied in __exit_signal as __exit_signal releases 1876 * can race with the signal-> fields. If we do not take any lock, the
1872 * the siglock spinlock after updating the signal-> fields. 1877 * signal-> fields could be read out of order while another thread was just
1873 * 1878 * exiting. So we should place a read memory barrier when we avoid the lock.
1874 * We don't really need the siglock when we access the non c* fields 1879 * On the writer side, write memory barrier is implied in __exit_signal
1875 * of the signal_struct (for RUSAGE_SELF) even in multithreaded 1880 * as __exit_signal releases the siglock spinlock after updating the signal->
1876 * case, since we take the tasklist lock for read and the non c* signal-> 1881 * fields. But we don't do this yet to keep things simple.
1877 * fields are updated only in __exit_signal, which is called with
1878 * tasklist_lock taken for write, hence these two threads cannot execute
1879 * concurrently.
1880 * 1882 *
1881 */ 1883 */
1882 1884
@@ -1885,35 +1887,25 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1885 struct task_struct *t; 1887 struct task_struct *t;
1886 unsigned long flags; 1888 unsigned long flags;
1887 cputime_t utime, stime; 1889 cputime_t utime, stime;
1888 int need_lock = 0;
1889 1890
1890 memset((char *) r, 0, sizeof *r); 1891 memset((char *) r, 0, sizeof *r);
1891 utime = stime = cputime_zero; 1892 utime = stime = cputime_zero;
1892 1893
1893 if (p != current || !thread_group_empty(p)) 1894 rcu_read_lock();
1894 need_lock = 1; 1895 if (!lock_task_sighand(p, &flags)) {
1895 1896 rcu_read_unlock();
1896 if (need_lock) { 1897 return;
1897 read_lock(&tasklist_lock); 1898 }
1898 if (unlikely(!p->signal)) {
1899 read_unlock(&tasklist_lock);
1900 return;
1901 }
1902 } else
1903 /* See locking comments above */
1904 smp_rmb();
1905 1899
1906 switch (who) { 1900 switch (who) {
1907 case RUSAGE_BOTH: 1901 case RUSAGE_BOTH:
1908 case RUSAGE_CHILDREN: 1902 case RUSAGE_CHILDREN:
1909 spin_lock_irqsave(&p->sighand->siglock, flags);
1910 utime = p->signal->cutime; 1903 utime = p->signal->cutime;
1911 stime = p->signal->cstime; 1904 stime = p->signal->cstime;
1912 r->ru_nvcsw = p->signal->cnvcsw; 1905 r->ru_nvcsw = p->signal->cnvcsw;
1913 r->ru_nivcsw = p->signal->cnivcsw; 1906 r->ru_nivcsw = p->signal->cnivcsw;
1914 r->ru_minflt = p->signal->cmin_flt; 1907 r->ru_minflt = p->signal->cmin_flt;
1915 r->ru_majflt = p->signal->cmaj_flt; 1908 r->ru_majflt = p->signal->cmaj_flt;
1916 spin_unlock_irqrestore(&p->sighand->siglock, flags);
1917 1909
1918 if (who == RUSAGE_CHILDREN) 1910 if (who == RUSAGE_CHILDREN)
1919 break; 1911 break;
@@ -1941,8 +1933,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1941 BUG(); 1933 BUG();
1942 } 1934 }
1943 1935
1944 if (need_lock) 1936 unlock_task_sighand(p, &flags);
1945 read_unlock(&tasklist_lock); 1937 rcu_read_unlock();
1938
1946 cputime_to_timeval(utime, &r->ru_utime); 1939 cputime_to_timeval(utime, &r->ru_utime);
1947 cputime_to_timeval(stime, &r->ru_stime); 1940 cputime_to_timeval(stime, &r->ru_stime);
1948} 1941}
@@ -2057,6 +2050,13 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
2057 return -EFAULT; 2050 return -EFAULT;
2058 return 0; 2051 return 0;
2059 } 2052 }
2053 case PR_GET_ENDIAN:
2054 error = GET_ENDIAN(current, arg2);
2055 break;
2056 case PR_SET_ENDIAN:
2057 error = SET_ENDIAN(current, arg2);
2058 break;
2059
2060 default: 2060 default:
2061 error = -EINVAL; 2061 error = -EINVAL;
2062 break; 2062 break;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 5433195040..6991bece67 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -87,6 +87,7 @@ cond_syscall(sys_inotify_init);
87cond_syscall(sys_inotify_add_watch); 87cond_syscall(sys_inotify_add_watch);
88cond_syscall(sys_inotify_rm_watch); 88cond_syscall(sys_inotify_rm_watch);
89cond_syscall(sys_migrate_pages); 89cond_syscall(sys_migrate_pages);
90cond_syscall(sys_move_pages);
90cond_syscall(sys_chown16); 91cond_syscall(sys_chown16);
91cond_syscall(sys_fchown16); 92cond_syscall(sys_fchown16);
92cond_syscall(sys_getegid16); 93cond_syscall(sys_getegid16);
@@ -132,3 +133,4 @@ cond_syscall(sys_mincore);
132cond_syscall(sys_madvise); 133cond_syscall(sys_madvise);
133cond_syscall(sys_mremap); 134cond_syscall(sys_mremap);
134cond_syscall(sys_remap_file_pages); 135cond_syscall(sys_remap_file_pages);
136cond_syscall(compat_sys_move_pages);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e82726faee..eb8bd214e7 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -59,6 +59,7 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
59extern int C_A_D; 59extern int C_A_D;
60extern int sysctl_overcommit_memory; 60extern int sysctl_overcommit_memory;
61extern int sysctl_overcommit_ratio; 61extern int sysctl_overcommit_ratio;
62extern int sysctl_panic_on_oom;
62extern int max_threads; 63extern int max_threads;
63extern int sysrq_enabled; 64extern int sysrq_enabled;
64extern int core_uses_pid; 65extern int core_uses_pid;
@@ -150,7 +151,7 @@ extern ctl_table random_table[];
150#ifdef CONFIG_UNIX98_PTYS 151#ifdef CONFIG_UNIX98_PTYS
151extern ctl_table pty_table[]; 152extern ctl_table pty_table[];
152#endif 153#endif
153#ifdef CONFIG_INOTIFY 154#ifdef CONFIG_INOTIFY_USER
154extern ctl_table inotify_table[]; 155extern ctl_table inotify_table[];
155#endif 156#endif
156 157
@@ -398,7 +399,7 @@ static ctl_table kern_table[] = {
398 .strategy = &sysctl_string, 399 .strategy = &sysctl_string,
399 }, 400 },
400#endif 401#endif
401#ifdef CONFIG_HOTPLUG 402#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
402 { 403 {
403 .ctl_name = KERN_HOTPLUG, 404 .ctl_name = KERN_HOTPLUG,
404 .procname = "hotplug", 405 .procname = "hotplug",
@@ -702,6 +703,14 @@ static ctl_table vm_table[] = {
702 .proc_handler = &proc_dointvec, 703 .proc_handler = &proc_dointvec,
703 }, 704 },
704 { 705 {
706 .ctl_name = VM_PANIC_ON_OOM,
707 .procname = "panic_on_oom",
708 .data = &sysctl_panic_on_oom,
709 .maxlen = sizeof(sysctl_panic_on_oom),
710 .mode = 0644,
711 .proc_handler = &proc_dointvec,
712 },
713 {
705 .ctl_name = VM_OVERCOMMIT_RATIO, 714 .ctl_name = VM_OVERCOMMIT_RATIO,
706 .procname = "overcommit_ratio", 715 .procname = "overcommit_ratio",
707 .data = &sysctl_overcommit_ratio, 716 .data = &sysctl_overcommit_ratio,
@@ -1028,7 +1037,7 @@ static ctl_table fs_table[] = {
1028 .mode = 0644, 1037 .mode = 0644,
1029 .proc_handler = &proc_doulongvec_minmax, 1038 .proc_handler = &proc_doulongvec_minmax,
1030 }, 1039 },
1031#ifdef CONFIG_INOTIFY 1040#ifdef CONFIG_INOTIFY_USER
1032 { 1041 {
1033 .ctl_name = FS_INOTIFY, 1042 .ctl_name = FS_INOTIFY,
1034 .procname = "inotify", 1043 .procname = "inotify",
diff --git a/kernel/timer.c b/kernel/timer.c
index 9e49deed46..f35b3939e9 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -383,23 +383,19 @@ EXPORT_SYMBOL(del_timer_sync);
383static int cascade(tvec_base_t *base, tvec_t *tv, int index) 383static int cascade(tvec_base_t *base, tvec_t *tv, int index)
384{ 384{
385 /* cascade all the timers from tv up one level */ 385 /* cascade all the timers from tv up one level */
386 struct list_head *head, *curr; 386 struct timer_list *timer, *tmp;
387 struct list_head tv_list;
388
389 list_replace_init(tv->vec + index, &tv_list);
387 390
388 head = tv->vec + index;
389 curr = head->next;
390 /* 391 /*
391 * We are removing _all_ timers from the list, so we don't have to 392 * We are removing _all_ timers from the list, so we
392 * detach them individually, just clear the list afterwards. 393 * don't have to detach them individually.
393 */ 394 */
394 while (curr != head) { 395 list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
395 struct timer_list *tmp; 396 BUG_ON(timer->base != base);
396 397 internal_add_timer(base, timer);
397 tmp = list_entry(curr, struct timer_list, entry);
398 BUG_ON(tmp->base != base);
399 curr = curr->next;
400 internal_add_timer(base, tmp);
401 } 398 }
402 INIT_LIST_HEAD(head);
403 399
404 return index; 400 return index;
405} 401}
@@ -419,10 +415,10 @@ static inline void __run_timers(tvec_base_t *base)
419 415
420 spin_lock_irq(&base->lock); 416 spin_lock_irq(&base->lock);
421 while (time_after_eq(jiffies, base->timer_jiffies)) { 417 while (time_after_eq(jiffies, base->timer_jiffies)) {
422 struct list_head work_list = LIST_HEAD_INIT(work_list); 418 struct list_head work_list;
423 struct list_head *head = &work_list; 419 struct list_head *head = &work_list;
424 int index = base->timer_jiffies & TVR_MASK; 420 int index = base->timer_jiffies & TVR_MASK;
425 421
426 /* 422 /*
427 * Cascade timers: 423 * Cascade timers:
428 */ 424 */
@@ -431,8 +427,8 @@ static inline void __run_timers(tvec_base_t *base)
431 (!cascade(base, &base->tv3, INDEX(1))) && 427 (!cascade(base, &base->tv3, INDEX(1))) &&
432 !cascade(base, &base->tv4, INDEX(2))) 428 !cascade(base, &base->tv4, INDEX(2)))
433 cascade(base, &base->tv5, INDEX(3)); 429 cascade(base, &base->tv5, INDEX(3));
434 ++base->timer_jiffies; 430 ++base->timer_jiffies;
435 list_splice_init(base->tv1.vec + index, &work_list); 431 list_replace_init(base->tv1.vec + index, &work_list);
436 while (!list_empty(head)) { 432 while (!list_empty(head)) {
437 void (*fn)(unsigned long); 433 void (*fn)(unsigned long);
438 unsigned long data; 434 unsigned long data;
diff --git a/kernel/user.c b/kernel/user.c
index 2116642f42..6408c04242 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -140,7 +140,7 @@ struct user_struct * alloc_uid(uid_t uid)
140 atomic_set(&new->processes, 0); 140 atomic_set(&new->processes, 0);
141 atomic_set(&new->files, 0); 141 atomic_set(&new->files, 0);
142 atomic_set(&new->sigpending, 0); 142 atomic_set(&new->sigpending, 0);
143#ifdef CONFIG_INOTIFY 143#ifdef CONFIG_INOTIFY_USER
144 atomic_set(&new->inotify_watches, 0); 144 atomic_set(&new->inotify_watches, 0);
145 atomic_set(&new->inotify_devs, 0); 145 atomic_set(&new->inotify_devs, 0);
146#endif 146#endif
@@ -148,7 +148,7 @@ struct user_struct * alloc_uid(uid_t uid)
148 new->mq_bytes = 0; 148 new->mq_bytes = 0;
149 new->locked_shm = 0; 149 new->locked_shm = 0;
150 150
151 if (alloc_uid_keyring(new) < 0) { 151 if (alloc_uid_keyring(new, current) < 0) {
152 kmem_cache_free(uid_cachep, new); 152 kmem_cache_free(uid_cachep, new);
153 return NULL; 153 return NULL;
154 } 154 }
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 880fb415a8..740c5abceb 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -531,11 +531,11 @@ int current_is_keventd(void)
531static void take_over_work(struct workqueue_struct *wq, unsigned int cpu) 531static void take_over_work(struct workqueue_struct *wq, unsigned int cpu)
532{ 532{
533 struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu); 533 struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);
534 LIST_HEAD(list); 534 struct list_head list;
535 struct work_struct *work; 535 struct work_struct *work;
536 536
537 spin_lock_irq(&cwq->lock); 537 spin_lock_irq(&cwq->lock);
538 list_splice_init(&cwq->worklist, &list); 538 list_replace_init(&cwq->worklist, &list);
539 539
540 while (!list_empty(&list)) { 540 while (!list_empty(&list)) {
541 printk("Taking work for %s\n", wq->name); 541 printk("Taking work for %s\n", wq->name);