aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile3
-rw-r--r--kernel/acct.c6
-rw-r--r--kernel/audit.c146
-rw-r--r--kernel/audit.h43
-rw-r--r--kernel/audit_tree.c66
-rw-r--r--kernel/audit_watch.c543
-rw-r--r--kernel/auditfilter.c518
-rw-r--r--kernel/auditsc.c33
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/futex.c45
-rw-r--r--kernel/kmod.c1
-rw-r--r--kernel/kprobes.c6
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/perf_counter.c322
-rw-r--r--kernel/pid.c7
-rw-r--r--kernel/ptrace.c4
-rw-r--r--kernel/resource.c2
-rw-r--r--kernel/sched.c14
-rw-r--r--kernel/sysctl.c13
-rw-r--r--kernel/time/timer_stats.c16
-rw-r--r--kernel/timer.c2
-rw-r--r--kernel/trace/Kconfig6
-rw-r--r--kernel/trace/ftrace.c60
-rw-r--r--kernel/trace/ring_buffer.c11
-rw-r--r--kernel/trace/trace.c23
-rw-r--r--kernel/trace/trace.h7
-rw-r--r--kernel/trace/trace_event_types.h3
-rw-r--r--kernel/trace/trace_events.c28
-rw-r--r--kernel/trace/trace_functions.c3
-rw-r--r--kernel/trace/trace_output.c3
-rw-r--r--kernel/trace/trace_printk.c26
-rw-r--r--kernel/trace/trace_stack.c4
-rw-r--r--kernel/trace/trace_stat.c6
34 files changed, 1173 insertions, 805 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 0a32cb21ec97..2093a691f1c2 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -69,7 +69,7 @@ obj-$(CONFIG_IKCONFIG) += configs.o
69obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o 69obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o
70obj-$(CONFIG_STOP_MACHINE) += stop_machine.o 70obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
71obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o 71obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
72obj-$(CONFIG_AUDIT) += audit.o auditfilter.o 72obj-$(CONFIG_AUDIT) += audit.o auditfilter.o audit_watch.o
73obj-$(CONFIG_AUDITSYSCALL) += auditsc.o 73obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
74obj-$(CONFIG_GCOV_KERNEL) += gcov/ 74obj-$(CONFIG_GCOV_KERNEL) += gcov/
75obj-$(CONFIG_AUDIT_TREE) += audit_tree.o 75obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
@@ -96,6 +96,7 @@ obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
96obj-$(CONFIG_FUNCTION_TRACER) += trace/ 96obj-$(CONFIG_FUNCTION_TRACER) += trace/
97obj-$(CONFIG_TRACING) += trace/ 97obj-$(CONFIG_TRACING) += trace/
98obj-$(CONFIG_X86_DS) += trace/ 98obj-$(CONFIG_X86_DS) += trace/
99obj-$(CONFIG_RING_BUFFER) += trace/
99obj-$(CONFIG_SMP) += sched_cpupri.o 100obj-$(CONFIG_SMP) += sched_cpupri.o
100obj-$(CONFIG_SLOW_WORK) += slow-work.o 101obj-$(CONFIG_SLOW_WORK) += slow-work.o
101obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o 102obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
diff --git a/kernel/acct.c b/kernel/acct.c
index 7afa31564162..9f3391090b3e 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -215,6 +215,7 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
215static int acct_on(char *name) 215static int acct_on(char *name)
216{ 216{
217 struct file *file; 217 struct file *file;
218 struct vfsmount *mnt;
218 int error; 219 int error;
219 struct pid_namespace *ns; 220 struct pid_namespace *ns;
220 struct bsd_acct_struct *acct = NULL; 221 struct bsd_acct_struct *acct = NULL;
@@ -256,11 +257,12 @@ static int acct_on(char *name)
256 acct = NULL; 257 acct = NULL;
257 } 258 }
258 259
259 mnt_pin(file->f_path.mnt); 260 mnt = file->f_path.mnt;
261 mnt_pin(mnt);
260 acct_file_reopen(ns->bacct, file, ns); 262 acct_file_reopen(ns->bacct, file, ns);
261 spin_unlock(&acct_lock); 263 spin_unlock(&acct_lock);
262 264
263 mntput(file->f_path.mnt); /* it's pinned, now give up active reference */ 265 mntput(mnt); /* it's pinned, now give up active reference */
264 kfree(acct); 266 kfree(acct);
265 267
266 return 0; 268 return 0;
diff --git a/kernel/audit.c b/kernel/audit.c
index 9442c3533ba9..defc2e6f1e3b 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -115,9 +115,6 @@ static atomic_t audit_lost = ATOMIC_INIT(0);
115/* The netlink socket. */ 115/* The netlink socket. */
116static struct sock *audit_sock; 116static struct sock *audit_sock;
117 117
118/* Inotify handle. */
119struct inotify_handle *audit_ih;
120
121/* Hash for inode-based rules */ 118/* Hash for inode-based rules */
122struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS]; 119struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
123 120
@@ -136,7 +133,7 @@ static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
136static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait); 133static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
137 134
138/* Serialize requests from userspace. */ 135/* Serialize requests from userspace. */
139static DEFINE_MUTEX(audit_cmd_mutex); 136DEFINE_MUTEX(audit_cmd_mutex);
140 137
141/* AUDIT_BUFSIZ is the size of the temporary buffer used for formatting 138/* AUDIT_BUFSIZ is the size of the temporary buffer used for formatting
142 * audit records. Since printk uses a 1024 byte buffer, this buffer 139 * audit records. Since printk uses a 1024 byte buffer, this buffer
@@ -375,6 +372,25 @@ static void audit_hold_skb(struct sk_buff *skb)
375 kfree_skb(skb); 372 kfree_skb(skb);
376} 373}
377 374
375/*
376 * For one reason or another this nlh isn't getting delivered to the userspace
377 * audit daemon, just send it to printk.
378 */
379static void audit_printk_skb(struct sk_buff *skb)
380{
381 struct nlmsghdr *nlh = nlmsg_hdr(skb);
382 char *data = NLMSG_DATA(nlh);
383
384 if (nlh->nlmsg_type != AUDIT_EOE) {
385 if (printk_ratelimit())
386 printk(KERN_NOTICE "type=%d %s\n", nlh->nlmsg_type, data);
387 else
388 audit_log_lost("printk limit exceeded\n");
389 }
390
391 audit_hold_skb(skb);
392}
393
378static void kauditd_send_skb(struct sk_buff *skb) 394static void kauditd_send_skb(struct sk_buff *skb)
379{ 395{
380 int err; 396 int err;
@@ -427,14 +443,8 @@ static int kauditd_thread(void *dummy)
427 if (skb) { 443 if (skb) {
428 if (audit_pid) 444 if (audit_pid)
429 kauditd_send_skb(skb); 445 kauditd_send_skb(skb);
430 else { 446 else
431 if (printk_ratelimit()) 447 audit_printk_skb(skb);
432 printk(KERN_NOTICE "%s\n", skb->data + NLMSG_SPACE(0));
433 else
434 audit_log_lost("printk limit exceeded\n");
435
436 audit_hold_skb(skb);
437 }
438 } else { 448 } else {
439 DECLARE_WAITQUEUE(wait, current); 449 DECLARE_WAITQUEUE(wait, current);
440 set_current_state(TASK_INTERRUPTIBLE); 450 set_current_state(TASK_INTERRUPTIBLE);
@@ -495,42 +505,25 @@ int audit_send_list(void *_dest)
495 return 0; 505 return 0;
496} 506}
497 507
498#ifdef CONFIG_AUDIT_TREE
499static int prune_tree_thread(void *unused)
500{
501 mutex_lock(&audit_cmd_mutex);
502 audit_prune_trees();
503 mutex_unlock(&audit_cmd_mutex);
504 return 0;
505}
506
507void audit_schedule_prune(void)
508{
509 kthread_run(prune_tree_thread, NULL, "audit_prune_tree");
510}
511#endif
512
513struct sk_buff *audit_make_reply(int pid, int seq, int type, int done, 508struct sk_buff *audit_make_reply(int pid, int seq, int type, int done,
514 int multi, void *payload, int size) 509 int multi, void *payload, int size)
515{ 510{
516 struct sk_buff *skb; 511 struct sk_buff *skb;
517 struct nlmsghdr *nlh; 512 struct nlmsghdr *nlh;
518 int len = NLMSG_SPACE(size);
519 void *data; 513 void *data;
520 int flags = multi ? NLM_F_MULTI : 0; 514 int flags = multi ? NLM_F_MULTI : 0;
521 int t = done ? NLMSG_DONE : type; 515 int t = done ? NLMSG_DONE : type;
522 516
523 skb = alloc_skb(len, GFP_KERNEL); 517 skb = nlmsg_new(size, GFP_KERNEL);
524 if (!skb) 518 if (!skb)
525 return NULL; 519 return NULL;
526 520
527 nlh = NLMSG_PUT(skb, pid, seq, t, size); 521 nlh = NLMSG_NEW(skb, pid, seq, t, size, flags);
528 nlh->nlmsg_flags = flags; 522 data = NLMSG_DATA(nlh);
529 data = NLMSG_DATA(nlh);
530 memcpy(data, payload, size); 523 memcpy(data, payload, size);
531 return skb; 524 return skb;
532 525
533nlmsg_failure: /* Used by NLMSG_PUT */ 526nlmsg_failure: /* Used by NLMSG_NEW */
534 if (skb) 527 if (skb)
535 kfree_skb(skb); 528 kfree_skb(skb);
536 return NULL; 529 return NULL;
@@ -926,28 +919,29 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
926} 919}
927 920
928/* 921/*
929 * Get message from skb (based on rtnetlink_rcv_skb). Each message is 922 * Get message from skb. Each message is processed by audit_receive_msg.
930 * processed by audit_receive_msg. Malformed skbs with wrong length are 923 * Malformed skbs with wrong length are discarded silently.
931 * discarded silently.
932 */ 924 */
933static void audit_receive_skb(struct sk_buff *skb) 925static void audit_receive_skb(struct sk_buff *skb)
934{ 926{
935 int err; 927 struct nlmsghdr *nlh;
936 struct nlmsghdr *nlh; 928 /*
937 u32 rlen; 929 * len MUST be signed for NLMSG_NEXT to be able to dec it below 0
930 * if the nlmsg_len was not aligned
931 */
932 int len;
933 int err;
938 934
939 while (skb->len >= NLMSG_SPACE(0)) { 935 nlh = nlmsg_hdr(skb);
940 nlh = nlmsg_hdr(skb); 936 len = skb->len;
941 if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) 937
942 return; 938 while (NLMSG_OK(nlh, len)) {
943 rlen = NLMSG_ALIGN(nlh->nlmsg_len); 939 err = audit_receive_msg(skb, nlh);
944 if (rlen > skb->len) 940 /* if err or if this message says it wants a response */
945 rlen = skb->len; 941 if (err || (nlh->nlmsg_flags & NLM_F_ACK))
946 if ((err = audit_receive_msg(skb, nlh))) {
947 netlink_ack(skb, nlh, err); 942 netlink_ack(skb, nlh, err);
948 } else if (nlh->nlmsg_flags & NLM_F_ACK) 943
949 netlink_ack(skb, nlh, 0); 944 nlh = NLMSG_NEXT(nlh, len);
950 skb_pull(skb, rlen);
951 } 945 }
952} 946}
953 947
@@ -959,13 +953,6 @@ static void audit_receive(struct sk_buff *skb)
959 mutex_unlock(&audit_cmd_mutex); 953 mutex_unlock(&audit_cmd_mutex);
960} 954}
961 955
962#ifdef CONFIG_AUDITSYSCALL
963static const struct inotify_operations audit_inotify_ops = {
964 .handle_event = audit_handle_ievent,
965 .destroy_watch = audit_free_parent,
966};
967#endif
968
969/* Initialize audit support at boot time. */ 956/* Initialize audit support at boot time. */
970static int __init audit_init(void) 957static int __init audit_init(void)
971{ 958{
@@ -991,12 +978,6 @@ static int __init audit_init(void)
991 978
992 audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized"); 979 audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
993 980
994#ifdef CONFIG_AUDITSYSCALL
995 audit_ih = inotify_init(&audit_inotify_ops);
996 if (IS_ERR(audit_ih))
997 audit_panic("cannot initialize inotify handle");
998#endif
999
1000 for (i = 0; i < AUDIT_INODE_BUCKETS; i++) 981 for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
1001 INIT_LIST_HEAD(&audit_inode_hash[i]); 982 INIT_LIST_HEAD(&audit_inode_hash[i]);
1002 983
@@ -1070,18 +1051,20 @@ static struct audit_buffer * audit_buffer_alloc(struct audit_context *ctx,
1070 goto err; 1051 goto err;
1071 } 1052 }
1072 1053
1073 ab->skb = alloc_skb(AUDIT_BUFSIZ, gfp_mask);
1074 if (!ab->skb)
1075 goto err;
1076
1077 ab->ctx = ctx; 1054 ab->ctx = ctx;
1078 ab->gfp_mask = gfp_mask; 1055 ab->gfp_mask = gfp_mask;
1079 nlh = (struct nlmsghdr *)skb_put(ab->skb, NLMSG_SPACE(0)); 1056
1080 nlh->nlmsg_type = type; 1057 ab->skb = nlmsg_new(AUDIT_BUFSIZ, gfp_mask);
1081 nlh->nlmsg_flags = 0; 1058 if (!ab->skb)
1082 nlh->nlmsg_pid = 0; 1059 goto nlmsg_failure;
1083 nlh->nlmsg_seq = 0; 1060
1061 nlh = NLMSG_NEW(ab->skb, 0, 0, type, 0, 0);
1062
1084 return ab; 1063 return ab;
1064
1065nlmsg_failure: /* Used by NLMSG_NEW */
1066 kfree_skb(ab->skb);
1067 ab->skb = NULL;
1085err: 1068err:
1086 audit_buffer_free(ab); 1069 audit_buffer_free(ab);
1087 return NULL; 1070 return NULL;
@@ -1452,6 +1435,15 @@ void audit_log_d_path(struct audit_buffer *ab, const char *prefix,
1452 kfree(pathname); 1435 kfree(pathname);
1453} 1436}
1454 1437
1438void audit_log_key(struct audit_buffer *ab, char *key)
1439{
1440 audit_log_format(ab, " key=");
1441 if (key)
1442 audit_log_untrustedstring(ab, key);
1443 else
1444 audit_log_format(ab, "(null)");
1445}
1446
1455/** 1447/**
1456 * audit_log_end - end one audit record 1448 * audit_log_end - end one audit record
1457 * @ab: the audit_buffer 1449 * @ab: the audit_buffer
@@ -1475,15 +1467,7 @@ void audit_log_end(struct audit_buffer *ab)
1475 skb_queue_tail(&audit_skb_queue, ab->skb); 1467 skb_queue_tail(&audit_skb_queue, ab->skb);
1476 wake_up_interruptible(&kauditd_wait); 1468 wake_up_interruptible(&kauditd_wait);
1477 } else { 1469 } else {
1478 if (nlh->nlmsg_type != AUDIT_EOE) { 1470 audit_printk_skb(ab->skb);
1479 if (printk_ratelimit()) {
1480 printk(KERN_NOTICE "type=%d %s\n",
1481 nlh->nlmsg_type,
1482 ab->skb->data + NLMSG_SPACE(0));
1483 } else
1484 audit_log_lost("printk limit exceeded\n");
1485 }
1486 audit_hold_skb(ab->skb);
1487 } 1471 }
1488 ab->skb = NULL; 1472 ab->skb = NULL;
1489 } 1473 }
diff --git a/kernel/audit.h b/kernel/audit.h
index 16f18cac661b..208687be4f30 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -53,18 +53,7 @@ enum audit_state {
53}; 53};
54 54
55/* Rule lists */ 55/* Rule lists */
56struct audit_parent; 56struct audit_watch;
57
58struct audit_watch {
59 atomic_t count; /* reference count */
60 char *path; /* insertion path */
61 dev_t dev; /* associated superblock device */
62 unsigned long ino; /* associated inode number */
63 struct audit_parent *parent; /* associated parent */
64 struct list_head wlist; /* entry in parent->watches list */
65 struct list_head rules; /* associated rules */
66};
67
68struct audit_tree; 57struct audit_tree;
69struct audit_chunk; 58struct audit_chunk;
70 59
@@ -108,19 +97,28 @@ struct audit_netlink_list {
108 97
109int audit_send_list(void *); 98int audit_send_list(void *);
110 99
111struct inotify_watch;
112/* Inotify handle */
113extern struct inotify_handle *audit_ih;
114
115extern void audit_free_parent(struct inotify_watch *);
116extern void audit_handle_ievent(struct inotify_watch *, u32, u32, u32,
117 const char *, struct inode *);
118extern int selinux_audit_rule_update(void); 100extern int selinux_audit_rule_update(void);
119 101
120extern struct mutex audit_filter_mutex; 102extern struct mutex audit_filter_mutex;
121extern void audit_free_rule_rcu(struct rcu_head *); 103extern void audit_free_rule_rcu(struct rcu_head *);
122extern struct list_head audit_filter_list[]; 104extern struct list_head audit_filter_list[];
123 105
106/* audit watch functions */
107extern unsigned long audit_watch_inode(struct audit_watch *watch);
108extern dev_t audit_watch_dev(struct audit_watch *watch);
109extern void audit_put_watch(struct audit_watch *watch);
110extern void audit_get_watch(struct audit_watch *watch);
111extern int audit_to_watch(struct audit_krule *krule, char *path, int len, u32 op);
112extern int audit_add_watch(struct audit_krule *krule);
113extern void audit_remove_watch(struct audit_watch *watch);
114extern void audit_remove_watch_rule(struct audit_krule *krule, struct list_head *list);
115extern void audit_inotify_unregister(struct list_head *in_list);
116extern char *audit_watch_path(struct audit_watch *watch);
117extern struct list_head *audit_watch_rules(struct audit_watch *watch);
118
119extern struct audit_entry *audit_dupe_rule(struct audit_krule *old,
120 struct audit_watch *watch);
121
124#ifdef CONFIG_AUDIT_TREE 122#ifdef CONFIG_AUDIT_TREE
125extern struct audit_chunk *audit_tree_lookup(const struct inode *); 123extern struct audit_chunk *audit_tree_lookup(const struct inode *);
126extern void audit_put_chunk(struct audit_chunk *); 124extern void audit_put_chunk(struct audit_chunk *);
@@ -130,10 +128,9 @@ extern int audit_add_tree_rule(struct audit_krule *);
130extern int audit_remove_tree_rule(struct audit_krule *); 128extern int audit_remove_tree_rule(struct audit_krule *);
131extern void audit_trim_trees(void); 129extern void audit_trim_trees(void);
132extern int audit_tag_tree(char *old, char *new); 130extern int audit_tag_tree(char *old, char *new);
133extern void audit_schedule_prune(void);
134extern void audit_prune_trees(void);
135extern const char *audit_tree_path(struct audit_tree *); 131extern const char *audit_tree_path(struct audit_tree *);
136extern void audit_put_tree(struct audit_tree *); 132extern void audit_put_tree(struct audit_tree *);
133extern void audit_kill_trees(struct list_head *);
137#else 134#else
138#define audit_remove_tree_rule(rule) BUG() 135#define audit_remove_tree_rule(rule) BUG()
139#define audit_add_tree_rule(rule) -EINVAL 136#define audit_add_tree_rule(rule) -EINVAL
@@ -142,6 +139,7 @@ extern void audit_put_tree(struct audit_tree *);
142#define audit_put_tree(tree) (void)0 139#define audit_put_tree(tree) (void)0
143#define audit_tag_tree(old, new) -EINVAL 140#define audit_tag_tree(old, new) -EINVAL
144#define audit_tree_path(rule) "" /* never called */ 141#define audit_tree_path(rule) "" /* never called */
142#define audit_kill_trees(list) BUG()
145#endif 143#endif
146 144
147extern char *audit_unpack_string(void **, size_t *, size_t); 145extern char *audit_unpack_string(void **, size_t *, size_t);
@@ -160,7 +158,10 @@ static inline int audit_signal_info(int sig, struct task_struct *t)
160 return 0; 158 return 0;
161} 159}
162extern void audit_filter_inodes(struct task_struct *, struct audit_context *); 160extern void audit_filter_inodes(struct task_struct *, struct audit_context *);
161extern struct list_head *audit_killed_trees(void);
163#else 162#else
164#define audit_signal_info(s,t) AUDIT_DISABLED 163#define audit_signal_info(s,t) AUDIT_DISABLED
165#define audit_filter_inodes(t,c) AUDIT_DISABLED 164#define audit_filter_inodes(t,c) AUDIT_DISABLED
166#endif 165#endif
166
167extern struct mutex audit_cmd_mutex;
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 1f6396d76687..2451dc6f3282 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -2,6 +2,7 @@
2#include <linux/inotify.h> 2#include <linux/inotify.h>
3#include <linux/namei.h> 3#include <linux/namei.h>
4#include <linux/mount.h> 4#include <linux/mount.h>
5#include <linux/kthread.h>
5 6
6struct audit_tree; 7struct audit_tree;
7struct audit_chunk; 8struct audit_chunk;
@@ -441,13 +442,11 @@ static void kill_rules(struct audit_tree *tree)
441 if (rule->tree) { 442 if (rule->tree) {
442 /* not a half-baked one */ 443 /* not a half-baked one */
443 ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE); 444 ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
444 audit_log_format(ab, "op=remove rule dir="); 445 audit_log_format(ab, "op=");
446 audit_log_string(ab, "remove rule");
447 audit_log_format(ab, " dir=");
445 audit_log_untrustedstring(ab, rule->tree->pathname); 448 audit_log_untrustedstring(ab, rule->tree->pathname);
446 if (rule->filterkey) { 449 audit_log_key(ab, rule->filterkey);
447 audit_log_format(ab, " key=");
448 audit_log_untrustedstring(ab, rule->filterkey);
449 } else
450 audit_log_format(ab, " key=(null)");
451 audit_log_format(ab, " list=%d res=1", rule->listnr); 450 audit_log_format(ab, " list=%d res=1", rule->listnr);
452 audit_log_end(ab); 451 audit_log_end(ab);
453 rule->tree = NULL; 452 rule->tree = NULL;
@@ -519,6 +518,8 @@ static void trim_marked(struct audit_tree *tree)
519 } 518 }
520} 519}
521 520
521static void audit_schedule_prune(void);
522
522/* called with audit_filter_mutex */ 523/* called with audit_filter_mutex */
523int audit_remove_tree_rule(struct audit_krule *rule) 524int audit_remove_tree_rule(struct audit_krule *rule)
524{ 525{
@@ -824,10 +825,11 @@ int audit_tag_tree(char *old, char *new)
824 825
825/* 826/*
826 * That gets run when evict_chunk() ends up needing to kill audit_tree. 827 * That gets run when evict_chunk() ends up needing to kill audit_tree.
827 * Runs from a separate thread, with audit_cmd_mutex held. 828 * Runs from a separate thread.
828 */ 829 */
829void audit_prune_trees(void) 830static int prune_tree_thread(void *unused)
830{ 831{
832 mutex_lock(&audit_cmd_mutex);
831 mutex_lock(&audit_filter_mutex); 833 mutex_lock(&audit_filter_mutex);
832 834
833 while (!list_empty(&prune_list)) { 835 while (!list_empty(&prune_list)) {
@@ -844,6 +846,40 @@ void audit_prune_trees(void)
844 } 846 }
845 847
846 mutex_unlock(&audit_filter_mutex); 848 mutex_unlock(&audit_filter_mutex);
849 mutex_unlock(&audit_cmd_mutex);
850 return 0;
851}
852
853static void audit_schedule_prune(void)
854{
855 kthread_run(prune_tree_thread, NULL, "audit_prune_tree");
856}
857
858/*
859 * ... and that one is done if evict_chunk() decides to delay until the end
860 * of syscall. Runs synchronously.
861 */
862void audit_kill_trees(struct list_head *list)
863{
864 mutex_lock(&audit_cmd_mutex);
865 mutex_lock(&audit_filter_mutex);
866
867 while (!list_empty(list)) {
868 struct audit_tree *victim;
869
870 victim = list_entry(list->next, struct audit_tree, list);
871 kill_rules(victim);
872 list_del_init(&victim->list);
873
874 mutex_unlock(&audit_filter_mutex);
875
876 prune_one(victim);
877
878 mutex_lock(&audit_filter_mutex);
879 }
880
881 mutex_unlock(&audit_filter_mutex);
882 mutex_unlock(&audit_cmd_mutex);
847} 883}
848 884
849/* 885/*
@@ -854,6 +890,8 @@ void audit_prune_trees(void)
854static void evict_chunk(struct audit_chunk *chunk) 890static void evict_chunk(struct audit_chunk *chunk)
855{ 891{
856 struct audit_tree *owner; 892 struct audit_tree *owner;
893 struct list_head *postponed = audit_killed_trees();
894 int need_prune = 0;
857 int n; 895 int n;
858 896
859 if (chunk->dead) 897 if (chunk->dead)
@@ -869,15 +907,21 @@ static void evict_chunk(struct audit_chunk *chunk)
869 owner->root = NULL; 907 owner->root = NULL;
870 list_del_init(&owner->same_root); 908 list_del_init(&owner->same_root);
871 spin_unlock(&hash_lock); 909 spin_unlock(&hash_lock);
872 kill_rules(owner); 910 if (!postponed) {
873 list_move(&owner->list, &prune_list); 911 kill_rules(owner);
874 audit_schedule_prune(); 912 list_move(&owner->list, &prune_list);
913 need_prune = 1;
914 } else {
915 list_move(&owner->list, postponed);
916 }
875 spin_lock(&hash_lock); 917 spin_lock(&hash_lock);
876 } 918 }
877 list_del_rcu(&chunk->hash); 919 list_del_rcu(&chunk->hash);
878 for (n = 0; n < chunk->count; n++) 920 for (n = 0; n < chunk->count; n++)
879 list_del_init(&chunk->owners[n].list); 921 list_del_init(&chunk->owners[n].list);
880 spin_unlock(&hash_lock); 922 spin_unlock(&hash_lock);
923 if (need_prune)
924 audit_schedule_prune();
881 mutex_unlock(&audit_filter_mutex); 925 mutex_unlock(&audit_filter_mutex);
882} 926}
883 927
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
new file mode 100644
index 000000000000..0e96dbc60ea9
--- /dev/null
+++ b/kernel/audit_watch.c
@@ -0,0 +1,543 @@
1/* audit_watch.c -- watching inodes
2 *
3 * Copyright 2003-2009 Red Hat, Inc.
4 * Copyright 2005 Hewlett-Packard Development Company, L.P.
5 * Copyright 2005 IBM Corporation
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#include <linux/kernel.h>
23#include <linux/audit.h>
24#include <linux/kthread.h>
25#include <linux/mutex.h>
26#include <linux/fs.h>
27#include <linux/namei.h>
28#include <linux/netlink.h>
29#include <linux/sched.h>
30#include <linux/inotify.h>
31#include <linux/security.h>
32#include "audit.h"
33
34/*
35 * Reference counting:
36 *
37 * audit_parent: lifetime is from audit_init_parent() to receipt of an IN_IGNORED
38 * event. Each audit_watch holds a reference to its associated parent.
39 *
40 * audit_watch: if added to lists, lifetime is from audit_init_watch() to
41 * audit_remove_watch(). Additionally, an audit_watch may exist
42 * temporarily to assist in searching existing filter data. Each
43 * audit_krule holds a reference to its associated watch.
44 */
45
46struct audit_watch {
47 atomic_t count; /* reference count */
48 char *path; /* insertion path */
49 dev_t dev; /* associated superblock device */
50 unsigned long ino; /* associated inode number */
51 struct audit_parent *parent; /* associated parent */
52 struct list_head wlist; /* entry in parent->watches list */
53 struct list_head rules; /* associated rules */
54};
55
56struct audit_parent {
57 struct list_head ilist; /* entry in inotify registration list */
58 struct list_head watches; /* associated watches */
59 struct inotify_watch wdata; /* inotify watch data */
60 unsigned flags; /* status flags */
61};
62
63/* Inotify handle. */
64struct inotify_handle *audit_ih;
65
66/*
67 * audit_parent status flags:
68 *
69 * AUDIT_PARENT_INVALID - set anytime rules/watches are auto-removed due to
70 * a filesystem event to ensure we're adding audit watches to a valid parent.
71 * Technically not needed for IN_DELETE_SELF or IN_UNMOUNT events, as we cannot
72 * receive them while we have nameidata, but must be used for IN_MOVE_SELF which
73 * we can receive while holding nameidata.
74 */
75#define AUDIT_PARENT_INVALID 0x001
76
/*
 * Inotify events we care about.
 *
 * The mask is wrapped in parentheses so it always expands as one operand;
 * without them an expression such as (AUDIT_IN_WATCH & mask) would bind the
 * '&' to the last flag only, because '&' has higher precedence than '|'.
 */
#define AUDIT_IN_WATCH (IN_MOVE|IN_CREATE|IN_DELETE|IN_DELETE_SELF|IN_MOVE_SELF)
79
80static void audit_free_parent(struct inotify_watch *i_watch)
81{
82 struct audit_parent *parent;
83
84 parent = container_of(i_watch, struct audit_parent, wdata);
85 WARN_ON(!list_empty(&parent->watches));
86 kfree(parent);
87}
88
89void audit_get_watch(struct audit_watch *watch)
90{
91 atomic_inc(&watch->count);
92}
93
94void audit_put_watch(struct audit_watch *watch)
95{
96 if (atomic_dec_and_test(&watch->count)) {
97 WARN_ON(watch->parent);
98 WARN_ON(!list_empty(&watch->rules));
99 kfree(watch->path);
100 kfree(watch);
101 }
102}
103
104void audit_remove_watch(struct audit_watch *watch)
105{
106 list_del(&watch->wlist);
107 put_inotify_watch(&watch->parent->wdata);
108 watch->parent = NULL;
109 audit_put_watch(watch); /* match initial get */
110}
111
112char *audit_watch_path(struct audit_watch *watch)
113{
114 return watch->path;
115}
116
117struct list_head *audit_watch_rules(struct audit_watch *watch)
118{
119 return &watch->rules;
120}
121
122unsigned long audit_watch_inode(struct audit_watch *watch)
123{
124 return watch->ino;
125}
126
127dev_t audit_watch_dev(struct audit_watch *watch)
128{
129 return watch->dev;
130}
131
132/* Initialize a parent watch entry. */
133static struct audit_parent *audit_init_parent(struct nameidata *ndp)
134{
135 struct audit_parent *parent;
136 s32 wd;
137
138 parent = kzalloc(sizeof(*parent), GFP_KERNEL);
139 if (unlikely(!parent))
140 return ERR_PTR(-ENOMEM);
141
142 INIT_LIST_HEAD(&parent->watches);
143 parent->flags = 0;
144
145 inotify_init_watch(&parent->wdata);
146 /* grab a ref so inotify watch hangs around until we take audit_filter_mutex */
147 get_inotify_watch(&parent->wdata);
148 wd = inotify_add_watch(audit_ih, &parent->wdata,
149 ndp->path.dentry->d_inode, AUDIT_IN_WATCH);
150 if (wd < 0) {
151 audit_free_parent(&parent->wdata);
152 return ERR_PTR(wd);
153 }
154
155 return parent;
156}
157
158/* Initialize a watch entry. */
159static struct audit_watch *audit_init_watch(char *path)
160{
161 struct audit_watch *watch;
162
163 watch = kzalloc(sizeof(*watch), GFP_KERNEL);
164 if (unlikely(!watch))
165 return ERR_PTR(-ENOMEM);
166
167 INIT_LIST_HEAD(&watch->rules);
168 atomic_set(&watch->count, 1);
169 watch->path = path;
170 watch->dev = (dev_t)-1;
171 watch->ino = (unsigned long)-1;
172
173 return watch;
174}
175
176/* Translate a watch string to kernel respresentation. */
177int audit_to_watch(struct audit_krule *krule, char *path, int len, u32 op)
178{
179 struct audit_watch *watch;
180
181 if (!audit_ih)
182 return -EOPNOTSUPP;
183
184 if (path[0] != '/' || path[len-1] == '/' ||
185 krule->listnr != AUDIT_FILTER_EXIT ||
186 op != Audit_equal ||
187 krule->inode_f || krule->watch || krule->tree)
188 return -EINVAL;
189
190 watch = audit_init_watch(path);
191 if (IS_ERR(watch))
192 return PTR_ERR(watch);
193
194 audit_get_watch(watch);
195 krule->watch = watch;
196
197 return 0;
198}
199
200/* Duplicate the given audit watch. The new watch's rules list is initialized
201 * to an empty list and wlist is undefined. */
202static struct audit_watch *audit_dupe_watch(struct audit_watch *old)
203{
204 char *path;
205 struct audit_watch *new;
206
207 path = kstrdup(old->path, GFP_KERNEL);
208 if (unlikely(!path))
209 return ERR_PTR(-ENOMEM);
210
211 new = audit_init_watch(path);
212 if (IS_ERR(new)) {
213 kfree(path);
214 goto out;
215 }
216
217 new->dev = old->dev;
218 new->ino = old->ino;
219 get_inotify_watch(&old->parent->wdata);
220 new->parent = old->parent;
221
222out:
223 return new;
224}
225
226static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watch *w, char *op)
227{
228 if (audit_enabled) {
229 struct audit_buffer *ab;
230 ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
231 audit_log_format(ab, "auid=%u ses=%u op=",
232 audit_get_loginuid(current),
233 audit_get_sessionid(current));
234 audit_log_string(ab, op);
235 audit_log_format(ab, " path=");
236 audit_log_untrustedstring(ab, w->path);
237 audit_log_key(ab, r->filterkey);
238 audit_log_format(ab, " list=%d res=1", r->listnr);
239 audit_log_end(ab);
240 }
241}
242
243/* Update inode info in audit rules based on filesystem event. */
244static void audit_update_watch(struct audit_parent *parent,
245 const char *dname, dev_t dev,
246 unsigned long ino, unsigned invalidating)
247{
248 struct audit_watch *owatch, *nwatch, *nextw;
249 struct audit_krule *r, *nextr;
250 struct audit_entry *oentry, *nentry;
251
252 mutex_lock(&audit_filter_mutex);
253 list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) {
254 if (audit_compare_dname_path(dname, owatch->path, NULL))
255 continue;
256
257 /* If the update involves invalidating rules, do the inode-based
258 * filtering now, so we don't omit records. */
259 if (invalidating && current->audit_context)
260 audit_filter_inodes(current, current->audit_context);
261
262 nwatch = audit_dupe_watch(owatch);
263 if (IS_ERR(nwatch)) {
264 mutex_unlock(&audit_filter_mutex);
265 audit_panic("error updating watch, skipping");
266 return;
267 }
268 nwatch->dev = dev;
269 nwatch->ino = ino;
270
271 list_for_each_entry_safe(r, nextr, &owatch->rules, rlist) {
272
273 oentry = container_of(r, struct audit_entry, rule);
274 list_del(&oentry->rule.rlist);
275 list_del_rcu(&oentry->list);
276
277 nentry = audit_dupe_rule(&oentry->rule, nwatch);
278 if (IS_ERR(nentry)) {
279 list_del(&oentry->rule.list);
280 audit_panic("error updating watch, removing");
281 } else {
282 int h = audit_hash_ino((u32)ino);
283 list_add(&nentry->rule.rlist, &nwatch->rules);
284 list_add_rcu(&nentry->list, &audit_inode_hash[h]);
285 list_replace(&oentry->rule.list,
286 &nentry->rule.list);
287 }
288
289 audit_watch_log_rule_change(r, owatch, "updated rules");
290
291 call_rcu(&oentry->rcu, audit_free_rule_rcu);
292 }
293
294 audit_remove_watch(owatch);
295 goto add_watch_to_parent; /* event applies to a single watch */
296 }
297 mutex_unlock(&audit_filter_mutex);
298 return;
299
300add_watch_to_parent:
301 list_add(&nwatch->wlist, &parent->watches);
302 mutex_unlock(&audit_filter_mutex);
303 return;
304}
305
306/* Remove all watches & rules associated with a parent that is going away. */
307static void audit_remove_parent_watches(struct audit_parent *parent)
308{
309 struct audit_watch *w, *nextw;
310 struct audit_krule *r, *nextr;
311 struct audit_entry *e;
312
313 mutex_lock(&audit_filter_mutex);
314 parent->flags |= AUDIT_PARENT_INVALID;
315 list_for_each_entry_safe(w, nextw, &parent->watches, wlist) {
316 list_for_each_entry_safe(r, nextr, &w->rules, rlist) {
317 e = container_of(r, struct audit_entry, rule);
318 audit_watch_log_rule_change(r, w, "remove rule");
319 list_del(&r->rlist);
320 list_del(&r->list);
321 list_del_rcu(&e->list);
322 call_rcu(&e->rcu, audit_free_rule_rcu);
323 }
324 audit_remove_watch(w);
325 }
326 mutex_unlock(&audit_filter_mutex);
327}
328
329/* Unregister inotify watches for parents on in_list.
330 * Generates an IN_IGNORED event. */
331void audit_inotify_unregister(struct list_head *in_list)
332{
333 struct audit_parent *p, *n;
334
335 list_for_each_entry_safe(p, n, in_list, ilist) {
336 list_del(&p->ilist);
337 inotify_rm_watch(audit_ih, &p->wdata);
338 /* the unpin matching the pin in audit_do_del_rule() */
339 unpin_inotify_watch(&p->wdata);
340 }
341}
342
343/* Get path information necessary for adding watches. */
344static int audit_get_nd(char *path, struct nameidata **ndp, struct nameidata **ndw)
345{
346 struct nameidata *ndparent, *ndwatch;
347 int err;
348
349 ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL);
350 if (unlikely(!ndparent))
351 return -ENOMEM;
352
353 ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL);
354 if (unlikely(!ndwatch)) {
355 kfree(ndparent);
356 return -ENOMEM;
357 }
358
359 err = path_lookup(path, LOOKUP_PARENT, ndparent);
360 if (err) {
361 kfree(ndparent);
362 kfree(ndwatch);
363 return err;
364 }
365
366 err = path_lookup(path, 0, ndwatch);
367 if (err) {
368 kfree(ndwatch);
369 ndwatch = NULL;
370 }
371
372 *ndp = ndparent;
373 *ndw = ndwatch;
374
375 return 0;
376}
377
378/* Release resources used for watch path information. */
379static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw)
380{
381 if (ndp) {
382 path_put(&ndp->path);
383 kfree(ndp);
384 }
385 if (ndw) {
386 path_put(&ndw->path);
387 kfree(ndw);
388 }
389}
390
391/* Associate the given rule with an existing parent inotify_watch.
392 * Caller must hold audit_filter_mutex. */
393static void audit_add_to_parent(struct audit_krule *krule,
394 struct audit_parent *parent)
395{
396 struct audit_watch *w, *watch = krule->watch;
397 int watch_found = 0;
398
399 list_for_each_entry(w, &parent->watches, wlist) {
400 if (strcmp(watch->path, w->path))
401 continue;
402
403 watch_found = 1;
404
405 /* put krule's and initial refs to temporary watch */
406 audit_put_watch(watch);
407 audit_put_watch(watch);
408
409 audit_get_watch(w);
410 krule->watch = watch = w;
411 break;
412 }
413
414 if (!watch_found) {
415 get_inotify_watch(&parent->wdata);
416 watch->parent = parent;
417
418 list_add(&watch->wlist, &parent->watches);
419 }
420 list_add(&krule->rlist, &watch->rules);
421}
422
423/* Find a matching watch entry, or add this one.
424 * Caller must hold audit_filter_mutex. */
425int audit_add_watch(struct audit_krule *krule)
426{
427 struct audit_watch *watch = krule->watch;
428 struct inotify_watch *i_watch;
429 struct audit_parent *parent;
430 struct nameidata *ndp = NULL, *ndw = NULL;
431 int ret = 0;
432
433 mutex_unlock(&audit_filter_mutex);
434
435 /* Avoid calling path_lookup under audit_filter_mutex. */
436 ret = audit_get_nd(watch->path, &ndp, &ndw);
437 if (ret) {
438 /* caller expects mutex locked */
439 mutex_lock(&audit_filter_mutex);
440 goto error;
441 }
442
443 /* update watch filter fields */
444 if (ndw) {
445 watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev;
446 watch->ino = ndw->path.dentry->d_inode->i_ino;
447 }
448
449 /* The audit_filter_mutex must not be held during inotify calls because
450 * we hold it during inotify event callback processing. If an existing
451 * inotify watch is found, inotify_find_watch() grabs a reference before
452 * returning.
453 */
454 if (inotify_find_watch(audit_ih, ndp->path.dentry->d_inode,
455 &i_watch) < 0) {
456 parent = audit_init_parent(ndp);
457 if (IS_ERR(parent)) {
458 /* caller expects mutex locked */
459 mutex_lock(&audit_filter_mutex);
460 ret = PTR_ERR(parent);
461 goto error;
462 }
463 } else
464 parent = container_of(i_watch, struct audit_parent, wdata);
465
466 mutex_lock(&audit_filter_mutex);
467
468 /* parent was moved before we took audit_filter_mutex */
469 if (parent->flags & AUDIT_PARENT_INVALID)
470 ret = -ENOENT;
471 else
472 audit_add_to_parent(krule, parent);
473
474 /* match get in audit_init_parent or inotify_find_watch */
475 put_inotify_watch(&parent->wdata);
476
477error:
478 audit_put_nd(ndp, ndw); /* NULL args OK */
479 return ret;
480
481}
482
483void audit_remove_watch_rule(struct audit_krule *krule, struct list_head *list)
484{
485 struct audit_watch *watch = krule->watch;
486 struct audit_parent *parent = watch->parent;
487
488 list_del(&krule->rlist);
489
490 if (list_empty(&watch->rules)) {
491 audit_remove_watch(watch);
492
493 if (list_empty(&parent->watches)) {
494 /* Put parent on the inotify un-registration
495 * list. Grab a reference before releasing
496 * audit_filter_mutex, to be released in
497 * audit_inotify_unregister().
498 * If filesystem is going away, just leave
499 * the sucker alone, eviction will take
500 * care of it. */
501 if (pin_inotify_watch(&parent->wdata))
502 list_add(&parent->ilist, list);
503 }
504 }
505}
506
507/* Update watch data in audit rules based on inotify events. */
508static void audit_handle_ievent(struct inotify_watch *i_watch, u32 wd, u32 mask,
509 u32 cookie, const char *dname, struct inode *inode)
510{
511 struct audit_parent *parent;
512
513 parent = container_of(i_watch, struct audit_parent, wdata);
514
515 if (mask & (IN_CREATE|IN_MOVED_TO) && inode)
516 audit_update_watch(parent, dname, inode->i_sb->s_dev,
517 inode->i_ino, 0);
518 else if (mask & (IN_DELETE|IN_MOVED_FROM))
519 audit_update_watch(parent, dname, (dev_t)-1, (unsigned long)-1, 1);
520 /* inotify automatically removes the watch and sends IN_IGNORED */
521 else if (mask & (IN_DELETE_SELF|IN_UNMOUNT))
522 audit_remove_parent_watches(parent);
523 /* inotify does not remove the watch, so remove it manually */
524 else if(mask & IN_MOVE_SELF) {
525 audit_remove_parent_watches(parent);
526 inotify_remove_watch_locked(audit_ih, i_watch);
527 } else if (mask & IN_IGNORED)
528 put_inotify_watch(i_watch);
529}
530
531static const struct inotify_operations audit_inotify_ops = {
532 .handle_event = audit_handle_ievent,
533 .destroy_watch = audit_free_parent,
534};
535
536static int __init audit_watch_init(void)
537{
538 audit_ih = inotify_init(&audit_inotify_ops);
539 if (IS_ERR(audit_ih))
540 audit_panic("cannot initialize inotify handle");
541 return 0;
542}
543subsys_initcall(audit_watch_init);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 713098ee5a02..a70604047f3c 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -27,7 +27,6 @@
27#include <linux/namei.h> 27#include <linux/namei.h>
28#include <linux/netlink.h> 28#include <linux/netlink.h>
29#include <linux/sched.h> 29#include <linux/sched.h>
30#include <linux/inotify.h>
31#include <linux/security.h> 30#include <linux/security.h>
32#include "audit.h" 31#include "audit.h"
33 32
@@ -44,36 +43,6 @@
44 * be written directly provided audit_filter_mutex is held. 43 * be written directly provided audit_filter_mutex is held.
45 */ 44 */
46 45
47/*
48 * Reference counting:
49 *
50 * audit_parent: lifetime is from audit_init_parent() to receipt of an IN_IGNORED
51 * event. Each audit_watch holds a reference to its associated parent.
52 *
53 * audit_watch: if added to lists, lifetime is from audit_init_watch() to
54 * audit_remove_watch(). Additionally, an audit_watch may exist
55 * temporarily to assist in searching existing filter data. Each
56 * audit_krule holds a reference to its associated watch.
57 */
58
59struct audit_parent {
60 struct list_head ilist; /* entry in inotify registration list */
61 struct list_head watches; /* associated watches */
62 struct inotify_watch wdata; /* inotify watch data */
63 unsigned flags; /* status flags */
64};
65
66/*
67 * audit_parent status flags:
68 *
69 * AUDIT_PARENT_INVALID - set anytime rules/watches are auto-removed due to
70 * a filesystem event to ensure we're adding audit watches to a valid parent.
71 * Technically not needed for IN_DELETE_SELF or IN_UNMOUNT events, as we cannot
72 * receive them while we have nameidata, but must be used for IN_MOVE_SELF which
73 * we can receive while holding nameidata.
74 */
75#define AUDIT_PARENT_INVALID 0x001
76
77/* Audit filter lists, defined in <linux/audit.h> */ 46/* Audit filter lists, defined in <linux/audit.h> */
78struct list_head audit_filter_list[AUDIT_NR_FILTERS] = { 47struct list_head audit_filter_list[AUDIT_NR_FILTERS] = {
79 LIST_HEAD_INIT(audit_filter_list[0]), 48 LIST_HEAD_INIT(audit_filter_list[0]),
@@ -97,41 +66,6 @@ static struct list_head audit_rules_list[AUDIT_NR_FILTERS] = {
97 66
98DEFINE_MUTEX(audit_filter_mutex); 67DEFINE_MUTEX(audit_filter_mutex);
99 68
100/* Inotify events we care about. */
101#define AUDIT_IN_WATCH IN_MOVE|IN_CREATE|IN_DELETE|IN_DELETE_SELF|IN_MOVE_SELF
102
103void audit_free_parent(struct inotify_watch *i_watch)
104{
105 struct audit_parent *parent;
106
107 parent = container_of(i_watch, struct audit_parent, wdata);
108 WARN_ON(!list_empty(&parent->watches));
109 kfree(parent);
110}
111
112static inline void audit_get_watch(struct audit_watch *watch)
113{
114 atomic_inc(&watch->count);
115}
116
117static void audit_put_watch(struct audit_watch *watch)
118{
119 if (atomic_dec_and_test(&watch->count)) {
120 WARN_ON(watch->parent);
121 WARN_ON(!list_empty(&watch->rules));
122 kfree(watch->path);
123 kfree(watch);
124 }
125}
126
127static void audit_remove_watch(struct audit_watch *watch)
128{
129 list_del(&watch->wlist);
130 put_inotify_watch(&watch->parent->wdata);
131 watch->parent = NULL;
132 audit_put_watch(watch); /* match initial get */
133}
134
135static inline void audit_free_rule(struct audit_entry *e) 69static inline void audit_free_rule(struct audit_entry *e)
136{ 70{
137 int i; 71 int i;
@@ -156,50 +90,6 @@ void audit_free_rule_rcu(struct rcu_head *head)
156 audit_free_rule(e); 90 audit_free_rule(e);
157} 91}
158 92
159/* Initialize a parent watch entry. */
160static struct audit_parent *audit_init_parent(struct nameidata *ndp)
161{
162 struct audit_parent *parent;
163 s32 wd;
164
165 parent = kzalloc(sizeof(*parent), GFP_KERNEL);
166 if (unlikely(!parent))
167 return ERR_PTR(-ENOMEM);
168
169 INIT_LIST_HEAD(&parent->watches);
170 parent->flags = 0;
171
172 inotify_init_watch(&parent->wdata);
173 /* grab a ref so inotify watch hangs around until we take audit_filter_mutex */
174 get_inotify_watch(&parent->wdata);
175 wd = inotify_add_watch(audit_ih, &parent->wdata,
176 ndp->path.dentry->d_inode, AUDIT_IN_WATCH);
177 if (wd < 0) {
178 audit_free_parent(&parent->wdata);
179 return ERR_PTR(wd);
180 }
181
182 return parent;
183}
184
185/* Initialize a watch entry. */
186static struct audit_watch *audit_init_watch(char *path)
187{
188 struct audit_watch *watch;
189
190 watch = kzalloc(sizeof(*watch), GFP_KERNEL);
191 if (unlikely(!watch))
192 return ERR_PTR(-ENOMEM);
193
194 INIT_LIST_HEAD(&watch->rules);
195 atomic_set(&watch->count, 1);
196 watch->path = path;
197 watch->dev = (dev_t)-1;
198 watch->ino = (unsigned long)-1;
199
200 return watch;
201}
202
203/* Initialize an audit filterlist entry. */ 93/* Initialize an audit filterlist entry. */
204static inline struct audit_entry *audit_init_entry(u32 field_count) 94static inline struct audit_entry *audit_init_entry(u32 field_count)
205{ 95{
@@ -260,31 +150,6 @@ static inline int audit_to_inode(struct audit_krule *krule,
260 return 0; 150 return 0;
261} 151}
262 152
263/* Translate a watch string to kernel representation. */
264static int audit_to_watch(struct audit_krule *krule, char *path, int len,
265 u32 op)
266{
267 struct audit_watch *watch;
268
269 if (!audit_ih)
270 return -EOPNOTSUPP;
271
272 if (path[0] != '/' || path[len-1] == '/' ||
273 krule->listnr != AUDIT_FILTER_EXIT ||
274 op != Audit_equal ||
275 krule->inode_f || krule->watch || krule->tree)
276 return -EINVAL;
277
278 watch = audit_init_watch(path);
279 if (IS_ERR(watch))
280 return PTR_ERR(watch);
281
282 audit_get_watch(watch);
283 krule->watch = watch;
284
285 return 0;
286}
287
288static __u32 *classes[AUDIT_SYSCALL_CLASSES]; 153static __u32 *classes[AUDIT_SYSCALL_CLASSES];
289 154
290int __init audit_register_class(int class, unsigned *list) 155int __init audit_register_class(int class, unsigned *list)
@@ -766,7 +631,8 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
766 break; 631 break;
767 case AUDIT_WATCH: 632 case AUDIT_WATCH:
768 data->buflen += data->values[i] = 633 data->buflen += data->values[i] =
769 audit_pack_string(&bufp, krule->watch->path); 634 audit_pack_string(&bufp,
635 audit_watch_path(krule->watch));
770 break; 636 break;
771 case AUDIT_DIR: 637 case AUDIT_DIR:
772 data->buflen += data->values[i] = 638 data->buflen += data->values[i] =
@@ -818,7 +684,8 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
818 return 1; 684 return 1;
819 break; 685 break;
820 case AUDIT_WATCH: 686 case AUDIT_WATCH:
821 if (strcmp(a->watch->path, b->watch->path)) 687 if (strcmp(audit_watch_path(a->watch),
688 audit_watch_path(b->watch)))
822 return 1; 689 return 1;
823 break; 690 break;
824 case AUDIT_DIR: 691 case AUDIT_DIR:
@@ -844,32 +711,6 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
844 return 0; 711 return 0;
845} 712}
846 713
847/* Duplicate the given audit watch. The new watch's rules list is initialized
848 * to an empty list and wlist is undefined. */
849static struct audit_watch *audit_dupe_watch(struct audit_watch *old)
850{
851 char *path;
852 struct audit_watch *new;
853
854 path = kstrdup(old->path, GFP_KERNEL);
855 if (unlikely(!path))
856 return ERR_PTR(-ENOMEM);
857
858 new = audit_init_watch(path);
859 if (IS_ERR(new)) {
860 kfree(path);
861 goto out;
862 }
863
864 new->dev = old->dev;
865 new->ino = old->ino;
866 get_inotify_watch(&old->parent->wdata);
867 new->parent = old->parent;
868
869out:
870 return new;
871}
872
873/* Duplicate LSM field information. The lsm_rule is opaque, so must be 714/* Duplicate LSM field information. The lsm_rule is opaque, so must be
874 * re-initialized. */ 715 * re-initialized. */
875static inline int audit_dupe_lsm_field(struct audit_field *df, 716static inline int audit_dupe_lsm_field(struct audit_field *df,
@@ -904,8 +745,8 @@ static inline int audit_dupe_lsm_field(struct audit_field *df,
904 * rule with the new rule in the filterlist, then free the old rule. 745 * rule with the new rule in the filterlist, then free the old rule.
905 * The rlist element is undefined; list manipulations are handled apart from 746 * The rlist element is undefined; list manipulations are handled apart from
906 * the initial copy. */ 747 * the initial copy. */
907static struct audit_entry *audit_dupe_rule(struct audit_krule *old, 748struct audit_entry *audit_dupe_rule(struct audit_krule *old,
908 struct audit_watch *watch) 749 struct audit_watch *watch)
909{ 750{
910 u32 fcount = old->field_count; 751 u32 fcount = old->field_count;
911 struct audit_entry *entry; 752 struct audit_entry *entry;
@@ -977,137 +818,6 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old,
977 return entry; 818 return entry;
978} 819}
979 820
980/* Update inode info in audit rules based on filesystem event. */
981static void audit_update_watch(struct audit_parent *parent,
982 const char *dname, dev_t dev,
983 unsigned long ino, unsigned invalidating)
984{
985 struct audit_watch *owatch, *nwatch, *nextw;
986 struct audit_krule *r, *nextr;
987 struct audit_entry *oentry, *nentry;
988
989 mutex_lock(&audit_filter_mutex);
990 list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) {
991 if (audit_compare_dname_path(dname, owatch->path, NULL))
992 continue;
993
994 /* If the update involves invalidating rules, do the inode-based
995 * filtering now, so we don't omit records. */
996 if (invalidating && current->audit_context)
997 audit_filter_inodes(current, current->audit_context);
998
999 nwatch = audit_dupe_watch(owatch);
1000 if (IS_ERR(nwatch)) {
1001 mutex_unlock(&audit_filter_mutex);
1002 audit_panic("error updating watch, skipping");
1003 return;
1004 }
1005 nwatch->dev = dev;
1006 nwatch->ino = ino;
1007
1008 list_for_each_entry_safe(r, nextr, &owatch->rules, rlist) {
1009
1010 oentry = container_of(r, struct audit_entry, rule);
1011 list_del(&oentry->rule.rlist);
1012 list_del_rcu(&oentry->list);
1013
1014 nentry = audit_dupe_rule(&oentry->rule, nwatch);
1015 if (IS_ERR(nentry)) {
1016 list_del(&oentry->rule.list);
1017 audit_panic("error updating watch, removing");
1018 } else {
1019 int h = audit_hash_ino((u32)ino);
1020 list_add(&nentry->rule.rlist, &nwatch->rules);
1021 list_add_rcu(&nentry->list, &audit_inode_hash[h]);
1022 list_replace(&oentry->rule.list,
1023 &nentry->rule.list);
1024 }
1025
1026 call_rcu(&oentry->rcu, audit_free_rule_rcu);
1027 }
1028
1029 if (audit_enabled) {
1030 struct audit_buffer *ab;
1031 ab = audit_log_start(NULL, GFP_NOFS,
1032 AUDIT_CONFIG_CHANGE);
1033 audit_log_format(ab, "auid=%u ses=%u",
1034 audit_get_loginuid(current),
1035 audit_get_sessionid(current));
1036 audit_log_format(ab,
1037 " op=updated rules specifying path=");
1038 audit_log_untrustedstring(ab, owatch->path);
1039 audit_log_format(ab, " with dev=%u ino=%lu\n",
1040 dev, ino);
1041 audit_log_format(ab, " list=%d res=1", r->listnr);
1042 audit_log_end(ab);
1043 }
1044 audit_remove_watch(owatch);
1045 goto add_watch_to_parent; /* event applies to a single watch */
1046 }
1047 mutex_unlock(&audit_filter_mutex);
1048 return;
1049
1050add_watch_to_parent:
1051 list_add(&nwatch->wlist, &parent->watches);
1052 mutex_unlock(&audit_filter_mutex);
1053 return;
1054}
1055
1056/* Remove all watches & rules associated with a parent that is going away. */
1057static void audit_remove_parent_watches(struct audit_parent *parent)
1058{
1059 struct audit_watch *w, *nextw;
1060 struct audit_krule *r, *nextr;
1061 struct audit_entry *e;
1062
1063 mutex_lock(&audit_filter_mutex);
1064 parent->flags |= AUDIT_PARENT_INVALID;
1065 list_for_each_entry_safe(w, nextw, &parent->watches, wlist) {
1066 list_for_each_entry_safe(r, nextr, &w->rules, rlist) {
1067 e = container_of(r, struct audit_entry, rule);
1068 if (audit_enabled) {
1069 struct audit_buffer *ab;
1070 ab = audit_log_start(NULL, GFP_NOFS,
1071 AUDIT_CONFIG_CHANGE);
1072 audit_log_format(ab, "auid=%u ses=%u",
1073 audit_get_loginuid(current),
1074 audit_get_sessionid(current));
1075 audit_log_format(ab, " op=remove rule path=");
1076 audit_log_untrustedstring(ab, w->path);
1077 if (r->filterkey) {
1078 audit_log_format(ab, " key=");
1079 audit_log_untrustedstring(ab,
1080 r->filterkey);
1081 } else
1082 audit_log_format(ab, " key=(null)");
1083 audit_log_format(ab, " list=%d res=1",
1084 r->listnr);
1085 audit_log_end(ab);
1086 }
1087 list_del(&r->rlist);
1088 list_del(&r->list);
1089 list_del_rcu(&e->list);
1090 call_rcu(&e->rcu, audit_free_rule_rcu);
1091 }
1092 audit_remove_watch(w);
1093 }
1094 mutex_unlock(&audit_filter_mutex);
1095}
1096
1097/* Unregister inotify watches for parents on in_list.
1098 * Generates an IN_IGNORED event. */
1099static void audit_inotify_unregister(struct list_head *in_list)
1100{
1101 struct audit_parent *p, *n;
1102
1103 list_for_each_entry_safe(p, n, in_list, ilist) {
1104 list_del(&p->ilist);
1105 inotify_rm_watch(audit_ih, &p->wdata);
1106 /* the unpin matching the pin in audit_do_del_rule() */
1107 unpin_inotify_watch(&p->wdata);
1108 }
1109}
1110
1111/* Find an existing audit rule. 821/* Find an existing audit rule.
1112 * Caller must hold audit_filter_mutex to prevent stale rule data. */ 822 * Caller must hold audit_filter_mutex to prevent stale rule data. */
1113static struct audit_entry *audit_find_rule(struct audit_entry *entry, 823static struct audit_entry *audit_find_rule(struct audit_entry *entry,
@@ -1145,134 +855,6 @@ out:
1145 return found; 855 return found;
1146} 856}
1147 857
1148/* Get path information necessary for adding watches. */
1149static int audit_get_nd(char *path, struct nameidata **ndp,
1150 struct nameidata **ndw)
1151{
1152 struct nameidata *ndparent, *ndwatch;
1153 int err;
1154
1155 ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL);
1156 if (unlikely(!ndparent))
1157 return -ENOMEM;
1158
1159 ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL);
1160 if (unlikely(!ndwatch)) {
1161 kfree(ndparent);
1162 return -ENOMEM;
1163 }
1164
1165 err = path_lookup(path, LOOKUP_PARENT, ndparent);
1166 if (err) {
1167 kfree(ndparent);
1168 kfree(ndwatch);
1169 return err;
1170 }
1171
1172 err = path_lookup(path, 0, ndwatch);
1173 if (err) {
1174 kfree(ndwatch);
1175 ndwatch = NULL;
1176 }
1177
1178 *ndp = ndparent;
1179 *ndw = ndwatch;
1180
1181 return 0;
1182}
1183
1184/* Release resources used for watch path information. */
1185static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw)
1186{
1187 if (ndp) {
1188 path_put(&ndp->path);
1189 kfree(ndp);
1190 }
1191 if (ndw) {
1192 path_put(&ndw->path);
1193 kfree(ndw);
1194 }
1195}
1196
1197/* Associate the given rule with an existing parent inotify_watch.
1198 * Caller must hold audit_filter_mutex. */
1199static void audit_add_to_parent(struct audit_krule *krule,
1200 struct audit_parent *parent)
1201{
1202 struct audit_watch *w, *watch = krule->watch;
1203 int watch_found = 0;
1204
1205 list_for_each_entry(w, &parent->watches, wlist) {
1206 if (strcmp(watch->path, w->path))
1207 continue;
1208
1209 watch_found = 1;
1210
1211 /* put krule's and initial refs to temporary watch */
1212 audit_put_watch(watch);
1213 audit_put_watch(watch);
1214
1215 audit_get_watch(w);
1216 krule->watch = watch = w;
1217 break;
1218 }
1219
1220 if (!watch_found) {
1221 get_inotify_watch(&parent->wdata);
1222 watch->parent = parent;
1223
1224 list_add(&watch->wlist, &parent->watches);
1225 }
1226 list_add(&krule->rlist, &watch->rules);
1227}
1228
1229/* Find a matching watch entry, or add this one.
1230 * Caller must hold audit_filter_mutex. */
1231static int audit_add_watch(struct audit_krule *krule, struct nameidata *ndp,
1232 struct nameidata *ndw)
1233{
1234 struct audit_watch *watch = krule->watch;
1235 struct inotify_watch *i_watch;
1236 struct audit_parent *parent;
1237 int ret = 0;
1238
1239 /* update watch filter fields */
1240 if (ndw) {
1241 watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev;
1242 watch->ino = ndw->path.dentry->d_inode->i_ino;
1243 }
1244
1245 /* The audit_filter_mutex must not be held during inotify calls because
1246 * we hold it during inotify event callback processing. If an existing
1247 * inotify watch is found, inotify_find_watch() grabs a reference before
1248 * returning.
1249 */
1250 mutex_unlock(&audit_filter_mutex);
1251
1252 if (inotify_find_watch(audit_ih, ndp->path.dentry->d_inode,
1253 &i_watch) < 0) {
1254 parent = audit_init_parent(ndp);
1255 if (IS_ERR(parent)) {
1256 /* caller expects mutex locked */
1257 mutex_lock(&audit_filter_mutex);
1258 return PTR_ERR(parent);
1259 }
1260 } else
1261 parent = container_of(i_watch, struct audit_parent, wdata);
1262
1263 mutex_lock(&audit_filter_mutex);
1264
1265 /* parent was moved before we took audit_filter_mutex */
1266 if (parent->flags & AUDIT_PARENT_INVALID)
1267 ret = -ENOENT;
1268 else
1269 audit_add_to_parent(krule, parent);
1270
1271 /* match get in audit_init_parent or inotify_find_watch */
1272 put_inotify_watch(&parent->wdata);
1273 return ret;
1274}
1275
1276static u64 prio_low = ~0ULL/2; 858static u64 prio_low = ~0ULL/2;
1277static u64 prio_high = ~0ULL/2 - 1; 859static u64 prio_high = ~0ULL/2 - 1;
1278 860
@@ -1282,7 +864,6 @@ static inline int audit_add_rule(struct audit_entry *entry)
1282 struct audit_entry *e; 864 struct audit_entry *e;
1283 struct audit_watch *watch = entry->rule.watch; 865 struct audit_watch *watch = entry->rule.watch;
1284 struct audit_tree *tree = entry->rule.tree; 866 struct audit_tree *tree = entry->rule.tree;
1285 struct nameidata *ndp = NULL, *ndw = NULL;
1286 struct list_head *list; 867 struct list_head *list;
1287 int h, err; 868 int h, err;
1288#ifdef CONFIG_AUDITSYSCALL 869#ifdef CONFIG_AUDITSYSCALL
@@ -1296,8 +877,8 @@ static inline int audit_add_rule(struct audit_entry *entry)
1296 877
1297 mutex_lock(&audit_filter_mutex); 878 mutex_lock(&audit_filter_mutex);
1298 e = audit_find_rule(entry, &list); 879 e = audit_find_rule(entry, &list);
1299 mutex_unlock(&audit_filter_mutex);
1300 if (e) { 880 if (e) {
881 mutex_unlock(&audit_filter_mutex);
1301 err = -EEXIST; 882 err = -EEXIST;
1302 /* normally audit_add_tree_rule() will free it on failure */ 883 /* normally audit_add_tree_rule() will free it on failure */
1303 if (tree) 884 if (tree)
@@ -1305,22 +886,16 @@ static inline int audit_add_rule(struct audit_entry *entry)
1305 goto error; 886 goto error;
1306 } 887 }
1307 888
1308 /* Avoid calling path_lookup under audit_filter_mutex. */
1309 if (watch) {
1310 err = audit_get_nd(watch->path, &ndp, &ndw);
1311 if (err)
1312 goto error;
1313 }
1314
1315 mutex_lock(&audit_filter_mutex);
1316 if (watch) { 889 if (watch) {
1317 /* audit_filter_mutex is dropped and re-taken during this call */ 890 /* audit_filter_mutex is dropped and re-taken during this call */
1318 err = audit_add_watch(&entry->rule, ndp, ndw); 891 err = audit_add_watch(&entry->rule);
1319 if (err) { 892 if (err) {
1320 mutex_unlock(&audit_filter_mutex); 893 mutex_unlock(&audit_filter_mutex);
1321 goto error; 894 goto error;
1322 } 895 }
1323 h = audit_hash_ino((u32)watch->ino); 896 /* entry->rule.watch may have changed during audit_add_watch() */
897 watch = entry->rule.watch;
898 h = audit_hash_ino((u32)audit_watch_inode(watch));
1324 list = &audit_inode_hash[h]; 899 list = &audit_inode_hash[h];
1325 } 900 }
1326 if (tree) { 901 if (tree) {
@@ -1358,11 +933,9 @@ static inline int audit_add_rule(struct audit_entry *entry)
1358#endif 933#endif
1359 mutex_unlock(&audit_filter_mutex); 934 mutex_unlock(&audit_filter_mutex);
1360 935
1361 audit_put_nd(ndp, ndw); /* NULL args OK */
1362 return 0; 936 return 0;
1363 937
1364error: 938error:
1365 audit_put_nd(ndp, ndw); /* NULL args OK */
1366 if (watch) 939 if (watch)
1367 audit_put_watch(watch); /* tmp watch, matches initial get */ 940 audit_put_watch(watch); /* tmp watch, matches initial get */
1368 return err; 941 return err;
@@ -1372,7 +945,7 @@ error:
1372static inline int audit_del_rule(struct audit_entry *entry) 945static inline int audit_del_rule(struct audit_entry *entry)
1373{ 946{
1374 struct audit_entry *e; 947 struct audit_entry *e;
1375 struct audit_watch *watch, *tmp_watch = entry->rule.watch; 948 struct audit_watch *watch = entry->rule.watch;
1376 struct audit_tree *tree = entry->rule.tree; 949 struct audit_tree *tree = entry->rule.tree;
1377 struct list_head *list; 950 struct list_head *list;
1378 LIST_HEAD(inotify_list); 951 LIST_HEAD(inotify_list);
@@ -1394,29 +967,8 @@ static inline int audit_del_rule(struct audit_entry *entry)
1394 goto out; 967 goto out;
1395 } 968 }
1396 969
1397 watch = e->rule.watch; 970 if (e->rule.watch)
1398 if (watch) { 971 audit_remove_watch_rule(&e->rule, &inotify_list);
1399 struct audit_parent *parent = watch->parent;
1400
1401 list_del(&e->rule.rlist);
1402
1403 if (list_empty(&watch->rules)) {
1404 audit_remove_watch(watch);
1405
1406 if (list_empty(&parent->watches)) {
1407 /* Put parent on the inotify un-registration
1408 * list. Grab a reference before releasing
1409 * audit_filter_mutex, to be released in
1410 * audit_inotify_unregister().
1411 * If filesystem is going away, just leave
1412 * the sucker alone, eviction will take
1413 * care of it.
1414 */
1415 if (pin_inotify_watch(&parent->wdata))
1416 list_add(&parent->ilist, &inotify_list);
1417 }
1418 }
1419 }
1420 972
1421 if (e->rule.tree) 973 if (e->rule.tree)
1422 audit_remove_tree_rule(&e->rule); 974 audit_remove_tree_rule(&e->rule);
@@ -1438,8 +990,8 @@ static inline int audit_del_rule(struct audit_entry *entry)
1438 audit_inotify_unregister(&inotify_list); 990 audit_inotify_unregister(&inotify_list);
1439 991
1440out: 992out:
1441 if (tmp_watch) 993 if (watch)
1442 audit_put_watch(tmp_watch); /* match initial get */ 994 audit_put_watch(watch); /* match initial get */
1443 if (tree) 995 if (tree)
1444 audit_put_tree(tree); /* that's the temporary one */ 996 audit_put_tree(tree); /* that's the temporary one */
1445 997
@@ -1527,11 +1079,9 @@ static void audit_log_rule_change(uid_t loginuid, u32 sessionid, u32 sid,
1527 security_release_secctx(ctx, len); 1079 security_release_secctx(ctx, len);
1528 } 1080 }
1529 } 1081 }
1530 audit_log_format(ab, " op=%s rule key=", action); 1082 audit_log_format(ab, " op=");
1531 if (rule->filterkey) 1083 audit_log_string(ab, action);
1532 audit_log_untrustedstring(ab, rule->filterkey); 1084 audit_log_key(ab, rule->filterkey);
1533 else
1534 audit_log_format(ab, "(null)");
1535 audit_log_format(ab, " list=%d res=%d", rule->listnr, res); 1085 audit_log_format(ab, " list=%d res=%d", rule->listnr, res);
1536 audit_log_end(ab); 1086 audit_log_end(ab);
1537} 1087}
@@ -1595,7 +1145,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
1595 return PTR_ERR(entry); 1145 return PTR_ERR(entry);
1596 1146
1597 err = audit_add_rule(entry); 1147 err = audit_add_rule(entry);
1598 audit_log_rule_change(loginuid, sessionid, sid, "add", 1148 audit_log_rule_change(loginuid, sessionid, sid, "add rule",
1599 &entry->rule, !err); 1149 &entry->rule, !err);
1600 1150
1601 if (err) 1151 if (err)
@@ -1611,7 +1161,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
1611 return PTR_ERR(entry); 1161 return PTR_ERR(entry);
1612 1162
1613 err = audit_del_rule(entry); 1163 err = audit_del_rule(entry);
1614 audit_log_rule_change(loginuid, sessionid, sid, "remove", 1164 audit_log_rule_change(loginuid, sessionid, sid, "remove rule",
1615 &entry->rule, !err); 1165 &entry->rule, !err);
1616 1166
1617 audit_free_rule(entry); 1167 audit_free_rule(entry);
@@ -1793,7 +1343,7 @@ static int update_lsm_rule(struct audit_krule *r)
1793 list_del(&r->list); 1343 list_del(&r->list);
1794 } else { 1344 } else {
1795 if (watch) { 1345 if (watch) {
1796 list_add(&nentry->rule.rlist, &watch->rules); 1346 list_add(&nentry->rule.rlist, audit_watch_rules(watch));
1797 list_del(&r->rlist); 1347 list_del(&r->rlist);
1798 } else if (tree) 1348 } else if (tree)
1799 list_replace_init(&r->rlist, &nentry->rule.rlist); 1349 list_replace_init(&r->rlist, &nentry->rule.rlist);
@@ -1829,27 +1379,3 @@ int audit_update_lsm_rules(void)
1829 1379
1830 return err; 1380 return err;
1831} 1381}
1832
1833/* Update watch data in audit rules based on inotify events. */
1834void audit_handle_ievent(struct inotify_watch *i_watch, u32 wd, u32 mask,
1835 u32 cookie, const char *dname, struct inode *inode)
1836{
1837 struct audit_parent *parent;
1838
1839 parent = container_of(i_watch, struct audit_parent, wdata);
1840
1841 if (mask & (IN_CREATE|IN_MOVED_TO) && inode)
1842 audit_update_watch(parent, dname, inode->i_sb->s_dev,
1843 inode->i_ino, 0);
1844 else if (mask & (IN_DELETE|IN_MOVED_FROM))
1845 audit_update_watch(parent, dname, (dev_t)-1, (unsigned long)-1, 1);
1846 /* inotify automatically removes the watch and sends IN_IGNORED */
1847 else if (mask & (IN_DELETE_SELF|IN_UNMOUNT))
1848 audit_remove_parent_watches(parent);
1849 /* inotify does not remove the watch, so remove it manually */
1850 else if(mask & IN_MOVE_SELF) {
1851 audit_remove_parent_watches(parent);
1852 inotify_remove_watch_locked(audit_ih, i_watch);
1853 } else if (mask & IN_IGNORED)
1854 put_inotify_watch(i_watch);
1855}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 7d6ac7c1f414..68d3c6a0ecd6 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -199,6 +199,7 @@ struct audit_context {
199 199
200 struct audit_tree_refs *trees, *first_trees; 200 struct audit_tree_refs *trees, *first_trees;
201 int tree_count; 201 int tree_count;
202 struct list_head killed_trees;
202 203
203 int type; 204 int type;
204 union { 205 union {
@@ -548,9 +549,9 @@ static int audit_filter_rules(struct task_struct *tsk,
548 } 549 }
549 break; 550 break;
550 case AUDIT_WATCH: 551 case AUDIT_WATCH:
551 if (name && rule->watch->ino != (unsigned long)-1) 552 if (name && audit_watch_inode(rule->watch) != (unsigned long)-1)
552 result = (name->dev == rule->watch->dev && 553 result = (name->dev == audit_watch_dev(rule->watch) &&
553 name->ino == rule->watch->ino); 554 name->ino == audit_watch_inode(rule->watch));
554 break; 555 break;
555 case AUDIT_DIR: 556 case AUDIT_DIR:
556 if (ctx) 557 if (ctx)
@@ -853,6 +854,7 @@ static inline struct audit_context *audit_alloc_context(enum audit_state state)
853 if (!(context = kmalloc(sizeof(*context), GFP_KERNEL))) 854 if (!(context = kmalloc(sizeof(*context), GFP_KERNEL)))
854 return NULL; 855 return NULL;
855 audit_zero_context(context, state); 856 audit_zero_context(context, state);
857 INIT_LIST_HEAD(&context->killed_trees);
856 return context; 858 return context;
857} 859}
858 860
@@ -1024,8 +1026,8 @@ static int audit_log_single_execve_arg(struct audit_context *context,
1024{ 1026{
1025 char arg_num_len_buf[12]; 1027 char arg_num_len_buf[12];
1026 const char __user *tmp_p = p; 1028 const char __user *tmp_p = p;
1027 /* how many digits are in arg_num? 3 is the length of " a=" */ 1029 /* how many digits are in arg_num? 5 is the length of ' a=""' */
1028 size_t arg_num_len = snprintf(arg_num_len_buf, 12, "%d", arg_num) + 3; 1030 size_t arg_num_len = snprintf(arg_num_len_buf, 12, "%d", arg_num) + 5;
1029 size_t len, len_left, to_send; 1031 size_t len, len_left, to_send;
1030 size_t max_execve_audit_len = MAX_EXECVE_AUDIT_LEN; 1032 size_t max_execve_audit_len = MAX_EXECVE_AUDIT_LEN;
1031 unsigned int i, has_cntl = 0, too_long = 0; 1033 unsigned int i, has_cntl = 0, too_long = 0;
@@ -1137,7 +1139,7 @@ static int audit_log_single_execve_arg(struct audit_context *context,
1137 if (has_cntl) 1139 if (has_cntl)
1138 audit_log_n_hex(*ab, buf, to_send); 1140 audit_log_n_hex(*ab, buf, to_send);
1139 else 1141 else
1140 audit_log_format(*ab, "\"%s\"", buf); 1142 audit_log_string(*ab, buf);
1141 1143
1142 p += to_send; 1144 p += to_send;
1143 len_left -= to_send; 1145 len_left -= to_send;
@@ -1372,11 +1374,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
1372 1374
1373 1375
1374 audit_log_task_info(ab, tsk); 1376 audit_log_task_info(ab, tsk);
1375 if (context->filterkey) { 1377 audit_log_key(ab, context->filterkey);
1376 audit_log_format(ab, " key=");
1377 audit_log_untrustedstring(ab, context->filterkey);
1378 } else
1379 audit_log_format(ab, " key=(null)");
1380 audit_log_end(ab); 1378 audit_log_end(ab);
1381 1379
1382 for (aux = context->aux; aux; aux = aux->next) { 1380 for (aux = context->aux; aux; aux = aux->next) {
@@ -1549,6 +1547,8 @@ void audit_free(struct task_struct *tsk)
1549 /* that can happen only if we are called from do_exit() */ 1547 /* that can happen only if we are called from do_exit() */
1550 if (context->in_syscall && context->current_state == AUDIT_RECORD_CONTEXT) 1548 if (context->in_syscall && context->current_state == AUDIT_RECORD_CONTEXT)
1551 audit_log_exit(context, tsk); 1549 audit_log_exit(context, tsk);
1550 if (!list_empty(&context->killed_trees))
1551 audit_kill_trees(&context->killed_trees);
1552 1552
1553 audit_free_context(context); 1553 audit_free_context(context);
1554} 1554}
@@ -1692,6 +1692,9 @@ void audit_syscall_exit(int valid, long return_code)
1692 context->in_syscall = 0; 1692 context->in_syscall = 0;
1693 context->prio = context->state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0; 1693 context->prio = context->state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0;
1694 1694
1695 if (!list_empty(&context->killed_trees))
1696 audit_kill_trees(&context->killed_trees);
1697
1695 if (context->previous) { 1698 if (context->previous) {
1696 struct audit_context *new_context = context->previous; 1699 struct audit_context *new_context = context->previous;
1697 context->previous = NULL; 1700 context->previous = NULL;
@@ -2525,3 +2528,11 @@ void audit_core_dumps(long signr)
2525 audit_log_format(ab, " sig=%ld", signr); 2528 audit_log_format(ab, " sig=%ld", signr);
2526 audit_log_end(ab); 2529 audit_log_end(ab);
2527} 2530}
2531
2532struct list_head *audit_killed_trees(void)
2533{
2534 struct audit_context *ctx = current->audit_context;
2535 if (likely(!ctx || !ctx->in_syscall))
2536 return NULL;
2537 return &ctx->killed_trees;
2538}
diff --git a/kernel/exit.c b/kernel/exit.c
index 628d41f0dd54..869dc221733e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -12,7 +12,6 @@
12#include <linux/completion.h> 12#include <linux/completion.h>
13#include <linux/personality.h> 13#include <linux/personality.h>
14#include <linux/tty.h> 14#include <linux/tty.h>
15#include <linux/mnt_namespace.h>
16#include <linux/iocontext.h> 15#include <linux/iocontext.h>
17#include <linux/key.h> 16#include <linux/key.h>
18#include <linux/security.h> 17#include <linux/security.h>
diff --git a/kernel/fork.c b/kernel/fork.c
index 467746b3f0aa..bd2959228871 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -17,7 +17,6 @@
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/vmalloc.h> 18#include <linux/vmalloc.h>
19#include <linux/completion.h> 19#include <linux/completion.h>
20#include <linux/mnt_namespace.h>
21#include <linux/personality.h> 20#include <linux/personality.h>
22#include <linux/mempolicy.h> 21#include <linux/mempolicy.h>
23#include <linux/sem.h> 22#include <linux/sem.h>
diff --git a/kernel/futex.c b/kernel/futex.c
index 80b5ce716596..794c862125fe 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -284,6 +284,25 @@ void put_futex_key(int fshared, union futex_key *key)
284 drop_futex_key_refs(key); 284 drop_futex_key_refs(key);
285} 285}
286 286
287/*
288 * fault_in_user_writeable - fault in user address and verify RW access
289 * @uaddr: pointer to faulting user space address
290 *
291 * Slow path to fixup the fault we just took in the atomic write
292 * access to @uaddr.
293 *
294 * We have no generic implementation of a non destructive write to the
295 * user address. We know that we faulted in the atomic pagefault
296 * disabled section so we can as well avoid the #PF overhead by
297 * calling get_user_pages() right away.
298 */
299static int fault_in_user_writeable(u32 __user *uaddr)
300{
301 int ret = get_user_pages(current, current->mm, (unsigned long)uaddr,
302 1, 1, 0, NULL, NULL);
303 return ret < 0 ? ret : 0;
304}
305
287/** 306/**
288 * futex_top_waiter() - Return the highest priority waiter on a futex 307 * futex_top_waiter() - Return the highest priority waiter on a futex
289 * @hb: the hash bucket the futex_q's reside in 308 * @hb: the hash bucket the futex_q's reside in
@@ -896,7 +915,6 @@ retry:
896retry_private: 915retry_private:
897 op_ret = futex_atomic_op_inuser(op, uaddr2); 916 op_ret = futex_atomic_op_inuser(op, uaddr2);
898 if (unlikely(op_ret < 0)) { 917 if (unlikely(op_ret < 0)) {
899 u32 dummy;
900 918
901 double_unlock_hb(hb1, hb2); 919 double_unlock_hb(hb1, hb2);
902 920
@@ -914,7 +932,7 @@ retry_private:
914 goto out_put_keys; 932 goto out_put_keys;
915 } 933 }
916 934
917 ret = get_user(dummy, uaddr2); 935 ret = fault_in_user_writeable(uaddr2);
918 if (ret) 936 if (ret)
919 goto out_put_keys; 937 goto out_put_keys;
920 938
@@ -1204,7 +1222,7 @@ retry_private:
1204 double_unlock_hb(hb1, hb2); 1222 double_unlock_hb(hb1, hb2);
1205 put_futex_key(fshared, &key2); 1223 put_futex_key(fshared, &key2);
1206 put_futex_key(fshared, &key1); 1224 put_futex_key(fshared, &key1);
1207 ret = get_user(curval2, uaddr2); 1225 ret = fault_in_user_writeable(uaddr2);
1208 if (!ret) 1226 if (!ret)
1209 goto retry; 1227 goto retry;
1210 goto out; 1228 goto out;
@@ -1482,7 +1500,7 @@ retry:
1482handle_fault: 1500handle_fault:
1483 spin_unlock(q->lock_ptr); 1501 spin_unlock(q->lock_ptr);
1484 1502
1485 ret = get_user(uval, uaddr); 1503 ret = fault_in_user_writeable(uaddr);
1486 1504
1487 spin_lock(q->lock_ptr); 1505 spin_lock(q->lock_ptr);
1488 1506
@@ -1807,7 +1825,6 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
1807{ 1825{
1808 struct hrtimer_sleeper timeout, *to = NULL; 1826 struct hrtimer_sleeper timeout, *to = NULL;
1809 struct futex_hash_bucket *hb; 1827 struct futex_hash_bucket *hb;
1810 u32 uval;
1811 struct futex_q q; 1828 struct futex_q q;
1812 int res, ret; 1829 int res, ret;
1813 1830
@@ -1909,16 +1926,9 @@ out:
1909 return ret != -EINTR ? ret : -ERESTARTNOINTR; 1926 return ret != -EINTR ? ret : -ERESTARTNOINTR;
1910 1927
1911uaddr_faulted: 1928uaddr_faulted:
1912 /*
1913 * We have to r/w *(int __user *)uaddr, and we have to modify it
1914 * atomically. Therefore, if we continue to fault after get_user()
1915 * below, we need to handle the fault ourselves, while still holding
1916 * the mmap_sem. This can occur if the uaddr is under contention as
1917 * we have to drop the mmap_sem in order to call get_user().
1918 */
1919 queue_unlock(&q, hb); 1929 queue_unlock(&q, hb);
1920 1930
1921 ret = get_user(uval, uaddr); 1931 ret = fault_in_user_writeable(uaddr);
1922 if (ret) 1932 if (ret)
1923 goto out_put_key; 1933 goto out_put_key;
1924 1934
@@ -2013,17 +2023,10 @@ out:
2013 return ret; 2023 return ret;
2014 2024
2015pi_faulted: 2025pi_faulted:
2016 /*
2017 * We have to r/w *(int __user *)uaddr, and we have to modify it
2018 * atomically. Therefore, if we continue to fault after get_user()
2019 * below, we need to handle the fault ourselves, while still holding
2020 * the mmap_sem. This can occur if the uaddr is under contention as
2021 * we have to drop the mmap_sem in order to call get_user().
2022 */
2023 spin_unlock(&hb->lock); 2026 spin_unlock(&hb->lock);
2024 put_futex_key(fshared, &key); 2027 put_futex_key(fshared, &key);
2025 2028
2026 ret = get_user(uval, uaddr); 2029 ret = fault_in_user_writeable(uaddr);
2027 if (!ret) 2030 if (!ret)
2028 goto retry; 2031 goto retry;
2029 2032
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 7e95bedb2bfc..385c31a1bdbf 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -24,7 +24,6 @@
24#include <linux/unistd.h> 24#include <linux/unistd.h>
25#include <linux/kmod.h> 25#include <linux/kmod.h>
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include <linux/mnt_namespace.h>
28#include <linux/completion.h> 27#include <linux/completion.h>
29#include <linux/file.h> 28#include <linux/file.h>
30#include <linux/fdtable.h> 29#include <linux/fdtable.h>
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index c0fa54b276d9..16b5739c516a 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -237,13 +237,9 @@ static int __kprobes collect_garbage_slots(void)
237{ 237{
238 struct kprobe_insn_page *kip; 238 struct kprobe_insn_page *kip;
239 struct hlist_node *pos, *next; 239 struct hlist_node *pos, *next;
240 int safety;
241 240
242 /* Ensure no-one is preepmted on the garbages */ 241 /* Ensure no-one is preepmted on the garbages */
243 mutex_unlock(&kprobe_insn_mutex); 242 if (check_safety())
244 safety = check_safety();
245 mutex_lock(&kprobe_insn_mutex);
246 if (safety != 0)
247 return -EAGAIN; 243 return -EAGAIN;
248 244
249 hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) { 245 hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) {
diff --git a/kernel/module.c b/kernel/module.c
index 38928fcaff2b..0a049837008e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2451,9 +2451,9 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
2451 return ret; 2451 return ret;
2452 } 2452 }
2453 if (ret > 0) { 2453 if (ret > 0) {
2454 printk(KERN_WARNING "%s: '%s'->init suspiciously returned %d, " 2454 printk(KERN_WARNING
2455 "it should follow 0/-E convention\n" 2455"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n"
2456 KERN_WARNING "%s: loading module anyway...\n", 2456"%s: loading module anyway...\n",
2457 __func__, mod->name, ret, 2457 __func__, mod->name, ret,
2458 __func__); 2458 __func__);
2459 dump_stack(); 2459 dump_stack();
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 1a933a221ea4..a641eb753b8c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -236,6 +236,8 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
236 236
237 list_add_rcu(&counter->event_entry, &ctx->event_list); 237 list_add_rcu(&counter->event_entry, &ctx->event_list);
238 ctx->nr_counters++; 238 ctx->nr_counters++;
239 if (counter->attr.inherit_stat)
240 ctx->nr_stat++;
239} 241}
240 242
241/* 243/*
@@ -250,6 +252,8 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
250 if (list_empty(&counter->list_entry)) 252 if (list_empty(&counter->list_entry))
251 return; 253 return;
252 ctx->nr_counters--; 254 ctx->nr_counters--;
255 if (counter->attr.inherit_stat)
256 ctx->nr_stat--;
253 257
254 list_del_init(&counter->list_entry); 258 list_del_init(&counter->list_entry);
255 list_del_rcu(&counter->event_entry); 259 list_del_rcu(&counter->event_entry);
@@ -1006,6 +1010,81 @@ static int context_equiv(struct perf_counter_context *ctx1,
1006 && !ctx1->pin_count && !ctx2->pin_count; 1010 && !ctx1->pin_count && !ctx2->pin_count;
1007} 1011}
1008 1012
1013static void __perf_counter_read(void *counter);
1014
1015static void __perf_counter_sync_stat(struct perf_counter *counter,
1016 struct perf_counter *next_counter)
1017{
1018 u64 value;
1019
1020 if (!counter->attr.inherit_stat)
1021 return;
1022
1023 /*
1024 * Update the counter value, we cannot use perf_counter_read()
1025 * because we're in the middle of a context switch and have IRQs
1026 * disabled, which upsets smp_call_function_single(), however
1027 * we know the counter must be on the current CPU, therefore we
1028 * don't need to use it.
1029 */
1030 switch (counter->state) {
1031 case PERF_COUNTER_STATE_ACTIVE:
1032 __perf_counter_read(counter);
1033 break;
1034
1035 case PERF_COUNTER_STATE_INACTIVE:
1036 update_counter_times(counter);
1037 break;
1038
1039 default:
1040 break;
1041 }
1042
1043 /*
1044 * In order to keep per-task stats reliable we need to flip the counter
1045 * values when we flip the contexts.
1046 */
1047 value = atomic64_read(&next_counter->count);
1048 value = atomic64_xchg(&counter->count, value);
1049 atomic64_set(&next_counter->count, value);
1050
1051 swap(counter->total_time_enabled, next_counter->total_time_enabled);
1052 swap(counter->total_time_running, next_counter->total_time_running);
1053
1054 /*
1055 * Since we swizzled the values, update the user visible data too.
1056 */
1057 perf_counter_update_userpage(counter);
1058 perf_counter_update_userpage(next_counter);
1059}
1060
1061#define list_next_entry(pos, member) \
1062 list_entry(pos->member.next, typeof(*pos), member)
1063
1064static void perf_counter_sync_stat(struct perf_counter_context *ctx,
1065 struct perf_counter_context *next_ctx)
1066{
1067 struct perf_counter *counter, *next_counter;
1068
1069 if (!ctx->nr_stat)
1070 return;
1071
1072 counter = list_first_entry(&ctx->event_list,
1073 struct perf_counter, event_entry);
1074
1075 next_counter = list_first_entry(&next_ctx->event_list,
1076 struct perf_counter, event_entry);
1077
1078 while (&counter->event_entry != &ctx->event_list &&
1079 &next_counter->event_entry != &next_ctx->event_list) {
1080
1081 __perf_counter_sync_stat(counter, next_counter);
1082
1083 counter = list_next_entry(counter, event_entry);
1084 next_counter = list_next_entry(counter, event_entry);
1085 }
1086}
1087
1009/* 1088/*
1010 * Called from scheduler to remove the counters of the current task, 1089 * Called from scheduler to remove the counters of the current task,
1011 * with interrupts disabled. 1090 * with interrupts disabled.
@@ -1061,6 +1140,8 @@ void perf_counter_task_sched_out(struct task_struct *task,
1061 ctx->task = next; 1140 ctx->task = next;
1062 next_ctx->task = task; 1141 next_ctx->task = task;
1063 do_switch = 0; 1142 do_switch = 0;
1143
1144 perf_counter_sync_stat(ctx, next_ctx);
1064 } 1145 }
1065 spin_unlock(&next_ctx->lock); 1146 spin_unlock(&next_ctx->lock);
1066 spin_unlock(&ctx->lock); 1147 spin_unlock(&ctx->lock);
@@ -1348,9 +1429,56 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
1348} 1429}
1349 1430
1350/* 1431/*
1432 * Enable all of a task's counters that have been marked enable-on-exec.
1433 * This expects task == current.
1434 */
1435static void perf_counter_enable_on_exec(struct task_struct *task)
1436{
1437 struct perf_counter_context *ctx;
1438 struct perf_counter *counter;
1439 unsigned long flags;
1440 int enabled = 0;
1441
1442 local_irq_save(flags);
1443 ctx = task->perf_counter_ctxp;
1444 if (!ctx || !ctx->nr_counters)
1445 goto out;
1446
1447 __perf_counter_task_sched_out(ctx);
1448
1449 spin_lock(&ctx->lock);
1450
1451 list_for_each_entry(counter, &ctx->counter_list, list_entry) {
1452 if (!counter->attr.enable_on_exec)
1453 continue;
1454 counter->attr.enable_on_exec = 0;
1455 if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
1456 continue;
1457 counter->state = PERF_COUNTER_STATE_INACTIVE;
1458 counter->tstamp_enabled =
1459 ctx->time - counter->total_time_enabled;
1460 enabled = 1;
1461 }
1462
1463 /*
1464 * Unclone this context if we enabled any counter.
1465 */
1466 if (enabled && ctx->parent_ctx) {
1467 put_ctx(ctx->parent_ctx);
1468 ctx->parent_ctx = NULL;
1469 }
1470
1471 spin_unlock(&ctx->lock);
1472
1473 perf_counter_task_sched_in(task, smp_processor_id());
1474 out:
1475 local_irq_restore(flags);
1476}
1477
1478/*
1351 * Cross CPU call to read the hardware counter 1479 * Cross CPU call to read the hardware counter
1352 */ 1480 */
1353static void __read(void *info) 1481static void __perf_counter_read(void *info)
1354{ 1482{
1355 struct perf_counter *counter = info; 1483 struct perf_counter *counter = info;
1356 struct perf_counter_context *ctx = counter->ctx; 1484 struct perf_counter_context *ctx = counter->ctx;
@@ -1372,7 +1500,7 @@ static u64 perf_counter_read(struct perf_counter *counter)
1372 */ 1500 */
1373 if (counter->state == PERF_COUNTER_STATE_ACTIVE) { 1501 if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
1374 smp_call_function_single(counter->oncpu, 1502 smp_call_function_single(counter->oncpu,
1375 __read, counter, 1); 1503 __perf_counter_read, counter, 1);
1376 } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) { 1504 } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
1377 update_counter_times(counter); 1505 update_counter_times(counter);
1378 } 1506 }
@@ -1508,11 +1636,13 @@ static void free_counter(struct perf_counter *counter)
1508{ 1636{
1509 perf_pending_sync(counter); 1637 perf_pending_sync(counter);
1510 1638
1511 atomic_dec(&nr_counters); 1639 if (!counter->parent) {
1512 if (counter->attr.mmap) 1640 atomic_dec(&nr_counters);
1513 atomic_dec(&nr_mmap_counters); 1641 if (counter->attr.mmap)
1514 if (counter->attr.comm) 1642 atomic_dec(&nr_mmap_counters);
1515 atomic_dec(&nr_comm_counters); 1643 if (counter->attr.comm)
1644 atomic_dec(&nr_comm_counters);
1645 }
1516 1646
1517 if (counter->destroy) 1647 if (counter->destroy)
1518 counter->destroy(counter); 1648 counter->destroy(counter);
@@ -1751,6 +1881,14 @@ int perf_counter_task_disable(void)
1751 return 0; 1881 return 0;
1752} 1882}
1753 1883
1884static int perf_counter_index(struct perf_counter *counter)
1885{
1886 if (counter->state != PERF_COUNTER_STATE_ACTIVE)
1887 return 0;
1888
1889 return counter->hw.idx + 1 - PERF_COUNTER_INDEX_OFFSET;
1890}
1891
1754/* 1892/*
1755 * Callers need to ensure there can be no nesting of this function, otherwise 1893 * Callers need to ensure there can be no nesting of this function, otherwise
1756 * the seqlock logic goes bad. We can not serialize this because the arch 1894 * the seqlock logic goes bad. We can not serialize this because the arch
@@ -1775,11 +1913,17 @@ void perf_counter_update_userpage(struct perf_counter *counter)
1775 preempt_disable(); 1913 preempt_disable();
1776 ++userpg->lock; 1914 ++userpg->lock;
1777 barrier(); 1915 barrier();
1778 userpg->index = counter->hw.idx; 1916 userpg->index = perf_counter_index(counter);
1779 userpg->offset = atomic64_read(&counter->count); 1917 userpg->offset = atomic64_read(&counter->count);
1780 if (counter->state == PERF_COUNTER_STATE_ACTIVE) 1918 if (counter->state == PERF_COUNTER_STATE_ACTIVE)
1781 userpg->offset -= atomic64_read(&counter->hw.prev_count); 1919 userpg->offset -= atomic64_read(&counter->hw.prev_count);
1782 1920
1921 userpg->time_enabled = counter->total_time_enabled +
1922 atomic64_read(&counter->child_total_time_enabled);
1923
1924 userpg->time_running = counter->total_time_running +
1925 atomic64_read(&counter->child_total_time_running);
1926
1783 barrier(); 1927 barrier();
1784 ++userpg->lock; 1928 ++userpg->lock;
1785 preempt_enable(); 1929 preempt_enable();
@@ -1876,7 +2020,7 @@ fail:
1876 2020
1877static void perf_mmap_free_page(unsigned long addr) 2021static void perf_mmap_free_page(unsigned long addr)
1878{ 2022{
1879 struct page *page = virt_to_page(addr); 2023 struct page *page = virt_to_page((void *)addr);
1880 2024
1881 page->mapping = NULL; 2025 page->mapping = NULL;
1882 __free_page(page); 2026 __free_page(page);
@@ -2483,15 +2627,14 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2483 u32 cpu, reserved; 2627 u32 cpu, reserved;
2484 } cpu_entry; 2628 } cpu_entry;
2485 2629
2486 header.type = 0; 2630 header.type = PERF_EVENT_SAMPLE;
2487 header.size = sizeof(header); 2631 header.size = sizeof(header);
2488 2632
2489 header.misc = PERF_EVENT_MISC_OVERFLOW; 2633 header.misc = 0;
2490 header.misc |= perf_misc_flags(data->regs); 2634 header.misc |= perf_misc_flags(data->regs);
2491 2635
2492 if (sample_type & PERF_SAMPLE_IP) { 2636 if (sample_type & PERF_SAMPLE_IP) {
2493 ip = perf_instruction_pointer(data->regs); 2637 ip = perf_instruction_pointer(data->regs);
2494 header.type |= PERF_SAMPLE_IP;
2495 header.size += sizeof(ip); 2638 header.size += sizeof(ip);
2496 } 2639 }
2497 2640
@@ -2500,7 +2643,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2500 tid_entry.pid = perf_counter_pid(counter, current); 2643 tid_entry.pid = perf_counter_pid(counter, current);
2501 tid_entry.tid = perf_counter_tid(counter, current); 2644 tid_entry.tid = perf_counter_tid(counter, current);
2502 2645
2503 header.type |= PERF_SAMPLE_TID;
2504 header.size += sizeof(tid_entry); 2646 header.size += sizeof(tid_entry);
2505 } 2647 }
2506 2648
@@ -2510,34 +2652,25 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2510 */ 2652 */
2511 time = sched_clock(); 2653 time = sched_clock();
2512 2654
2513 header.type |= PERF_SAMPLE_TIME;
2514 header.size += sizeof(u64); 2655 header.size += sizeof(u64);
2515 } 2656 }
2516 2657
2517 if (sample_type & PERF_SAMPLE_ADDR) { 2658 if (sample_type & PERF_SAMPLE_ADDR)
2518 header.type |= PERF_SAMPLE_ADDR;
2519 header.size += sizeof(u64); 2659 header.size += sizeof(u64);
2520 }
2521 2660
2522 if (sample_type & PERF_SAMPLE_ID) { 2661 if (sample_type & PERF_SAMPLE_ID)
2523 header.type |= PERF_SAMPLE_ID;
2524 header.size += sizeof(u64); 2662 header.size += sizeof(u64);
2525 }
2526 2663
2527 if (sample_type & PERF_SAMPLE_CPU) { 2664 if (sample_type & PERF_SAMPLE_CPU) {
2528 header.type |= PERF_SAMPLE_CPU;
2529 header.size += sizeof(cpu_entry); 2665 header.size += sizeof(cpu_entry);
2530 2666
2531 cpu_entry.cpu = raw_smp_processor_id(); 2667 cpu_entry.cpu = raw_smp_processor_id();
2532 } 2668 }
2533 2669
2534 if (sample_type & PERF_SAMPLE_PERIOD) { 2670 if (sample_type & PERF_SAMPLE_PERIOD)
2535 header.type |= PERF_SAMPLE_PERIOD;
2536 header.size += sizeof(u64); 2671 header.size += sizeof(u64);
2537 }
2538 2672
2539 if (sample_type & PERF_SAMPLE_GROUP) { 2673 if (sample_type & PERF_SAMPLE_GROUP) {
2540 header.type |= PERF_SAMPLE_GROUP;
2541 header.size += sizeof(u64) + 2674 header.size += sizeof(u64) +
2542 counter->nr_siblings * sizeof(group_entry); 2675 counter->nr_siblings * sizeof(group_entry);
2543 } 2676 }
@@ -2547,10 +2680,9 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2547 2680
2548 if (callchain) { 2681 if (callchain) {
2549 callchain_size = (1 + callchain->nr) * sizeof(u64); 2682 callchain_size = (1 + callchain->nr) * sizeof(u64);
2550
2551 header.type |= PERF_SAMPLE_CALLCHAIN;
2552 header.size += callchain_size; 2683 header.size += callchain_size;
2553 } 2684 } else
2685 header.size += sizeof(u64);
2554 } 2686 }
2555 2687
2556 ret = perf_output_begin(&handle, counter, header.size, nmi, 1); 2688 ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
@@ -2601,13 +2733,79 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2601 } 2733 }
2602 } 2734 }
2603 2735
2604 if (callchain) 2736 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
2605 perf_output_copy(&handle, callchain, callchain_size); 2737 if (callchain)
2738 perf_output_copy(&handle, callchain, callchain_size);
2739 else {
2740 u64 nr = 0;
2741 perf_output_put(&handle, nr);
2742 }
2743 }
2606 2744
2607 perf_output_end(&handle); 2745 perf_output_end(&handle);
2608} 2746}
2609 2747
2610/* 2748/*
2749 * read event
2750 */
2751
2752struct perf_read_event {
2753 struct perf_event_header header;
2754
2755 u32 pid;
2756 u32 tid;
2757 u64 value;
2758 u64 format[3];
2759};
2760
2761static void
2762perf_counter_read_event(struct perf_counter *counter,
2763 struct task_struct *task)
2764{
2765 struct perf_output_handle handle;
2766 struct perf_read_event event = {
2767 .header = {
2768 .type = PERF_EVENT_READ,
2769 .misc = 0,
2770 .size = sizeof(event) - sizeof(event.format),
2771 },
2772 .pid = perf_counter_pid(counter, task),
2773 .tid = perf_counter_tid(counter, task),
2774 .value = atomic64_read(&counter->count),
2775 };
2776 int ret, i = 0;
2777
2778 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
2779 event.header.size += sizeof(u64);
2780 event.format[i++] = counter->total_time_enabled;
2781 }
2782
2783 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
2784 event.header.size += sizeof(u64);
2785 event.format[i++] = counter->total_time_running;
2786 }
2787
2788 if (counter->attr.read_format & PERF_FORMAT_ID) {
2789 u64 id;
2790
2791 event.header.size += sizeof(u64);
2792 if (counter->parent)
2793 id = counter->parent->id;
2794 else
2795 id = counter->id;
2796
2797 event.format[i++] = id;
2798 }
2799
2800 ret = perf_output_begin(&handle, counter, event.header.size, 0, 0);
2801 if (ret)
2802 return;
2803
2804 perf_output_copy(&handle, &event, event.header.size);
2805 perf_output_end(&handle);
2806}
2807
2808/*
2611 * fork tracking 2809 * fork tracking
2612 */ 2810 */
2613 2811
@@ -2798,6 +2996,9 @@ void perf_counter_comm(struct task_struct *task)
2798{ 2996{
2799 struct perf_comm_event comm_event; 2997 struct perf_comm_event comm_event;
2800 2998
2999 if (task->perf_counter_ctxp)
3000 perf_counter_enable_on_exec(task);
3001
2801 if (!atomic_read(&nr_comm_counters)) 3002 if (!atomic_read(&nr_comm_counters))
2802 return; 3003 return;
2803 3004
@@ -3317,8 +3518,8 @@ out:
3317 put_cpu_var(perf_cpu_context); 3518 put_cpu_var(perf_cpu_context);
3318} 3519}
3319 3520
3320void 3521void __perf_swcounter_event(u32 event, u64 nr, int nmi,
3321perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) 3522 struct pt_regs *regs, u64 addr)
3322{ 3523{
3323 struct perf_sample_data data = { 3524 struct perf_sample_data data = {
3324 .regs = regs, 3525 .regs = regs,
@@ -3509,9 +3710,21 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
3509} 3710}
3510#endif 3711#endif
3511 3712
3713atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX];
3714
3715static void sw_perf_counter_destroy(struct perf_counter *counter)
3716{
3717 u64 event = counter->attr.config;
3718
3719 WARN_ON(counter->parent);
3720
3721 atomic_dec(&perf_swcounter_enabled[event]);
3722}
3723
3512static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) 3724static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
3513{ 3725{
3514 const struct pmu *pmu = NULL; 3726 const struct pmu *pmu = NULL;
3727 u64 event = counter->attr.config;
3515 3728
3516 /* 3729 /*
3517 * Software counters (currently) can't in general distinguish 3730 * Software counters (currently) can't in general distinguish
@@ -3520,7 +3733,7 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
3520 * to be kernel events, and page faults are never hypervisor 3733 * to be kernel events, and page faults are never hypervisor
3521 * events. 3734 * events.
3522 */ 3735 */
3523 switch (counter->attr.config) { 3736 switch (event) {
3524 case PERF_COUNT_SW_CPU_CLOCK: 3737 case PERF_COUNT_SW_CPU_CLOCK:
3525 pmu = &perf_ops_cpu_clock; 3738 pmu = &perf_ops_cpu_clock;
3526 3739
@@ -3541,6 +3754,10 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
3541 case PERF_COUNT_SW_PAGE_FAULTS_MAJ: 3754 case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
3542 case PERF_COUNT_SW_CONTEXT_SWITCHES: 3755 case PERF_COUNT_SW_CONTEXT_SWITCHES:
3543 case PERF_COUNT_SW_CPU_MIGRATIONS: 3756 case PERF_COUNT_SW_CPU_MIGRATIONS:
3757 if (!counter->parent) {
3758 atomic_inc(&perf_swcounter_enabled[event]);
3759 counter->destroy = sw_perf_counter_destroy;
3760 }
3544 pmu = &perf_ops_generic; 3761 pmu = &perf_ops_generic;
3545 break; 3762 break;
3546 } 3763 }
@@ -3556,6 +3773,7 @@ perf_counter_alloc(struct perf_counter_attr *attr,
3556 int cpu, 3773 int cpu,
3557 struct perf_counter_context *ctx, 3774 struct perf_counter_context *ctx,
3558 struct perf_counter *group_leader, 3775 struct perf_counter *group_leader,
3776 struct perf_counter *parent_counter,
3559 gfp_t gfpflags) 3777 gfp_t gfpflags)
3560{ 3778{
3561 const struct pmu *pmu; 3779 const struct pmu *pmu;
@@ -3591,6 +3809,8 @@ perf_counter_alloc(struct perf_counter_attr *attr,
3591 counter->ctx = ctx; 3809 counter->ctx = ctx;
3592 counter->oncpu = -1; 3810 counter->oncpu = -1;
3593 3811
3812 counter->parent = parent_counter;
3813
3594 counter->ns = get_pid_ns(current->nsproxy->pid_ns); 3814 counter->ns = get_pid_ns(current->nsproxy->pid_ns);
3595 counter->id = atomic64_inc_return(&perf_counter_id); 3815 counter->id = atomic64_inc_return(&perf_counter_id);
3596 3816
@@ -3648,11 +3868,13 @@ done:
3648 3868
3649 counter->pmu = pmu; 3869 counter->pmu = pmu;
3650 3870
3651 atomic_inc(&nr_counters); 3871 if (!counter->parent) {
3652 if (counter->attr.mmap) 3872 atomic_inc(&nr_counters);
3653 atomic_inc(&nr_mmap_counters); 3873 if (counter->attr.mmap)
3654 if (counter->attr.comm) 3874 atomic_inc(&nr_mmap_counters);
3655 atomic_inc(&nr_comm_counters); 3875 if (counter->attr.comm)
3876 atomic_inc(&nr_comm_counters);
3877 }
3656 3878
3657 return counter; 3879 return counter;
3658} 3880}
@@ -3815,7 +4037,7 @@ SYSCALL_DEFINE5(perf_counter_open,
3815 } 4037 }
3816 4038
3817 counter = perf_counter_alloc(&attr, cpu, ctx, group_leader, 4039 counter = perf_counter_alloc(&attr, cpu, ctx, group_leader,
3818 GFP_KERNEL); 4040 NULL, GFP_KERNEL);
3819 ret = PTR_ERR(counter); 4041 ret = PTR_ERR(counter);
3820 if (IS_ERR(counter)) 4042 if (IS_ERR(counter))
3821 goto err_put_context; 4043 goto err_put_context;
@@ -3881,7 +4103,8 @@ inherit_counter(struct perf_counter *parent_counter,
3881 4103
3882 child_counter = perf_counter_alloc(&parent_counter->attr, 4104 child_counter = perf_counter_alloc(&parent_counter->attr,
3883 parent_counter->cpu, child_ctx, 4105 parent_counter->cpu, child_ctx,
3884 group_leader, GFP_KERNEL); 4106 group_leader, parent_counter,
4107 GFP_KERNEL);
3885 if (IS_ERR(child_counter)) 4108 if (IS_ERR(child_counter))
3886 return child_counter; 4109 return child_counter;
3887 get_ctx(child_ctx); 4110 get_ctx(child_ctx);
@@ -3904,12 +4127,6 @@ inherit_counter(struct perf_counter *parent_counter,
3904 */ 4127 */
3905 add_counter_to_ctx(child_counter, child_ctx); 4128 add_counter_to_ctx(child_counter, child_ctx);
3906 4129
3907 child_counter->parent = parent_counter;
3908 /*
3909 * inherit into child's child as well:
3910 */
3911 child_counter->attr.inherit = 1;
3912
3913 /* 4130 /*
3914 * Get a reference to the parent filp - we will fput it 4131 * Get a reference to the parent filp - we will fput it
3915 * when the child counter exits. This is safe to do because 4132 * when the child counter exits. This is safe to do because
@@ -3953,10 +4170,14 @@ static int inherit_group(struct perf_counter *parent_counter,
3953} 4170}
3954 4171
3955static void sync_child_counter(struct perf_counter *child_counter, 4172static void sync_child_counter(struct perf_counter *child_counter,
3956 struct perf_counter *parent_counter) 4173 struct task_struct *child)
3957{ 4174{
4175 struct perf_counter *parent_counter = child_counter->parent;
3958 u64 child_val; 4176 u64 child_val;
3959 4177
4178 if (child_counter->attr.inherit_stat)
4179 perf_counter_read_event(child_counter, child);
4180
3960 child_val = atomic64_read(&child_counter->count); 4181 child_val = atomic64_read(&child_counter->count);
3961 4182
3962 /* 4183 /*
@@ -3985,7 +4206,8 @@ static void sync_child_counter(struct perf_counter *child_counter,
3985 4206
3986static void 4207static void
3987__perf_counter_exit_task(struct perf_counter *child_counter, 4208__perf_counter_exit_task(struct perf_counter *child_counter,
3988 struct perf_counter_context *child_ctx) 4209 struct perf_counter_context *child_ctx,
4210 struct task_struct *child)
3989{ 4211{
3990 struct perf_counter *parent_counter; 4212 struct perf_counter *parent_counter;
3991 4213
@@ -3999,7 +4221,7 @@ __perf_counter_exit_task(struct perf_counter *child_counter,
3999 * counters need to be zapped - but otherwise linger. 4221 * counters need to be zapped - but otherwise linger.
4000 */ 4222 */
4001 if (parent_counter) { 4223 if (parent_counter) {
4002 sync_child_counter(child_counter, parent_counter); 4224 sync_child_counter(child_counter, child);
4003 free_counter(child_counter); 4225 free_counter(child_counter);
4004 } 4226 }
4005} 4227}
@@ -4061,7 +4283,7 @@ void perf_counter_exit_task(struct task_struct *child)
4061again: 4283again:
4062 list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list, 4284 list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
4063 list_entry) 4285 list_entry)
4064 __perf_counter_exit_task(child_counter, child_ctx); 4286 __perf_counter_exit_task(child_counter, child_ctx, child);
4065 4287
4066 /* 4288 /*
4067 * If the last counter was a group counter, it will have appended all 4289 * If the last counter was a group counter, it will have appended all
diff --git a/kernel/pid.c b/kernel/pid.c
index 31310b5d3f50..5fa1db48d8b7 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -36,6 +36,7 @@
36#include <linux/pid_namespace.h> 36#include <linux/pid_namespace.h>
37#include <linux/init_task.h> 37#include <linux/init_task.h>
38#include <linux/syscalls.h> 38#include <linux/syscalls.h>
39#include <linux/kmemleak.h>
39 40
40#define pid_hashfn(nr, ns) \ 41#define pid_hashfn(nr, ns) \
41 hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) 42 hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -512,6 +513,12 @@ void __init pidhash_init(void)
512 pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash))); 513 pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash)));
513 if (!pid_hash) 514 if (!pid_hash)
514 panic("Could not alloc pidhash!\n"); 515 panic("Could not alloc pidhash!\n");
516 /*
517 * pid_hash contains references to allocated struct pid objects and it
518 * must be scanned by kmemleak to avoid false positives.
519 */
520 kmemleak_alloc(pid_hash, pidhash_size * sizeof(*(pid_hash)), 0,
521 GFP_KERNEL);
515 for (i = 0; i < pidhash_size; i++) 522 for (i = 0; i < pidhash_size; i++)
516 INIT_HLIST_HEAD(&pid_hash[i]); 523 INIT_HLIST_HEAD(&pid_hash[i]);
517} 524}
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 61c78b2c07ba..082c320e4dbf 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -181,8 +181,8 @@ int ptrace_attach(struct task_struct *task)
181 * interference; SUID, SGID and LSM creds get determined differently 181 * interference; SUID, SGID and LSM creds get determined differently
182 * under ptrace. 182 * under ptrace.
183 */ 183 */
184 retval = mutex_lock_interruptible(&task->cred_guard_mutex); 184 retval = -ERESTARTNOINTR;
185 if (retval < 0) 185 if (mutex_lock_interruptible(&task->cred_guard_mutex))
186 goto out; 186 goto out;
187 187
188 task_lock(task); 188 task_lock(task);
diff --git a/kernel/resource.c b/kernel/resource.c
index ac5f3a36923f..78b087221c15 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -787,7 +787,7 @@ static int __init reserve_setup(char *str)
787 static struct resource reserve[MAXRESERVE]; 787 static struct resource reserve[MAXRESERVE];
788 788
789 for (;;) { 789 for (;;) {
790 int io_start, io_num; 790 unsigned int io_start, io_num;
791 int x = reserved; 791 int x = reserved;
792 792
793 if (get_option (&str, &io_start) != 2) 793 if (get_option (&str, &io_start) != 2)
diff --git a/kernel/sched.c b/kernel/sched.c
index 7c9098d186e6..01f55ada3598 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6541,6 +6541,11 @@ SYSCALL_DEFINE0(sched_yield)
6541 return 0; 6541 return 0;
6542} 6542}
6543 6543
6544static inline int should_resched(void)
6545{
6546 return need_resched() && !(preempt_count() & PREEMPT_ACTIVE);
6547}
6548
6544static void __cond_resched(void) 6549static void __cond_resched(void)
6545{ 6550{
6546#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP 6551#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
@@ -6560,8 +6565,7 @@ static void __cond_resched(void)
6560 6565
6561int __sched _cond_resched(void) 6566int __sched _cond_resched(void)
6562{ 6567{
6563 if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) && 6568 if (should_resched()) {
6564 system_state == SYSTEM_RUNNING) {
6565 __cond_resched(); 6569 __cond_resched();
6566 return 1; 6570 return 1;
6567 } 6571 }
@@ -6579,12 +6583,12 @@ EXPORT_SYMBOL(_cond_resched);
6579 */ 6583 */
6580int cond_resched_lock(spinlock_t *lock) 6584int cond_resched_lock(spinlock_t *lock)
6581{ 6585{
6582 int resched = need_resched() && system_state == SYSTEM_RUNNING; 6586 int resched = should_resched();
6583 int ret = 0; 6587 int ret = 0;
6584 6588
6585 if (spin_needbreak(lock) || resched) { 6589 if (spin_needbreak(lock) || resched) {
6586 spin_unlock(lock); 6590 spin_unlock(lock);
6587 if (resched && need_resched()) 6591 if (resched)
6588 __cond_resched(); 6592 __cond_resched();
6589 else 6593 else
6590 cpu_relax(); 6594 cpu_relax();
@@ -6599,7 +6603,7 @@ int __sched cond_resched_softirq(void)
6599{ 6603{
6600 BUG_ON(!in_softirq()); 6604 BUG_ON(!in_softirq());
6601 6605
6602 if (need_resched() && system_state == SYSTEM_RUNNING) { 6606 if (should_resched()) {
6603 local_bh_enable(); 6607 local_bh_enable();
6604 __cond_resched(); 6608 __cond_resched();
6605 local_bh_disable(); 6609 local_bh_disable();
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 62e4ff9968b5..98e02328c67d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -335,7 +335,10 @@ static struct ctl_table kern_table[] = {
335 .data = &sysctl_timer_migration, 335 .data = &sysctl_timer_migration,
336 .maxlen = sizeof(unsigned int), 336 .maxlen = sizeof(unsigned int),
337 .mode = 0644, 337 .mode = 0644,
338 .proc_handler = &proc_dointvec, 338 .proc_handler = &proc_dointvec_minmax,
339 .strategy = &sysctl_intvec,
340 .extra1 = &zero,
341 .extra2 = &one,
339 }, 342 },
340#endif 343#endif
341 { 344 {
@@ -744,6 +747,14 @@ static struct ctl_table kern_table[] = {
744 .proc_handler = &proc_dointvec, 747 .proc_handler = &proc_dointvec,
745 }, 748 },
746 { 749 {
750 .ctl_name = CTL_UNNUMBERED,
751 .procname = "panic_on_io_nmi",
752 .data = &panic_on_io_nmi,
753 .maxlen = sizeof(int),
754 .mode = 0644,
755 .proc_handler = &proc_dointvec,
756 },
757 {
747 .ctl_name = KERN_BOOTLOADER_TYPE, 758 .ctl_name = KERN_BOOTLOADER_TYPE,
748 .procname = "bootloader_type", 759 .procname = "bootloader_type",
749 .data = &bootloader_type, 760 .data = &bootloader_type,
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index c994530d166d..4cde8b9c716f 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -96,7 +96,7 @@ static DEFINE_MUTEX(show_mutex);
96/* 96/*
97 * Collection status, active/inactive: 97 * Collection status, active/inactive:
98 */ 98 */
99static int __read_mostly active; 99int __read_mostly timer_stats_active;
100 100
101/* 101/*
102 * Beginning/end timestamps of measurement: 102 * Beginning/end timestamps of measurement:
@@ -242,7 +242,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
242 struct entry *entry, input; 242 struct entry *entry, input;
243 unsigned long flags; 243 unsigned long flags;
244 244
245 if (likely(!active)) 245 if (likely(!timer_stats_active))
246 return; 246 return;
247 247
248 lock = &per_cpu(lookup_lock, raw_smp_processor_id()); 248 lock = &per_cpu(lookup_lock, raw_smp_processor_id());
@@ -254,7 +254,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
254 input.timer_flag = timer_flag; 254 input.timer_flag = timer_flag;
255 255
256 spin_lock_irqsave(lock, flags); 256 spin_lock_irqsave(lock, flags);
257 if (!active) 257 if (!timer_stats_active)
258 goto out_unlock; 258 goto out_unlock;
259 259
260 entry = tstat_lookup(&input, comm); 260 entry = tstat_lookup(&input, comm);
@@ -290,7 +290,7 @@ static int tstats_show(struct seq_file *m, void *v)
290 /* 290 /*
291 * If still active then calculate up to now: 291 * If still active then calculate up to now:
292 */ 292 */
293 if (active) 293 if (timer_stats_active)
294 time_stop = ktime_get(); 294 time_stop = ktime_get();
295 295
296 time = ktime_sub(time_stop, time_start); 296 time = ktime_sub(time_stop, time_start);
@@ -368,18 +368,18 @@ static ssize_t tstats_write(struct file *file, const char __user *buf,
368 mutex_lock(&show_mutex); 368 mutex_lock(&show_mutex);
369 switch (ctl[0]) { 369 switch (ctl[0]) {
370 case '0': 370 case '0':
371 if (active) { 371 if (timer_stats_active) {
372 active = 0; 372 timer_stats_active = 0;
373 time_stop = ktime_get(); 373 time_stop = ktime_get();
374 sync_access(); 374 sync_access();
375 } 375 }
376 break; 376 break;
377 case '1': 377 case '1':
378 if (!active) { 378 if (!timer_stats_active) {
379 reset_entries(); 379 reset_entries();
380 time_start = ktime_get(); 380 time_start = ktime_get();
381 smp_mb(); 381 smp_mb();
382 active = 1; 382 timer_stats_active = 1;
383 } 383 }
384 break; 384 break;
385 default: 385 default:
diff --git a/kernel/timer.c b/kernel/timer.c
index 54d3912f8cad..0b36b9e5cc8b 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -380,6 +380,8 @@ static void timer_stats_account_timer(struct timer_list *timer)
380{ 380{
381 unsigned int flag = 0; 381 unsigned int flag = 0;
382 382
383 if (likely(!timer->start_site))
384 return;
383 if (unlikely(tbase_get_deferrable(timer->base))) 385 if (unlikely(tbase_get_deferrable(timer->base)))
384 flag |= TIMER_STATS_FLAG_DEFERRABLE; 386 flag |= TIMER_STATS_FLAG_DEFERRABLE;
385 387
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 1551f47e7669..019f380fd764 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -226,13 +226,13 @@ config BOOT_TRACER
226 the timings of the initcalls and traces key events and the identity 226 the timings of the initcalls and traces key events and the identity
227 of tasks that can cause boot delays, such as context-switches. 227 of tasks that can cause boot delays, such as context-switches.
228 228
229 Its aim is to be parsed by the /scripts/bootgraph.pl tool to 229 Its aim is to be parsed by the scripts/bootgraph.pl tool to
230 produce pretty graphics about boot inefficiencies, giving a visual 230 produce pretty graphics about boot inefficiencies, giving a visual
231 representation of the delays during initcalls - but the raw 231 representation of the delays during initcalls - but the raw
232 /debug/tracing/trace text output is readable too. 232 /debug/tracing/trace text output is readable too.
233 233
234 You must pass in ftrace=initcall to the kernel command line 234 You must pass in initcall_debug and ftrace=initcall to the kernel
235 to enable this on bootup. 235 command line to enable this on bootup.
236 236
237config TRACE_BRANCH_PROFILING 237config TRACE_BRANCH_PROFILING
238 bool 238 bool
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 3718d55fb4c3..bce9e01a29c8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -291,7 +291,9 @@ function_stat_next(void *v, int idx)
291 pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK); 291 pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK);
292 292
293 again: 293 again:
294 rec++; 294 if (idx != 0)
295 rec++;
296
295 if ((void *)rec >= (void *)&pg->records[pg->index]) { 297 if ((void *)rec >= (void *)&pg->records[pg->index]) {
296 pg = pg->next; 298 pg = pg->next;
297 if (!pg) 299 if (!pg)
@@ -1417,10 +1419,20 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
1417{ 1419{
1418 struct ftrace_iterator *iter = m->private; 1420 struct ftrace_iterator *iter = m->private;
1419 void *p = NULL; 1421 void *p = NULL;
1422 loff_t l;
1423
1424 if (!(iter->flags & FTRACE_ITER_HASH))
1425 *pos = 0;
1420 1426
1421 iter->flags |= FTRACE_ITER_HASH; 1427 iter->flags |= FTRACE_ITER_HASH;
1422 1428
1423 return t_hash_next(m, p, pos); 1429 iter->hidx = 0;
1430 for (l = 0; l <= *pos; ) {
1431 p = t_hash_next(m, p, &l);
1432 if (!p)
1433 break;
1434 }
1435 return p;
1424} 1436}
1425 1437
1426static int t_hash_show(struct seq_file *m, void *v) 1438static int t_hash_show(struct seq_file *m, void *v)
@@ -1467,8 +1479,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1467 iter->pg = iter->pg->next; 1479 iter->pg = iter->pg->next;
1468 iter->idx = 0; 1480 iter->idx = 0;
1469 goto retry; 1481 goto retry;
1470 } else {
1471 iter->idx = -1;
1472 } 1482 }
1473 } else { 1483 } else {
1474 rec = &iter->pg->records[iter->idx++]; 1484 rec = &iter->pg->records[iter->idx++];
@@ -1497,6 +1507,7 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1497{ 1507{
1498 struct ftrace_iterator *iter = m->private; 1508 struct ftrace_iterator *iter = m->private;
1499 void *p = NULL; 1509 void *p = NULL;
1510 loff_t l;
1500 1511
1501 mutex_lock(&ftrace_lock); 1512 mutex_lock(&ftrace_lock);
1502 /* 1513 /*
@@ -1508,23 +1519,21 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1508 if (*pos > 0) 1519 if (*pos > 0)
1509 return t_hash_start(m, pos); 1520 return t_hash_start(m, pos);
1510 iter->flags |= FTRACE_ITER_PRINTALL; 1521 iter->flags |= FTRACE_ITER_PRINTALL;
1511 (*pos)++;
1512 return iter; 1522 return iter;
1513 } 1523 }
1514 1524
1515 if (iter->flags & FTRACE_ITER_HASH) 1525 if (iter->flags & FTRACE_ITER_HASH)
1516 return t_hash_start(m, pos); 1526 return t_hash_start(m, pos);
1517 1527
1518 if (*pos > 0) { 1528 iter->pg = ftrace_pages_start;
1519 if (iter->idx < 0) 1529 iter->idx = 0;
1520 return p; 1530 for (l = 0; l <= *pos; ) {
1521 (*pos)--; 1531 p = t_next(m, p, &l);
1522 iter->idx--; 1532 if (!p)
1533 break;
1523 } 1534 }
1524 1535
1525 p = t_next(m, p, pos); 1536 if (!p && iter->flags & FTRACE_ITER_FILTER)
1526
1527 if (!p)
1528 return t_hash_start(m, pos); 1537 return t_hash_start(m, pos);
1529 1538
1530 return p; 1539 return p;
@@ -2500,32 +2509,31 @@ int ftrace_graph_count;
2500unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; 2509unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
2501 2510
2502static void * 2511static void *
2503g_next(struct seq_file *m, void *v, loff_t *pos) 2512__g_next(struct seq_file *m, loff_t *pos)
2504{ 2513{
2505 unsigned long *array = m->private; 2514 unsigned long *array = m->private;
2506 int index = *pos;
2507 2515
2508 (*pos)++; 2516 if (*pos >= ftrace_graph_count)
2509
2510 if (index >= ftrace_graph_count)
2511 return NULL; 2517 return NULL;
2518 return &array[*pos];
2519}
2512 2520
2513 return &array[index]; 2521static void *
2522g_next(struct seq_file *m, void *v, loff_t *pos)
2523{
2524 (*pos)++;
2525 return __g_next(m, pos);
2514} 2526}
2515 2527
2516static void *g_start(struct seq_file *m, loff_t *pos) 2528static void *g_start(struct seq_file *m, loff_t *pos)
2517{ 2529{
2518 void *p = NULL;
2519
2520 mutex_lock(&graph_lock); 2530 mutex_lock(&graph_lock);
2521 2531
2522 /* Nothing, tell g_show to print all functions are enabled */ 2532 /* Nothing, tell g_show to print all functions are enabled */
2523 if (!ftrace_graph_count && !*pos) 2533 if (!ftrace_graph_count && !*pos)
2524 return (void *)1; 2534 return (void *)1;
2525 2535
2526 p = g_next(m, p, pos); 2536 return __g_next(m, pos);
2527
2528 return p;
2529} 2537}
2530 2538
2531static void g_stop(struct seq_file *m, void *p) 2539static void g_stop(struct seq_file *m, void *p)
@@ -3152,10 +3160,10 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
3152 3160
3153 ret = proc_dointvec(table, write, file, buffer, lenp, ppos); 3161 ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
3154 3162
3155 if (ret || !write || (last_ftrace_enabled == ftrace_enabled)) 3163 if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
3156 goto out; 3164 goto out;
3157 3165
3158 last_ftrace_enabled = ftrace_enabled; 3166 last_ftrace_enabled = !!ftrace_enabled;
3159 3167
3160 if (ftrace_enabled) { 3168 if (ftrace_enabled) {
3161 3169
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 04dac2638258..bf27bb7a63e2 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1563,6 +1563,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1563 return NULL; 1563 return NULL;
1564} 1564}
1565 1565
1566#ifdef CONFIG_TRACING
1567
1566#define TRACE_RECURSIVE_DEPTH 16 1568#define TRACE_RECURSIVE_DEPTH 16
1567 1569
1568static int trace_recursive_lock(void) 1570static int trace_recursive_lock(void)
@@ -1593,6 +1595,13 @@ static void trace_recursive_unlock(void)
1593 current->trace_recursion--; 1595 current->trace_recursion--;
1594} 1596}
1595 1597
1598#else
1599
1600#define trace_recursive_lock() (0)
1601#define trace_recursive_unlock() do { } while (0)
1602
1603#endif
1604
1596static DEFINE_PER_CPU(int, rb_need_resched); 1605static DEFINE_PER_CPU(int, rb_need_resched);
1597 1606
1598/** 1607/**
@@ -3104,6 +3113,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3104} 3113}
3105EXPORT_SYMBOL_GPL(ring_buffer_read_page); 3114EXPORT_SYMBOL_GPL(ring_buffer_read_page);
3106 3115
3116#ifdef CONFIG_TRACING
3107static ssize_t 3117static ssize_t
3108rb_simple_read(struct file *filp, char __user *ubuf, 3118rb_simple_read(struct file *filp, char __user *ubuf,
3109 size_t cnt, loff_t *ppos) 3119 size_t cnt, loff_t *ppos)
@@ -3171,6 +3181,7 @@ static __init int rb_init_debugfs(void)
3171} 3181}
3172 3182
3173fs_initcall(rb_init_debugfs); 3183fs_initcall(rb_init_debugfs);
3184#endif
3174 3185
3175#ifdef CONFIG_HOTPLUG_CPU 3186#ifdef CONFIG_HOTPLUG_CPU
3176static int rb_cpu_notify(struct notifier_block *self, 3187static int rb_cpu_notify(struct notifier_block *self,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 076fa6f0ee48..3aa0a0dfdfa8 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -284,13 +284,12 @@ void trace_wake_up(void)
284static int __init set_buf_size(char *str) 284static int __init set_buf_size(char *str)
285{ 285{
286 unsigned long buf_size; 286 unsigned long buf_size;
287 int ret;
288 287
289 if (!str) 288 if (!str)
290 return 0; 289 return 0;
291 ret = strict_strtoul(str, 0, &buf_size); 290 buf_size = memparse(str, &str);
292 /* nr_entries can not be zero */ 291 /* nr_entries can not be zero */
293 if (ret < 0 || buf_size == 0) 292 if (buf_size == 0)
294 return 0; 293 return 0;
295 trace_buf_size = buf_size; 294 trace_buf_size = buf_size;
296 return 1; 295 return 1;
@@ -2053,25 +2052,23 @@ static int tracing_open(struct inode *inode, struct file *file)
2053static void * 2052static void *
2054t_next(struct seq_file *m, void *v, loff_t *pos) 2053t_next(struct seq_file *m, void *v, loff_t *pos)
2055{ 2054{
2056 struct tracer *t = m->private; 2055 struct tracer *t = v;
2057 2056
2058 (*pos)++; 2057 (*pos)++;
2059 2058
2060 if (t) 2059 if (t)
2061 t = t->next; 2060 t = t->next;
2062 2061
2063 m->private = t;
2064
2065 return t; 2062 return t;
2066} 2063}
2067 2064
2068static void *t_start(struct seq_file *m, loff_t *pos) 2065static void *t_start(struct seq_file *m, loff_t *pos)
2069{ 2066{
2070 struct tracer *t = m->private; 2067 struct tracer *t;
2071 loff_t l = 0; 2068 loff_t l = 0;
2072 2069
2073 mutex_lock(&trace_types_lock); 2070 mutex_lock(&trace_types_lock);
2074 for (; t && l < *pos; t = t_next(m, t, &l)) 2071 for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
2075 ; 2072 ;
2076 2073
2077 return t; 2074 return t;
@@ -2107,18 +2104,10 @@ static struct seq_operations show_traces_seq_ops = {
2107 2104
2108static int show_traces_open(struct inode *inode, struct file *file) 2105static int show_traces_open(struct inode *inode, struct file *file)
2109{ 2106{
2110 int ret;
2111
2112 if (tracing_disabled) 2107 if (tracing_disabled)
2113 return -ENODEV; 2108 return -ENODEV;
2114 2109
2115 ret = seq_open(file, &show_traces_seq_ops); 2110 return seq_open(file, &show_traces_seq_ops);
2116 if (!ret) {
2117 struct seq_file *m = file->private_data;
2118 m->private = trace_types;
2119 }
2120
2121 return ret;
2122} 2111}
2123 2112
2124static ssize_t 2113static ssize_t
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6e735d4771f8..3548ae5cc780 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -597,6 +597,7 @@ print_graph_function(struct trace_iterator *iter)
597 597
598extern struct pid *ftrace_pid_trace; 598extern struct pid *ftrace_pid_trace;
599 599
600#ifdef CONFIG_FUNCTION_TRACER
600static inline int ftrace_trace_task(struct task_struct *task) 601static inline int ftrace_trace_task(struct task_struct *task)
601{ 602{
602 if (!ftrace_pid_trace) 603 if (!ftrace_pid_trace)
@@ -604,6 +605,12 @@ static inline int ftrace_trace_task(struct task_struct *task)
604 605
605 return test_tsk_trace_trace(task); 606 return test_tsk_trace_trace(task);
606} 607}
608#else
609static inline int ftrace_trace_task(struct task_struct *task)
610{
611 return 1;
612}
613#endif
607 614
608/* 615/*
609 * trace_iterator_flags is an enumeration that defines bit 616 * trace_iterator_flags is an enumeration that defines bit
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
index 5e32e375134d..6db005e12487 100644
--- a/kernel/trace/trace_event_types.h
+++ b/kernel/trace/trace_event_types.h
@@ -26,6 +26,9 @@ TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET,
26 ftrace_graph_ret_entry, ignore, 26 ftrace_graph_ret_entry, ignore,
27 TRACE_STRUCT( 27 TRACE_STRUCT(
28 TRACE_FIELD(unsigned long, ret.func, func) 28 TRACE_FIELD(unsigned long, ret.func, func)
29 TRACE_FIELD(unsigned long long, ret.calltime, calltime)
30 TRACE_FIELD(unsigned long long, ret.rettime, rettime)
31 TRACE_FIELD(unsigned long, ret.overrun, overrun)
29 TRACE_FIELD(int, ret.depth, depth) 32 TRACE_FIELD(int, ret.depth, depth)
30 ), 33 ),
31 TP_RAW_FMT("<-- %lx (%d)") 34 TP_RAW_FMT("<-- %lx (%d)")
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index aa08be69a1b6..53c8fd376a88 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -300,10 +300,18 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
300 300
301static void *t_start(struct seq_file *m, loff_t *pos) 301static void *t_start(struct seq_file *m, loff_t *pos)
302{ 302{
303 struct ftrace_event_call *call = NULL;
304 loff_t l;
305
303 mutex_lock(&event_mutex); 306 mutex_lock(&event_mutex);
304 if (*pos == 0) 307
305 m->private = ftrace_events.next; 308 m->private = ftrace_events.next;
306 return t_next(m, NULL, pos); 309 for (l = 0; l <= *pos; ) {
310 call = t_next(m, NULL, &l);
311 if (!call)
312 break;
313 }
314 return call;
307} 315}
308 316
309static void * 317static void *
@@ -332,10 +340,18 @@ s_next(struct seq_file *m, void *v, loff_t *pos)
332 340
333static void *s_start(struct seq_file *m, loff_t *pos) 341static void *s_start(struct seq_file *m, loff_t *pos)
334{ 342{
343 struct ftrace_event_call *call = NULL;
344 loff_t l;
345
335 mutex_lock(&event_mutex); 346 mutex_lock(&event_mutex);
336 if (*pos == 0) 347
337 m->private = ftrace_events.next; 348 m->private = ftrace_events.next;
338 return s_next(m, NULL, pos); 349 for (l = 0; l <= *pos; ) {
350 call = s_next(m, NULL, &l);
351 if (!call)
352 break;
353 }
354 return call;
339} 355}
340 356
341static int t_show(struct seq_file *m, void *v) 357static int t_show(struct seq_file *m, void *v)
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 90f134764837..7402144bff21 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -302,8 +302,7 @@ ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
302 if (count == -1) 302 if (count == -1)
303 seq_printf(m, ":unlimited\n"); 303 seq_printf(m, ":unlimited\n");
304 else 304 else
305 seq_printf(m, ":count=%ld", count); 305 seq_printf(m, ":count=%ld\n", count);
306 seq_putc(m, '\n');
307 306
308 return 0; 307 return 0;
309} 308}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 7938f3ae93e3..e0c2545622e8 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -27,8 +27,7 @@ void trace_print_seq(struct seq_file *m, struct trace_seq *s)
27{ 27{
28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; 28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
29 29
30 s->buffer[len] = 0; 30 seq_write(m, s->buffer, len);
31 seq_puts(m, s->buffer);
32 31
33 trace_seq_init(s); 32 trace_seq_init(s);
34} 33}
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 9bece9687b62..7b6278110827 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -155,25 +155,19 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
155EXPORT_SYMBOL_GPL(__ftrace_vprintk); 155EXPORT_SYMBOL_GPL(__ftrace_vprintk);
156 156
157static void * 157static void *
158t_next(struct seq_file *m, void *v, loff_t *pos) 158t_start(struct seq_file *m, loff_t *pos)
159{ 159{
160 const char **fmt = m->private; 160 const char **fmt = __start___trace_bprintk_fmt + *pos;
161 const char **next = fmt;
162
163 (*pos)++;
164 161
165 if ((unsigned long)fmt >= (unsigned long)__stop___trace_bprintk_fmt) 162 if ((unsigned long)fmt >= (unsigned long)__stop___trace_bprintk_fmt)
166 return NULL; 163 return NULL;
167
168 next = fmt;
169 m->private = ++next;
170
171 return fmt; 164 return fmt;
172} 165}
173 166
174static void *t_start(struct seq_file *m, loff_t *pos) 167static void *t_next(struct seq_file *m, void * v, loff_t *pos)
175{ 168{
176 return t_next(m, NULL, pos); 169 (*pos)++;
170 return t_start(m, pos);
177} 171}
178 172
179static int t_show(struct seq_file *m, void *v) 173static int t_show(struct seq_file *m, void *v)
@@ -224,15 +218,7 @@ static const struct seq_operations show_format_seq_ops = {
224static int 218static int
225ftrace_formats_open(struct inode *inode, struct file *file) 219ftrace_formats_open(struct inode *inode, struct file *file)
226{ 220{
227 int ret; 221 return seq_open(file, &show_format_seq_ops);
228
229 ret = seq_open(file, &show_format_seq_ops);
230 if (!ret) {
231 struct seq_file *m = file->private_data;
232
233 m->private = __start___trace_bprintk_fmt;
234 }
235 return ret;
236} 222}
237 223
238static const struct file_operations ftrace_formats_fops = { 224static const struct file_operations ftrace_formats_fops = {
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 2d7aebd71dbd..e644af910124 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -326,10 +326,10 @@ stack_trace_sysctl(struct ctl_table *table, int write,
326 ret = proc_dointvec(table, write, file, buffer, lenp, ppos); 326 ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
327 327
328 if (ret || !write || 328 if (ret || !write ||
329 (last_stack_tracer_enabled == stack_tracer_enabled)) 329 (last_stack_tracer_enabled == !!stack_tracer_enabled))
330 goto out; 330 goto out;
331 331
332 last_stack_tracer_enabled = stack_tracer_enabled; 332 last_stack_tracer_enabled = !!stack_tracer_enabled;
333 333
334 if (stack_tracer_enabled) 334 if (stack_tracer_enabled)
335 register_ftrace_function(&trace_ops); 335 register_ftrace_function(&trace_ops);
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index c00643733f4c..e66f5e493342 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -199,17 +199,13 @@ static void *stat_seq_start(struct seq_file *s, loff_t *pos)
199 mutex_lock(&session->stat_mutex); 199 mutex_lock(&session->stat_mutex);
200 200
201 /* If we are in the beginning of the file, print the headers */ 201 /* If we are in the beginning of the file, print the headers */
202 if (!*pos && session->ts->stat_headers) { 202 if (!*pos && session->ts->stat_headers)
203 (*pos)++;
204 return SEQ_START_TOKEN; 203 return SEQ_START_TOKEN;
205 }
206 204
207 node = rb_first(&session->stat_root); 205 node = rb_first(&session->stat_root);
208 for (i = 0; node && i < *pos; i++) 206 for (i = 0; node && i < *pos; i++)
209 node = rb_next(node); 207 node = rb_next(node);
210 208
211 (*pos)++;
212
213 return node; 209 return node;
214} 210}
215 211