author	Tejun Heo <tj@kernel.org>	2013-01-23 12:31:01 -0500
committer	Tejun Heo <tj@kernel.org>	2013-01-23 12:31:01 -0500
commit	c14afb82ffff5903a701a9fb737ac20f36d1f755 (patch)
tree	304dcc7b1d7b9a5f564f7e978228e61ef41fbef2 /kernel
parent	0fdff3ec6d87856cdcc99e69cf42143fdd6c56b4 (diff)
parent	1d8549085377674224bf30a368284c391a3ce40e (diff)
Merge branch 'master' into for-3.9-async
To receive f56c3196f251012de9b3ebaff55732a9074fdaae ("async: fix __lowest_in_progress()").

Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/Makefile	10
-rw-r--r--	kernel/async.c	30
-rw-r--r--	kernel/audit.c	40
-rw-r--r--	kernel/audit_tree.c	36
-rw-r--r--	kernel/audit_watch.c	6
-rw-r--r--	kernel/auditfilter.c	1
-rw-r--r--	kernel/auditsc.c	20
-rw-r--r--	kernel/compat.c	23
-rw-r--r--	kernel/cred.c	27
-rw-r--r--	kernel/debug/kdb/kdb_main.c	2
-rw-r--r--	kernel/fork.c	20
-rw-r--r--	kernel/irq/manage.c	2
-rw-r--r--	kernel/kcmp.c	1
-rw-r--r--	kernel/kmod.c	6
-rw-r--r--	kernel/modsign_certificate.S	19
-rw-r--r--	kernel/modsign_pubkey.c	21
-rw-r--r--	kernel/module.c	612
-rw-r--r--	kernel/pid.c	15
-rw-r--r--	kernel/pid_namespace.c	7
-rw-r--r--	kernel/posix-cpu-timers.c	3
-rw-r--r--	kernel/printk.c	5
-rw-r--r--	kernel/ptrace.c	74
-rw-r--r--	kernel/res_counter.c	20
-rw-r--r--	kernel/rwsem.c	10
-rw-r--r--	kernel/sched/core.c	3
-rw-r--r--	kernel/sched/fair.c	5
-rw-r--r--	kernel/signal.c	103
-rw-r--r--	kernel/sys_ni.c	1
-rw-r--r--	kernel/trace/ftrace.c	2
-rw-r--r--	kernel/trace/trace.c	77
-rw-r--r--	kernel/trace/trace_stack.c	4
-rw-r--r--	kernel/user_namespace.c	2
-rw-r--r--	kernel/utsname.c	3
-rw-r--r--	kernel/watchdog.c	11
34 files changed, 832 insertions, 389 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index ac0d533eb7de..6c072b6da239 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -54,7 +54,7 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
 obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
 obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o
+obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o modsign_certificate.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
@@ -137,10 +137,14 @@ ifeq ($(CONFIG_MODULE_SIG),y)
 #
 # Pull the signing certificate and any extra certificates into the kernel
 #
+
+quiet_cmd_touch = TOUCH   $@
+      cmd_touch = touch   $@
+
 extra_certificates:
-	touch $@
+	$(call cmd,touch)
 
-kernel/modsign_pubkey.o: signing_key.x509 extra_certificates
+kernel/modsign_certificate.o: signing_key.x509 extra_certificates
 
 ###############################################################################
 #
diff --git a/kernel/async.c b/kernel/async.c
index d9bf2a9b5cee..6c68fc3fae7b 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -88,18 +88,27 @@ static atomic_t entry_count;
  */
 static async_cookie_t __lowest_in_progress(struct async_domain *running)
 {
+	async_cookie_t first_running = next_cookie;	/* infinity value */
+	async_cookie_t first_pending = next_cookie;	/* ditto */
 	struct async_entry *entry;
 
+	/*
+	 * Both running and pending lists are sorted but not disjoint.
+	 * Take the first cookies from both and return the min.
+	 */
 	if (!list_empty(&running->domain)) {
 		entry = list_first_entry(&running->domain, typeof(*entry), list);
-		return entry->cookie;
+		first_running = entry->cookie;
 	}
 
-	list_for_each_entry(entry, &async_pending, list)
-		if (entry->running == running)
-			return entry->cookie;
+	list_for_each_entry(entry, &async_pending, list) {
+		if (entry->running == running) {
+			first_pending = entry->cookie;
+			break;
+		}
+	}
 
-	return next_cookie;	/* "infinity" value */
+	return min(first_running, first_pending);
 }
 
 static async_cookie_t lowest_in_progress(struct async_domain *running)
@@ -120,13 +129,17 @@ static void async_run_entry_fn(struct work_struct *work)
 {
 	struct async_entry *entry =
 		container_of(work, struct async_entry, work);
+	struct async_entry *pos;
 	unsigned long flags;
 	ktime_t uninitialized_var(calltime), delta, rettime;
 	struct async_domain *running = entry->running;
 
-	/* 1) move self to the running queue */
+	/* 1) move self to the running queue, make sure it stays sorted */
 	spin_lock_irqsave(&async_lock, flags);
-	list_move_tail(&entry->list, &running->domain);
+	list_for_each_entry_reverse(pos, &running->domain, list)
+		if (entry->cookie < pos->cookie)
+			break;
+	list_move_tail(&entry->list, &pos->list);
 	spin_unlock_irqrestore(&async_lock, flags);
 
 	/* 2) run (and print duration) */
@@ -198,6 +211,9 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a
 	atomic_inc(&entry_count);
 	spin_unlock_irqrestore(&async_lock, flags);
 
+	/* mark that this task has queued an async job, used by module init */
+	current->flags |= PF_USED_ASYNC;
+
 	/* schedule for execution */
 	queue_work(system_unbound_wq, &entry->work);
 
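Note on the async.c change above: the fixed __lowest_in_progress() takes the head cookie of each sorted list (running and pending) and returns the smaller one, instead of returning early from the running list. A minimal userspace sketch of that logic, using arrays in place of the kernel's linked lists (illustrative only; names invented, not kernel code):

    #include <stdio.h>

    #define INFINITY_COOKIE 1000ULL  /* stand-in for next_cookie */

    /* Lowest cookie across two sorted queues; either may be empty. */
    static unsigned long long lowest(const unsigned long long *running, int nr_running,
                                     const unsigned long long *pending, int nr_pending)
    {
        unsigned long long first_running = INFINITY_COOKIE;
        unsigned long long first_pending = INFINITY_COOKIE;

        if (nr_running > 0)
            first_running = running[0];  /* lists are sorted: head is the min */
        if (nr_pending > 0)
            first_pending = pending[0];

        return first_running < first_pending ? first_running : first_pending;
    }

    int main(void)
    {
        unsigned long long running[] = { 5, 7 };
        unsigned long long pending[] = { 3, 9 };

        /* prints 3: a pending entry can be older than everything running */
        printf("%llu\n", lowest(running, 2, pending, 2));
        return 0;
    }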
diff --git a/kernel/audit.c b/kernel/audit.c
index 40414e9143db..d596e5355f15 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -272,6 +272,8 @@ static int audit_log_config_change(char *function_name, int new, int old,
 	int rc = 0;
 
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+	if (unlikely(!ab))
+		return rc;
 	audit_log_format(ab, "%s=%d old=%d auid=%u ses=%u", function_name, new,
 			 old, from_kuid(&init_user_ns, loginuid), sessionid);
 	if (sid) {
@@ -619,6 +621,8 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type,
 	}
 
 	*ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
+	if (unlikely(!*ab))
+		return rc;
 	audit_log_format(*ab, "pid=%d uid=%u auid=%u ses=%u",
 			 task_tgid_vnr(current),
 			 from_kuid(&init_user_ns, current_uid()),
@@ -1097,6 +1101,23 @@ static inline void audit_get_stamp(struct audit_context *ctx,
 	}
 }
 
+/*
+ * Wait for auditd to drain the queue a little
+ */
+static void wait_for_auditd(unsigned long sleep_time)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	set_current_state(TASK_INTERRUPTIBLE);
+	add_wait_queue(&audit_backlog_wait, &wait);
+
+	if (audit_backlog_limit &&
+	    skb_queue_len(&audit_skb_queue) > audit_backlog_limit)
+		schedule_timeout(sleep_time);
+
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(&audit_backlog_wait, &wait);
+}
+
 /* Obtain an audit buffer. This routine does locking to obtain the
  * audit buffer, but then no locking is required for calls to
  * audit_log_*format. If the tsk is a task that is currently in a
@@ -1142,20 +1163,13 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
 
 	while (audit_backlog_limit
 	       && skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) {
-		if (gfp_mask & __GFP_WAIT && audit_backlog_wait_time
-		    && time_before(jiffies, timeout_start + audit_backlog_wait_time)) {
+		if (gfp_mask & __GFP_WAIT && audit_backlog_wait_time) {
+			unsigned long sleep_time;
 
-			/* Wait for auditd to drain the queue a little */
-			DECLARE_WAITQUEUE(wait, current);
-			set_current_state(TASK_INTERRUPTIBLE);
-			add_wait_queue(&audit_backlog_wait, &wait);
-
-			if (audit_backlog_limit &&
-			    skb_queue_len(&audit_skb_queue) > audit_backlog_limit)
-				schedule_timeout(timeout_start + audit_backlog_wait_time - jiffies);
-
-			__set_current_state(TASK_RUNNING);
-			remove_wait_queue(&audit_backlog_wait, &wait);
+			sleep_time = timeout_start + audit_backlog_wait_time -
+					jiffies;
+			if ((long)sleep_time > 0)
+				wait_for_auditd(sleep_time);
 			continue;
 		}
 		if (audit_rate_check() && printk_ratelimit())
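Note on the hunk above: the caller now computes sleep_time as an unsigned jiffies difference, and if the deadline has already passed the subtraction wraps to a huge unsigned value, so the result is cast to signed before the > 0 test. A small userspace model of that check (assumed simplified types; not kernel code):

    #include <stdio.h>

    /* Model: deadline and now are unsigned tick counters, like jiffies. */
    static void maybe_sleep(unsigned long deadline, unsigned long now)
    {
        unsigned long sleep_time = deadline - now; /* wraps if deadline < now */

        if ((long)sleep_time > 0)
            printf("would sleep %lu ticks\n", sleep_time);
        else
            printf("deadline passed, not sleeping\n");
    }

    int main(void)
    {
        maybe_sleep(100, 40);  /* would sleep 60 ticks */
        maybe_sleep(100, 140); /* wrapped difference reads as negative */
        return 0;
    }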
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index ed206fd88cca..642a89c4f3d6 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -249,7 +249,7 @@ static void untag_chunk(struct node *p)
 	list_del_rcu(&chunk->hash);
 	spin_unlock(&hash_lock);
 	spin_unlock(&entry->lock);
-	fsnotify_destroy_mark(entry);
+	fsnotify_destroy_mark(entry, audit_tree_group);
 	goto out;
 	}
 
@@ -291,7 +291,7 @@ static void untag_chunk(struct node *p)
 	owner->root = new;
 	spin_unlock(&hash_lock);
 	spin_unlock(&entry->lock);
-	fsnotify_destroy_mark(entry);
+	fsnotify_destroy_mark(entry, audit_tree_group);
 	fsnotify_put_mark(&new->mark);	/* drop initial reference */
 	goto out;
 
@@ -331,7 +331,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
 	spin_unlock(&hash_lock);
 	chunk->dead = 1;
 	spin_unlock(&entry->lock);
-	fsnotify_destroy_mark(entry);
+	fsnotify_destroy_mark(entry, audit_tree_group);
 	fsnotify_put_mark(entry);
 	return 0;
 	}
@@ -412,7 +412,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	spin_unlock(&chunk_entry->lock);
 	spin_unlock(&old_entry->lock);
 
-	fsnotify_destroy_mark(chunk_entry);
+	fsnotify_destroy_mark(chunk_entry, audit_tree_group);
 
 	fsnotify_put_mark(chunk_entry);
 	fsnotify_put_mark(old_entry);
@@ -443,17 +443,32 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	spin_unlock(&hash_lock);
 	spin_unlock(&chunk_entry->lock);
 	spin_unlock(&old_entry->lock);
-	fsnotify_destroy_mark(old_entry);
+	fsnotify_destroy_mark(old_entry, audit_tree_group);
 	fsnotify_put_mark(chunk_entry);	/* drop initial reference */
 	fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
 	return 0;
 }
 
+static void audit_log_remove_rule(struct audit_krule *rule)
+{
+	struct audit_buffer *ab;
+
+	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+	if (unlikely(!ab))
+		return;
+	audit_log_format(ab, "op=");
+	audit_log_string(ab, "remove rule");
+	audit_log_format(ab, " dir=");
+	audit_log_untrustedstring(ab, rule->tree->pathname);
+	audit_log_key(ab, rule->filterkey);
+	audit_log_format(ab, " list=%d res=1", rule->listnr);
+	audit_log_end(ab);
+}
+
 static void kill_rules(struct audit_tree *tree)
 {
 	struct audit_krule *rule, *next;
 	struct audit_entry *entry;
-	struct audit_buffer *ab;
 
 	list_for_each_entry_safe(rule, next, &tree->rules, rlist) {
 		entry = container_of(rule, struct audit_entry, rule);
@@ -461,14 +476,7 @@ static void kill_rules(struct audit_tree *tree)
 		list_del_init(&rule->rlist);
 		if (rule->tree) {
 			/* not a half-baked one */
-			ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
-			audit_log_format(ab, "op=");
-			audit_log_string(ab, "remove rule");
-			audit_log_format(ab, " dir=");
-			audit_log_untrustedstring(ab, rule->tree->pathname);
-			audit_log_key(ab, rule->filterkey);
-			audit_log_format(ab, " list=%d res=1", rule->listnr);
-			audit_log_end(ab);
+			audit_log_remove_rule(rule);
 			rule->tree = NULL;
 			list_del_rcu(&entry->list);
 			list_del(&entry->rule.list);
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 9a9ae6e3d290..22831c4d369c 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -240,6 +240,8 @@ static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watc
 	if (audit_enabled) {
 		struct audit_buffer *ab;
 		ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
+		if (unlikely(!ab))
+			return;
 		audit_log_format(ab, "auid=%u ses=%u op=",
 				 from_kuid(&init_user_ns, audit_get_loginuid(current)),
 				 audit_get_sessionid(current));
@@ -350,7 +352,7 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
 	}
 	mutex_unlock(&audit_filter_mutex);
 
-	fsnotify_destroy_mark(&parent->mark);
+	fsnotify_destroy_mark(&parent->mark, audit_watch_group);
 }
 
 /* Get path information necessary for adding watches. */
@@ -457,7 +459,7 @@ void audit_remove_watch_rule(struct audit_krule *krule)
 
 	if (list_empty(&parent->watches)) {
 		audit_get_parent(parent);
-		fsnotify_destroy_mark(&parent->mark);
+		fsnotify_destroy_mark(&parent->mark, audit_watch_group);
 		audit_put_parent(parent);
 	}
 }
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 7f19f23d38a3..f9fc54bbe06f 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1144,7 +1144,6 @@ static void audit_log_rule_change(kuid_t loginuid, u32 sessionid, u32 sid,
  * audit_receive_filter - apply all rules to the specified message type
  * @type: audit message type
  * @pid: target pid for netlink audit messages
- * @uid: target uid for netlink audit messages
  * @seq: netlink audit message sequence (serial) number
  * @data: payload data
  * @datasz: size of payload data
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e37e6a12c5e3..a371f857a0a9 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1464,14 +1464,14 @@ static void show_special(struct audit_context *context, int *call_panic)
 			audit_log_end(ab);
 			ab = audit_log_start(context, GFP_KERNEL,
 					     AUDIT_IPC_SET_PERM);
+			if (unlikely(!ab))
+				return;
 			audit_log_format(ab,
 				"qbytes=%lx ouid=%u ogid=%u mode=%#ho",
 				context->ipc.qbytes,
 				context->ipc.perm_uid,
 				context->ipc.perm_gid,
 				context->ipc.perm_mode);
-			if (!ab)
-				return;
 		}
 		break; }
 	case AUDIT_MQ_OPEN: {
@@ -2675,7 +2675,7 @@ void __audit_mmap_fd(int fd, int flags)
 	context->type = AUDIT_MMAP;
 }
 
-static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
+static void audit_log_task(struct audit_buffer *ab)
 {
 	kuid_t auid, uid;
 	kgid_t gid;
@@ -2693,6 +2693,11 @@ static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
 	audit_log_task_context(ab);
 	audit_log_format(ab, " pid=%d comm=", current->pid);
 	audit_log_untrustedstring(ab, current->comm);
+}
+
+static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
+{
+	audit_log_task(ab);
 	audit_log_format(ab, " reason=");
 	audit_log_string(ab, reason);
 	audit_log_format(ab, " sig=%ld", signr);
@@ -2715,6 +2720,8 @@ void audit_core_dumps(long signr)
 		return;
 
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
+	if (unlikely(!ab))
+		return;
 	audit_log_abend(ab, "memory violation", signr);
 	audit_log_end(ab);
 }
@@ -2723,8 +2730,11 @@ void __audit_seccomp(unsigned long syscall, long signr, int code)
 {
 	struct audit_buffer *ab;
 
-	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
-	audit_log_abend(ab, "seccomp", signr);
+	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_SECCOMP);
+	if (unlikely(!ab))
+		return;
+	audit_log_task(ab);
+	audit_log_format(ab, " sig=%ld", signr);
 	audit_log_format(ab, " syscall=%ld", syscall);
 	audit_log_format(ab, " compat=%d", is_compat_task());
 	audit_log_format(ab, " ip=0x%lx", KSTK_EIP(current));
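Note on the audit hunks above: several of them add the same guard after audit_log_start(), which returns NULL when no buffer can be obtained (for example, a full backlog with a non-blocking gfp mask). The shape of the pattern, reduced to a standalone stub (illustrative only; log_start() is invented):

    #include <stdio.h>

    struct audit_buffer;  /* opaque, as in the kernel */

    /* Stub: returns NULL on allocation failure, like audit_log_start(). */
    static struct audit_buffer *log_start(void)
    {
        return NULL; /* simulate a full backlog */
    }

    static void log_event(void)
    {
        struct audit_buffer *ab = log_start();
        if (!ab)    /* the guard each hunk adds: bail out, don't dereference */
            return;
        /* ... format and emit the record here ... */
    }

    int main(void)
    {
        log_event();   /* safely does nothing when the buffer is NULL */
        puts("no crash");
        return 0;
    }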
diff --git a/kernel/compat.c b/kernel/compat.c
index f6150e92dfc9..36700e9e2be9 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -535,9 +535,11 @@ asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru)
 	return 0;
 }
 
-asmlinkage long
-compat_sys_wait4(compat_pid_t pid, compat_uint_t __user *stat_addr, int options,
-		 struct compat_rusage __user *ru)
+COMPAT_SYSCALL_DEFINE4(wait4,
+	compat_pid_t, pid,
+	compat_uint_t __user *, stat_addr,
+	int, options,
+	struct compat_rusage __user *, ru)
 {
 	if (!ru) {
 		return sys_wait4(pid, stat_addr, options, NULL);
@@ -564,9 +566,10 @@ compat_sys_wait4(compat_pid_t pid, compat_uint_t __user *stat_addr, int options,
 	}
 }
 
-asmlinkage long compat_sys_waitid(int which, compat_pid_t pid,
-		struct compat_siginfo __user *uinfo, int options,
-		struct compat_rusage __user *uru)
+COMPAT_SYSCALL_DEFINE5(waitid,
+	int, which, compat_pid_t, pid,
+	struct compat_siginfo __user *, uinfo, int, options,
+	struct compat_rusage __user *, uru)
 {
 	siginfo_t info;
 	struct rusage ru;
@@ -584,7 +587,11 @@ asmlinkage long compat_sys_waitid(int which, compat_pid_t pid,
 		return ret;
 
 	if (uru) {
-		ret = put_compat_rusage(&ru, uru);
+		/* sys_waitid() overwrites everything in ru */
+		if (COMPAT_USE_64BIT_TIME)
+			ret = copy_to_user(uru, &ru, sizeof(ru));
+		else
+			ret = put_compat_rusage(&ru, uru);
 		if (ret)
 			return ret;
 	}
@@ -994,7 +1001,7 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
 	sigset_from_compat(&s, &s32);
 
 	if (uts) {
-		if (get_compat_timespec(&t, uts))
+		if (compat_get_timespec(&t, uts))
 			return -EFAULT;
 	}
 
diff --git a/kernel/cred.c b/kernel/cred.c
index 8888afb846e9..e0573a43c7df 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -372,6 +372,31 @@ error_put:
 	return ret;
 }
 
+static bool cred_cap_issubset(const struct cred *set, const struct cred *subset)
+{
+	const struct user_namespace *set_ns = set->user_ns;
+	const struct user_namespace *subset_ns = subset->user_ns;
+
+	/* If the two credentials are in the same user namespace see if
+	 * the capabilities of subset are a subset of set.
+	 */
+	if (set_ns == subset_ns)
+		return cap_issubset(subset->cap_permitted, set->cap_permitted);
+
+	/* The credentials are in a different user namespaces
+	 * therefore one is a subset of the other only if a set is an
+	 * ancestor of subset and set->euid is owner of subset or one
+	 * of subsets ancestors.
+	 */
+	for (;subset_ns != &init_user_ns; subset_ns = subset_ns->parent) {
+		if ((set_ns == subset_ns->parent) &&
+		    uid_eq(subset_ns->owner, set->euid))
+			return true;
+	}
+
+	return false;
+}
+
 /**
  * commit_creds - Install new credentials upon the current task
  * @new: The credentials to be assigned
@@ -410,7 +435,7 @@ int commit_creds(struct cred *new)
 	    !gid_eq(old->egid,  new->egid)  ||
 	    !uid_eq(old->fsuid, new->fsuid) ||
 	    !gid_eq(old->fsgid, new->fsgid) ||
-	    !cap_issubset(new->cap_permitted, old->cap_permitted)) {
+	    !cred_cap_issubset(old, new)) {
 		if (task->mm)
 			set_dumpable(task->mm, suid_dumpable);
 		task->pdeath_signal = 0;
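Note on the cred.c hunks above: cred_cap_issubset() only falls back to a plain cap_issubset() when both credentials live in the same user namespace; across namespaces it walks the subset's namespace chain toward init_user_ns and succeeds only where set's namespace is the immediate parent of some ancestor owned by set's euid. A standalone sketch of that walk with simplified types (illustrative only; struct and field names invented):

    #include <stdbool.h>
    #include <stdio.h>

    struct user_ns {
        struct user_ns *parent; /* NULL for the initial namespace */
        int owner;              /* euid that created this namespace */
    };

    /* Is (set_ns, set_euid) privileged over creds living in subset_ns? */
    static bool ns_may_control(struct user_ns *init_ns,
                               struct user_ns *set_ns, int set_euid,
                               struct user_ns *subset_ns)
    {
        for (; subset_ns != init_ns; subset_ns = subset_ns->parent) {
            if (set_ns == subset_ns->parent && subset_ns->owner == set_euid)
                return true;
        }
        return false;
    }

    int main(void)
    {
        struct user_ns init_ns = { NULL, 0 };
        struct user_ns child   = { &init_ns, 1000 }; /* created by uid 1000 */

        /* uid 1000 in the parent namespace controls the child it created: 1 */
        printf("%d\n", ns_may_control(&init_ns, &init_ns, 1000, &child));
        /* a different uid does not: 0 */
        printf("%d\n", ns_may_control(&init_ns, &init_ns, 42, &child));
        return 0;
    }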
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 4d5f8d5612f3..8875254120b6 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1970,6 +1970,8 @@ static int kdb_lsmod(int argc, const char **argv)
 
 	kdb_printf("Module                  Size  modstruct     Used by\n");
 	list_for_each_entry(mod, kdb_modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 
 		kdb_printf("%-20s%8u  0x%p ", mod->name,
 			   mod->core_size, (void *)mod);
diff --git a/kernel/fork.c b/kernel/fork.c
index c36c4e301efe..c535f33bbb9c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -146,7 +146,7 @@ void __weak arch_release_thread_info(struct thread_info *ti)
 static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
 						  int node)
 {
-	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
+	struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
 					     THREAD_SIZE_ORDER);
 
 	return page ? page_address(page) : NULL;
@@ -154,7 +154,7 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
 
 static inline void free_thread_info(struct thread_info *ti)
 {
-	free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+	free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
 }
 # else
 static struct kmem_cache *thread_info_cache;
@@ -1166,6 +1166,14 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 			current->signal->flags & SIGNAL_UNKILLABLE)
 		return ERR_PTR(-EINVAL);
 
+	/*
+	 * If the new process will be in a different pid namespace
+	 * don't allow the creation of threads.
+	 */
+	if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) &&
+	    (task_active_pid_ns(current) != current->nsproxy->pid_ns))
+		return ERR_PTR(-EINVAL);
+
 	retval = security_task_create(clone_flags);
 	if (retval)
 		goto fork_out;
@@ -1613,7 +1621,6 @@ long do_fork(unsigned long clone_flags,
 	return nr;
 }
 
-#ifdef CONFIG_GENERIC_KERNEL_THREAD
 /*
  * Create a kernel thread.
  */
@@ -1622,7 +1629,6 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 	return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
 		(unsigned long)arg, NULL, NULL);
 }
-#endif
 
 #ifdef __ARCH_WANT_SYS_FORK
 SYSCALL_DEFINE0(fork)
@@ -1662,8 +1668,10 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
 		 int, tls_val)
 #endif
 {
-	return do_fork(clone_flags, newsp, 0,
-		parent_tidptr, child_tidptr);
+	long ret = do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
+	asmlinkage_protect(5, ret, clone_flags, newsp,
+			parent_tidptr, child_tidptr, tls_val);
+	return ret;
 }
 #endif
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 35c70c9e24d8..e49a288fa479 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -818,7 +818,7 @@ static void irq_thread_dtor(struct callback_head *unused)
 	action = kthread_data(tsk);
 
 	pr_err("exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
-	       tsk->comm ? tsk->comm : "", tsk->pid, action->irq);
+	       tsk->comm, tsk->pid, action->irq);
 
 
 	desc = irq_to_desc(action->irq);
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
index 30b7b225306c..e30ac0fe61c3 100644
--- a/kernel/kcmp.c
+++ b/kernel/kcmp.c
@@ -4,6 +4,7 @@
 #include <linux/string.h>
 #include <linux/random.h>
 #include <linux/module.h>
+#include <linux/ptrace.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/cache.h>
diff --git a/kernel/kmod.c b/kernel/kmod.c
index ecd42b484db8..56dd34976d7b 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -228,9 +228,9 @@ static int ____call_usermodehelper(void *data)
 
 	commit_creds(new);
 
-	retval = kernel_execve(sub_info->path,
-			       (const char *const *)sub_info->argv,
-			       (const char *const *)sub_info->envp);
+	retval = do_execve(sub_info->path,
+			   (const char __user *const __user *)sub_info->argv,
+			   (const char __user *const __user *)sub_info->envp);
 	if (!retval)
 		return 0;
 
diff --git a/kernel/modsign_certificate.S b/kernel/modsign_certificate.S
new file mode 100644
index 000000000000..246b4c6e6135
--- /dev/null
+++ b/kernel/modsign_certificate.S
@@ -0,0 +1,19 @@
+/* SYMBOL_PREFIX defined on commandline from CONFIG_SYMBOL_PREFIX */
+#ifndef SYMBOL_PREFIX
+#define ASM_SYMBOL(sym) sym
+#else
+#define PASTE2(x,y) x##y
+#define PASTE(x,y) PASTE2(x,y)
+#define ASM_SYMBOL(sym) PASTE(SYMBOL_PREFIX, sym)
+#endif
+
+#define GLOBAL(name)	\
+	.globl ASM_SYMBOL(name);	\
+	ASM_SYMBOL(name):
+
+	.section ".init.data","aw"
+
+GLOBAL(modsign_certificate_list)
+	.incbin "signing_key.x509"
+	.incbin "extra_certificates"
+GLOBAL(modsign_certificate_list_end)
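Note on the new file above: the two GLOBAL() labels bracket the .incbin'd certificate data, so C code can size the blob by subtracting the start symbol from the end symbol, which is exactly how modsign_pubkey.c consumes the pair of extern arrays. The same pattern in miniature, with a plain array standing in for the label-delimited blob (illustrative only):

    #include <stdio.h>

    /* In the kernel these bounds come from the asm labels:
     *   extern const u8 modsign_certificate_list[];
     *   extern const u8 modsign_certificate_list_end[];
     * Here a fake embedded blob stands in for the .incbin data.
     */
    static const unsigned char blob[] = { 0x30, 0x82, 0x01, 0x0a };
    static const unsigned char *blob_start = blob;
    static const unsigned char *blob_end   = blob + sizeof(blob);

    int main(void)
    {
        /* size is end minus start, exactly as with the asm labels */
        printf("embedded blob is %zu bytes\n",
               (size_t)(blob_end - blob_start));
        return 0;
    }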
diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c
index 767e559dfb10..2b6e69909c39 100644
--- a/kernel/modsign_pubkey.c
+++ b/kernel/modsign_pubkey.c
@@ -20,12 +20,6 @@ struct key *modsign_keyring;
 
 extern __initdata const u8 modsign_certificate_list[];
 extern __initdata const u8 modsign_certificate_list_end[];
-asm(".section .init.data,\"aw\"\n"
-    SYMBOL_PREFIX "modsign_certificate_list:\n"
-    ".incbin \"signing_key.x509\"\n"
-    ".incbin \"extra_certificates\"\n"
-    SYMBOL_PREFIX "modsign_certificate_list_end:"
-    );
 
 /*
  * We need to make sure ccache doesn't cache the .o file as it doesn't notice
@@ -40,18 +34,15 @@ static __init int module_verify_init(void)
 {
 	pr_notice("Initialise module verification\n");
 
-	modsign_keyring = key_alloc(&key_type_keyring, ".module_sign",
-				    KUIDT_INIT(0), KGIDT_INIT(0),
-				    current_cred(),
-				    (KEY_POS_ALL & ~KEY_POS_SETATTR) |
-				    KEY_USR_VIEW | KEY_USR_READ,
-				    KEY_ALLOC_NOT_IN_QUOTA);
+	modsign_keyring = keyring_alloc(".module_sign",
+					KUIDT_INIT(0), KGIDT_INIT(0),
+					current_cred(),
+					((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+					 KEY_USR_VIEW | KEY_USR_READ),
+					KEY_ALLOC_NOT_IN_QUOTA, NULL);
 	if (IS_ERR(modsign_keyring))
 		panic("Can't allocate module signing keyring\n");
 
-	if (key_instantiate_and_link(modsign_keyring, NULL, 0, NULL, NULL) < 0)
-		panic("Can't instantiate module signing keyring\n");
-
 	return 0;
 }
 
diff --git a/kernel/module.c b/kernel/module.c
index 808bd62e1723..eab08274ec9b 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -21,6 +21,7 @@
 #include <linux/ftrace_event.h>
 #include <linux/init.h>
 #include <linux/kallsyms.h>
+#include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/sysfs.h>
 #include <linux/kernel.h>
@@ -28,6 +29,7 @@
 #include <linux/vmalloc.h>
 #include <linux/elf.h>
 #include <linux/proc_fs.h>
+#include <linux/security.h>
 #include <linux/seq_file.h>
 #include <linux/syscalls.h>
 #include <linux/fcntl.h>
@@ -59,6 +61,7 @@
 #include <linux/pfn.h>
 #include <linux/bsearch.h>
 #include <linux/fips.h>
+#include <uapi/linux/module.h>
 #include "module-internal.h"
 
 #define CREATE_TRACE_POINTS
@@ -185,6 +188,7 @@ struct load_info {
    ongoing or failed initialization etc. */
 static inline int strong_try_module_get(struct module *mod)
 {
+	BUG_ON(mod && mod->state == MODULE_STATE_UNFORMED);
 	if (mod && mod->state == MODULE_STATE_COMING)
 		return -EBUSY;
 	if (try_module_get(mod))
@@ -340,6 +344,9 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
 #endif
 		};
 
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
+
 		if (each_symbol_in_section(arr, ARRAY_SIZE(arr), mod, fn, data))
 			return true;
 	}
@@ -447,16 +454,24 @@ const struct kernel_symbol *find_symbol(const char *name,
 EXPORT_SYMBOL_GPL(find_symbol);
 
 /* Search for module by name: must hold module_mutex. */
-struct module *find_module(const char *name)
+static struct module *find_module_all(const char *name,
+				      bool even_unformed)
 {
 	struct module *mod;
 
 	list_for_each_entry(mod, &modules, list) {
+		if (!even_unformed && mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if (strcmp(mod->name, name) == 0)
 			return mod;
 	}
 	return NULL;
 }
+
+struct module *find_module(const char *name)
+{
+	return find_module_all(name, false);
+}
 EXPORT_SYMBOL_GPL(find_module);
 
 #ifdef CONFIG_SMP
@@ -522,6 +537,8 @@ bool is_module_percpu_address(unsigned long addr)
 	preempt_disable();
 
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if (!mod->percpu_size)
 			continue;
 		for_each_possible_cpu(cpu) {
@@ -1045,6 +1062,8 @@ static ssize_t show_initstate(struct module_attribute *mattr,
 	case MODULE_STATE_GOING:
 		state = "going";
 		break;
+	default:
+		BUG();
 	}
 	return sprintf(buffer, "%s\n", state);
 }
@@ -1783,6 +1802,8 @@ void set_all_modules_text_rw(void)
 
 	mutex_lock(&module_mutex);
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if ((mod->module_core) && (mod->core_text_size)) {
 			set_page_attributes(mod->module_core,
 						mod->module_core + mod->core_text_size,
@@ -1804,6 +1825,8 @@ void set_all_modules_text_ro(void)
 
 	mutex_lock(&module_mutex);
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if ((mod->module_core) && (mod->core_text_size)) {
 			set_page_attributes(mod->module_core,
 						mod->module_core + mod->core_text_size,
@@ -2279,7 +2302,7 @@ static void layout_symtab(struct module *mod, struct load_info *info)
 	Elf_Shdr *symsect = info->sechdrs + info->index.sym;
 	Elf_Shdr *strsect = info->sechdrs + info->index.str;
 	const Elf_Sym *src;
-	unsigned int i, nsrc, ndst, strtab_size;
+	unsigned int i, nsrc, ndst, strtab_size = 0;
 
 	/* Put symbol section at end of init part of module. */
 	symsect->sh_flags |= SHF_ALLOC;
@@ -2290,9 +2313,6 @@ static void layout_symtab(struct module *mod, struct load_info *info)
 	src = (void *)info->hdr + symsect->sh_offset;
 	nsrc = symsect->sh_size / sizeof(*src);
 
-	/* strtab always starts with a nul, so offset 0 is the empty string. */
-	strtab_size = 1;
-
 	/* Compute total space required for the core symbols' strtab. */
 	for (ndst = i = 0; i < nsrc; i++) {
 		if (i == 0 ||
@@ -2334,7 +2354,6 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
 	mod->core_symtab = dst = mod->module_core + info->symoffs;
 	mod->core_strtab = s = mod->module_core + info->stroffs;
 	src = mod->symtab;
-	*s++ = 0;
 	for (ndst = i = 0; i < mod->num_symtab; i++) {
 		if (i == 0 ||
 		    is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
@@ -2375,7 +2394,7 @@ static void dynamic_debug_remove(struct _ddebug *debug)
 
 void * __weak module_alloc(unsigned long size)
 {
-	return size == 0 ? NULL : vmalloc_exec(size);
+	return vmalloc_exec(size);
 }
 
 static void *module_alloc_update_bounds(unsigned long size)
@@ -2422,18 +2441,17 @@ static inline void kmemleak_load_module(const struct module *mod,
 #endif
 
 #ifdef CONFIG_MODULE_SIG
-static int module_sig_check(struct load_info *info,
-			    const void *mod, unsigned long *_len)
+static int module_sig_check(struct load_info *info)
 {
 	int err = -ENOKEY;
-	unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
-	unsigned long len = *_len;
+	const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
+	const void *mod = info->hdr;
 
-	if (len > markerlen &&
-	    memcmp(mod + len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
+	if (info->len > markerlen &&
+	    memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
 		/* We truncate the module to discard the signature */
-		*_len -= markerlen;
-		err = mod_verify_sig(mod, _len);
+		info->len -= markerlen;
+		err = mod_verify_sig(mod, &info->len);
 	}
 
 	if (!err) {
@@ -2451,59 +2469,114 @@ static int module_sig_check(struct load_info *info,
 	return err;
 }
 #else /* !CONFIG_MODULE_SIG */
-static int module_sig_check(struct load_info *info,
-			    void *mod, unsigned long *len)
+static int module_sig_check(struct load_info *info)
 {
 	return 0;
 }
 #endif /* !CONFIG_MODULE_SIG */
 
-/* Sets info->hdr, info->len and info->sig_ok. */
-static int copy_and_check(struct load_info *info,
-			  const void __user *umod, unsigned long len,
-			  const char __user *uargs)
+/* Sanity checks against invalid binaries, wrong arch, weird elf version. */
+static int elf_header_check(struct load_info *info)
+{
+	if (info->len < sizeof(*(info->hdr)))
+		return -ENOEXEC;
+
+	if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0
+	    || info->hdr->e_type != ET_REL
+	    || !elf_check_arch(info->hdr)
+	    || info->hdr->e_shentsize != sizeof(Elf_Shdr))
+		return -ENOEXEC;
+
+	if (info->hdr->e_shoff >= info->len
+	    || (info->hdr->e_shnum * sizeof(Elf_Shdr) >
+		info->len - info->hdr->e_shoff))
+		return -ENOEXEC;
+
+	return 0;
+}
+
+/* Sets info->hdr and info->len. */
+static int copy_module_from_user(const void __user *umod, unsigned long len,
+				  struct load_info *info)
 {
 	int err;
-	Elf_Ehdr *hdr;
 
-	if (len < sizeof(*hdr))
+	info->len = len;
+	if (info->len < sizeof(*(info->hdr)))
 		return -ENOEXEC;
 
+	err = security_kernel_module_from_file(NULL);
+	if (err)
+		return err;
+
 	/* Suck in entire file: we'll want most of it. */
-	if ((hdr = vmalloc(len)) == NULL)
+	info->hdr = vmalloc(info->len);
+	if (!info->hdr)
 		return -ENOMEM;
 
-	if (copy_from_user(hdr, umod, len) != 0) {
-		err = -EFAULT;
-		goto free_hdr;
+	if (copy_from_user(info->hdr, umod, info->len) != 0) {
+		vfree(info->hdr);
+		return -EFAULT;
 	}
 
-	err = module_sig_check(info, hdr, &len);
+	return 0;
+}
+
+/* Sets info->hdr and info->len. */
+static int copy_module_from_fd(int fd, struct load_info *info)
+{
+	struct file *file;
+	int err;
+	struct kstat stat;
+	loff_t pos;
+	ssize_t bytes = 0;
+
+	file = fget(fd);
+	if (!file)
+		return -ENOEXEC;
+
+	err = security_kernel_module_from_file(file);
 	if (err)
-		goto free_hdr;
+		goto out;
 
-	/* Sanity checks against insmoding binaries or wrong arch,
-	   weird elf version */
-	if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
-	    || hdr->e_type != ET_REL
-	    || !elf_check_arch(hdr)
-	    || hdr->e_shentsize != sizeof(Elf_Shdr)) {
-		err = -ENOEXEC;
-		goto free_hdr;
+	err = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat);
+	if (err)
+		goto out;
+
+	if (stat.size > INT_MAX) {
+		err = -EFBIG;
+		goto out;
 	}
 
-	if (hdr->e_shoff >= len ||
-	    hdr->e_shnum * sizeof(Elf_Shdr) > len - hdr->e_shoff) {
-		err = -ENOEXEC;
-		goto free_hdr;
+	/* Don't hand 0 to vmalloc, it whines. */
+	if (stat.size == 0) {
+		err = -EINVAL;
+		goto out;
 	}
 
-	info->hdr = hdr;
-	info->len = len;
-	return 0;
+	info->hdr = vmalloc(stat.size);
+	if (!info->hdr) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	pos = 0;
+	while (pos < stat.size) {
+		bytes = kernel_read(file, pos, (char *)(info->hdr) + pos,
+				    stat.size - pos);
+		if (bytes < 0) {
+			vfree(info->hdr);
+			err = bytes;
+			goto out;
+		}
+		if (bytes == 0)
+			break;
+		pos += bytes;
+	}
+	info->len = pos;
 
-free_hdr:
-	vfree(hdr);
+out:
+	fput(file);
 	return err;
 }
@@ -2512,7 +2585,7 @@ static void free_copy(struct load_info *info)
 	vfree(info->hdr);
 }
 
-static int rewrite_section_headers(struct load_info *info)
+static int rewrite_section_headers(struct load_info *info, int flags)
 {
 	unsigned int i;
 
@@ -2540,7 +2613,10 @@ static int rewrite_section_headers(struct load_info *info)
 	}
 
 	/* Track but don't keep modinfo and version sections. */
-	info->index.vers = find_sec(info, "__versions");
+	if (flags & MODULE_INIT_IGNORE_MODVERSIONS)
+		info->index.vers = 0; /* Pretend no __versions section! */
+	else
+		info->index.vers = find_sec(info, "__versions");
 	info->index.info = find_sec(info, ".modinfo");
 	info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
 	info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -2555,7 +2631,7 @@ static int rewrite_section_headers(struct load_info *info)
  * Return the temporary module pointer (we'll replace it with the final
  * one when we move the module sections around).
  */
-static struct module *setup_load_info(struct load_info *info)
+static struct module *setup_load_info(struct load_info *info, int flags)
 {
 	unsigned int i;
 	int err;
@@ -2566,7 +2642,7 @@ static struct module *setup_load_info(struct load_info *info)
 	info->secstrings = (void *)info->hdr
 		+ info->sechdrs[info->hdr->e_shstrndx].sh_offset;
 
-	err = rewrite_section_headers(info);
+	err = rewrite_section_headers(info, flags);
 	if (err)
 		return ERR_PTR(err);
 
@@ -2604,11 +2680,14 @@ static struct module *setup_load_info(struct load_info *info)
 	return mod;
 }
 
-static int check_modinfo(struct module *mod, struct load_info *info)
+static int check_modinfo(struct module *mod, struct load_info *info, int flags)
 {
 	const char *modmagic = get_modinfo(info, "vermagic");
 	int err;
 
+	if (flags & MODULE_INIT_IGNORE_VERMAGIC)
+		modmagic = NULL;
+
 	/* This is allowed: modprobe --force will invalidate it. */
 	if (!modmagic) {
 		err = try_to_force_load(mod, "bad vermagic");
@@ -2738,20 +2817,23 @@ static int move_module(struct module *mod, struct load_info *info)
 	memset(ptr, 0, mod->core_size);
 	mod->module_core = ptr;
 
-	ptr = module_alloc_update_bounds(mod->init_size);
-	/*
-	 * The pointer to this block is stored in the module structure
-	 * which is inside the block. This block doesn't need to be
-	 * scanned as it contains data and code that will be freed
-	 * after the module is initialized.
-	 */
-	kmemleak_ignore(ptr);
-	if (!ptr && mod->init_size) {
-		module_free(mod, mod->module_core);
-		return -ENOMEM;
-	}
-	memset(ptr, 0, mod->init_size);
-	mod->module_init = ptr;
+	if (mod->init_size) {
+		ptr = module_alloc_update_bounds(mod->init_size);
+		/*
+		 * The pointer to this block is stored in the module structure
+		 * which is inside the block. This block doesn't need to be
+		 * scanned as it contains data and code that will be freed
+		 * after the module is initialized.
+		 */
+		kmemleak_ignore(ptr);
+		if (!ptr) {
+			module_free(mod, mod->module_core);
+			return -ENOMEM;
+		}
+		memset(ptr, 0, mod->init_size);
+		mod->module_init = ptr;
+	} else
+		mod->module_init = NULL;
 
 	/* Transfer each section which specifies SHF_ALLOC */
 	pr_debug("final section addresses:\n");
@@ -2844,18 +2926,18 @@ int __weak module_frob_arch_sections(Elf_Ehdr *hdr,
 	return 0;
 }
 
-static struct module *layout_and_allocate(struct load_info *info)
+static struct module *layout_and_allocate(struct load_info *info, int flags)
 {
 	/* Module within temporary copy. */
 	struct module *mod;
 	Elf_Shdr *pcpusec;
 	int err;
 
-	mod = setup_load_info(info);
+	mod = setup_load_info(info, flags);
 	if (IS_ERR(mod))
 		return mod;
 
-	err = check_modinfo(mod, info);
+	err = check_modinfo(mod, info, flags);
 	if (err)
 		return ERR_PTR(err);
 
@@ -2935,40 +3017,181 @@ static bool finished_loading(const char *name)
 	bool ret;
 
 	mutex_lock(&module_mutex);
-	mod = find_module(name);
-	ret = !mod || mod->state != MODULE_STATE_COMING;
+	mod = find_module_all(name, true);
+	ret = !mod || mod->state == MODULE_STATE_LIVE
+		|| mod->state == MODULE_STATE_GOING;
 	mutex_unlock(&module_mutex);
 
 	return ret;
 }
 
+/* Call module constructors. */
+static void do_mod_ctors(struct module *mod)
+{
+#ifdef CONFIG_CONSTRUCTORS
+	unsigned long i;
+
+	for (i = 0; i < mod->num_ctors; i++)
+		mod->ctors[i]();
+#endif
+}
+
+/* This is where the real work happens */
+static int do_init_module(struct module *mod)
+{
+	int ret = 0;
+
+	/*
+	 * We want to find out whether @mod uses async during init.  Clear
+	 * PF_USED_ASYNC.  async_schedule*() will set it.
+	 */
+	current->flags &= ~PF_USED_ASYNC;
+
+	blocking_notifier_call_chain(&module_notify_list,
+			MODULE_STATE_COMING, mod);
+
+	/* Set RO and NX regions for core */
+	set_section_ro_nx(mod->module_core,
+				mod->core_text_size,
+				mod->core_ro_size,
+				mod->core_size);
+
+	/* Set RO and NX regions for init */
+	set_section_ro_nx(mod->module_init,
+				mod->init_text_size,
+				mod->init_ro_size,
+				mod->init_size);
+
+	do_mod_ctors(mod);
+	/* Start the module */
+	if (mod->init != NULL)
+		ret = do_one_initcall(mod->init);
+	if (ret < 0) {
+		/* Init routine failed: abort.  Try to protect us from
+		   buggy refcounters. */
+		mod->state = MODULE_STATE_GOING;
+		synchronize_sched();
+		module_put(mod);
+		blocking_notifier_call_chain(&module_notify_list,
+					     MODULE_STATE_GOING, mod);
+		free_module(mod);
+		wake_up_all(&module_wq);
+		return ret;
+	}
+	if (ret > 0) {
+		printk(KERN_WARNING
+"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n"
+"%s: loading module anyway...\n",
+		       __func__, mod->name, ret,
+		       __func__);
+		dump_stack();
+	}
+
+	/* Now it's a first class citizen! */
+	mod->state = MODULE_STATE_LIVE;
+	blocking_notifier_call_chain(&module_notify_list,
+				     MODULE_STATE_LIVE, mod);
+
+	/*
+	 * We need to finish all async code before the module init sequence
+	 * is done.  This has potential to deadlock.  For example, a newly
+	 * detected block device can trigger request_module() of the
+	 * default iosched from async probing task.  Once userland helper
+	 * reaches here, async_synchronize_full() will wait on the async
+	 * task waiting on request_module() and deadlock.
+	 *
+	 * This deadlock is avoided by perfomring async_synchronize_full()
+	 * iff module init queued any async jobs.  This isn't a full
+	 * solution as it will deadlock the same if module loading from
+	 * async jobs nests more than once; however, due to the various
+	 * constraints, this hack seems to be the best option for now.
+	 * Please refer to the following thread for details.
+	 *
+	 * http://thread.gmane.org/gmane.linux.kernel/1420814
+	 */
+	if (current->flags & PF_USED_ASYNC)
+		async_synchronize_full();
+
+	mutex_lock(&module_mutex);
+	/* Drop initial reference. */
+	module_put(mod);
+	trim_init_extable(mod);
+#ifdef CONFIG_KALLSYMS
+	mod->num_symtab = mod->core_num_syms;
+	mod->symtab = mod->core_symtab;
+	mod->strtab = mod->core_strtab;
+#endif
+	unset_module_init_ro_nx(mod);
+	module_free(mod, mod->module_init);
+	mod->module_init = NULL;
+	mod->init_size = 0;
+	mod->init_ro_size = 0;
+	mod->init_text_size = 0;
+	mutex_unlock(&module_mutex);
+	wake_up_all(&module_wq);
+
+	return 0;
+}
+
+static int may_init_module(void)
+{
+	if (!capable(CAP_SYS_MODULE) || modules_disabled)
+		return -EPERM;
+
+	return 0;
+}
+
 /* Allocate and load the module: note that size of section 0 is always
    zero, and we rely on this for optional sections. */
-static struct module *load_module(void __user *umod,
-				  unsigned long len,
-				  const char __user *uargs)
+static int load_module(struct load_info *info, const char __user *uargs,
+		       int flags)
 {
-	struct load_info info = { NULL, };
 	struct module *mod, *old;
 	long err;
 
-	pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n",
-	       umod, len, uargs);
+	err = module_sig_check(info);
+	if (err)
+		goto free_copy;
 
-	/* Copy in the blobs from userspace, check they are vaguely sane. */
-	err = copy_and_check(&info, umod, len, uargs);
+	err = elf_header_check(info);
 	if (err)
-		return ERR_PTR(err);
+		goto free_copy;
 
 	/* Figure out module layout, and allocate all the memory. */
-	mod = layout_and_allocate(&info);
+	mod = layout_and_allocate(info, flags);
 	if (IS_ERR(mod)) {
 		err = PTR_ERR(mod);
 		goto free_copy;
 	}
 
+	/*
+	 * We try to place it in the list now to make sure it's unique
+	 * before we dedicate too many resources.  In particular,
+	 * temporary percpu memory exhaustion.
+	 */
+	mod->state = MODULE_STATE_UNFORMED;
+again:
+	mutex_lock(&module_mutex);
+	if ((old = find_module_all(mod->name, true)) != NULL) {
+		if (old->state == MODULE_STATE_COMING
+		    || old->state == MODULE_STATE_UNFORMED) {
+			/* Wait in case it fails to load. */
+			mutex_unlock(&module_mutex);
+			err = wait_event_interruptible(module_wq,
+					       finished_loading(mod->name));
+			if (err)
+				goto free_module;
+			goto again;
+		}
+		err = -EEXIST;
+		mutex_unlock(&module_mutex);
+		goto free_module;
+	}
+	list_add_rcu(&mod->list, &modules);
+	mutex_unlock(&module_mutex);
+
 #ifdef CONFIG_MODULE_SIG
-	mod->sig_ok = info.sig_ok;
+	mod->sig_ok = info->sig_ok;
 	if (!mod->sig_ok)
 		add_taint_module(mod, TAINT_FORCED_MODULE);
 #endif
@@ -2976,29 +3199,29 @@ static struct module *load_module(void __user *umod,
 	/* Now module is in final location, initialize linked lists, etc. */
 	err = module_unload_init(mod);
 	if (err)
-		goto free_module;
+		goto unlink_mod;
 
 	/* Now we've got everything in the final locations, we can
 	 * find optional sections. */
-	find_module_sections(mod, &info);
+	find_module_sections(mod, info);
 
 	err = check_module_license_and_versions(mod);
 	if (err)
 		goto free_unload;
 
 	/* Set up MODINFO_ATTR fields */
-	setup_modinfo(mod, &info);
+	setup_modinfo(mod, info);
 
 	/* Fix up syms, so that st_value is a pointer to location. */
-	err = simplify_symbols(mod, &info);
+	err = simplify_symbols(mod, info);
 	if (err < 0)
 		goto free_modinfo;
 
-	err = apply_relocations(mod, &info);
+	err = apply_relocations(mod, info);
 	if (err < 0)
 		goto free_modinfo;
 
-	err = post_relocation(mod, &info);
+	err = post_relocation(mod, info);
 	if (err < 0)
 		goto free_modinfo;
 
@@ -3011,72 +3234,49 @@ static struct module *load_module(void __user *umod,
3011 goto free_arch_cleanup; 3234 goto free_arch_cleanup;
3012 } 3235 }
3013 3236
3014 /* Mark state as coming so strong_try_module_get() ignores us. */ 3237 dynamic_debug_setup(info->debug, info->num_debug);
3015 mod->state = MODULE_STATE_COMING;
3016 3238
3017 /* Now sew it into the lists so we can get lockdep and oops
3018 * info during argument parsing. No one should access us, since
3019 * strong_try_module_get() will fail.
3020 * lockdep/oops can run asynchronous, so use the RCU list insertion
3021 * function to insert in a way safe to concurrent readers.
3022 * The mutex protects against concurrent writers.
3023 */
3024again:
3025 mutex_lock(&module_mutex); 3239 mutex_lock(&module_mutex);
3026 if ((old = find_module(mod->name)) != NULL) { 3240 /* Find duplicate symbols (must be called under lock). */
3027 if (old->state == MODULE_STATE_COMING) {
3028 /* Wait in case it fails to load. */
3029 mutex_unlock(&module_mutex);
3030 err = wait_event_interruptible(module_wq,
3031 finished_loading(mod->name));
3032 if (err)
3033 goto free_arch_cleanup;
3034 goto again;
3035 }
3036 err = -EEXIST;
3037 goto unlock;
3038 }
3039
3040 /* This has to be done once we're sure module name is unique. */
3041 dynamic_debug_setup(info.debug, info.num_debug);
3042
3043 /* Find duplicate symbols */
3044 err = verify_export_symbols(mod); 3241 err = verify_export_symbols(mod);
3045 if (err < 0) 3242 if (err < 0)
3046 goto ddebug; 3243 goto ddebug_cleanup;
3244
3245 /* This relies on module_mutex for list integrity. */
3246 module_bug_finalize(info->hdr, info->sechdrs, mod);
3247
3248 /* Mark state as coming so strong_try_module_get() ignores us,
3249 * but kallsyms etc. can see us. */
3250 mod->state = MODULE_STATE_COMING;
3047 3251
3048 module_bug_finalize(info.hdr, info.sechdrs, mod);
3049 list_add_rcu(&mod->list, &modules);
3050 mutex_unlock(&module_mutex); 3252 mutex_unlock(&module_mutex);
3051 3253
3052 /* Module is ready to execute: parsing args may do that. */ 3254 /* Module is ready to execute: parsing args may do that. */
3053 err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, 3255 err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
3054 -32768, 32767, &ddebug_dyndbg_module_param_cb); 3256 -32768, 32767, &ddebug_dyndbg_module_param_cb);
3055 if (err < 0) 3257 if (err < 0)
3056 goto unlink; 3258 goto bug_cleanup;
3057 3259
3058 /* Link in to sysfs. */ 3260 /* Link in to sysfs. */
3059 err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp); 3261 err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp);
3060 if (err < 0) 3262 if (err < 0)
3061 goto unlink; 3263 goto bug_cleanup;
3062 3264
3063 /* Get rid of temporary copy. */ 3265 /* Get rid of temporary copy. */
3064 free_copy(&info); 3266 free_copy(info);
3065 3267
3066 /* Done! */ 3268 /* Done! */
3067 trace_module_load(mod); 3269 trace_module_load(mod);
3068 return mod;
3069 3270
3070 unlink: 3271 return do_init_module(mod);
3272
3273 bug_cleanup:
3274 /* module_bug_cleanup needs module_mutex protection */
3071 mutex_lock(&module_mutex); 3275 mutex_lock(&module_mutex);
3072 /* Unlink carefully: kallsyms could be walking list. */
3073 list_del_rcu(&mod->list);
3074 module_bug_cleanup(mod); 3276 module_bug_cleanup(mod);
3075 wake_up_all(&module_wq); 3277 ddebug_cleanup:
3076 ddebug:
3077 dynamic_debug_remove(info.debug);
3078 unlock:
3079 mutex_unlock(&module_mutex); 3278 mutex_unlock(&module_mutex);
3279 dynamic_debug_remove(info->debug);
3080 synchronize_sched(); 3280 synchronize_sched();
3081 kfree(mod->args); 3281 kfree(mod->args);
3082 free_arch_cleanup: 3282 free_arch_cleanup:
@@ -3085,107 +3285,59 @@ again:
3085 free_modinfo(mod); 3285 free_modinfo(mod);
3086 free_unload: 3286 free_unload:
3087 module_unload_free(mod); 3287 module_unload_free(mod);
3288 unlink_mod:
3289 mutex_lock(&module_mutex);
3290 /* Unlink carefully: kallsyms could be walking list. */
3291 list_del_rcu(&mod->list);
3292 wake_up_all(&module_wq);
3293 mutex_unlock(&module_mutex);
3088 free_module: 3294 free_module:
3089 module_deallocate(mod, &info); 3295 module_deallocate(mod, info);
3090 free_copy: 3296 free_copy:
3091 free_copy(&info); 3297 free_copy(info);
3092 return ERR_PTR(err); 3298 return err;
3093}
3094
3095/* Call module constructors. */
3096static void do_mod_ctors(struct module *mod)
3097{
3098#ifdef CONFIG_CONSTRUCTORS
3099 unsigned long i;
3100
3101 for (i = 0; i < mod->num_ctors; i++)
3102 mod->ctors[i]();
3103#endif
3104} 3299}
3105 3300
3106/* This is where the real work happens */
3107SYSCALL_DEFINE3(init_module, void __user *, umod, 3301SYSCALL_DEFINE3(init_module, void __user *, umod,
3108 unsigned long, len, const char __user *, uargs) 3302 unsigned long, len, const char __user *, uargs)
3109{ 3303{
3110 struct module *mod; 3304 int err;
3111 int ret = 0; 3305 struct load_info info = { };
3112 3306
3113 /* Must have permission */ 3307 err = may_init_module();
3114 if (!capable(CAP_SYS_MODULE) || modules_disabled) 3308 if (err)
3115 return -EPERM; 3309 return err;
3116 3310
3117 /* Do all the hard work */ 3311 pr_debug("init_module: umod=%p, len=%lu, uargs=%p\n",
3118 mod = load_module(umod, len, uargs); 3312 umod, len, uargs);
3119 if (IS_ERR(mod))
3120 return PTR_ERR(mod);
3121 3313
3122 blocking_notifier_call_chain(&module_notify_list, 3314 err = copy_module_from_user(umod, len, &info);
3123 MODULE_STATE_COMING, mod); 3315 if (err)
3316 return err;
3124 3317
3125 /* Set RO and NX regions for core */ 3318 return load_module(&info, uargs, 0);
3126 set_section_ro_nx(mod->module_core, 3319}
3127 mod->core_text_size,
3128 mod->core_ro_size,
3129 mod->core_size);
3130 3320
3131 /* Set RO and NX regions for init */ 3321SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
3132 set_section_ro_nx(mod->module_init, 3322{
3133 mod->init_text_size, 3323 int err;
3134 mod->init_ro_size, 3324 struct load_info info = { };
3135 mod->init_size);
3136 3325
3137 do_mod_ctors(mod); 3326 err = may_init_module();
3138 /* Start the module */ 3327 if (err)
3139 if (mod->init != NULL) 3328 return err;
3140 ret = do_one_initcall(mod->init);
3141 if (ret < 0) {
3142 /* Init routine failed: abort. Try to protect us from
3143 buggy refcounters. */
3144 mod->state = MODULE_STATE_GOING;
3145 synchronize_sched();
3146 module_put(mod);
3147 blocking_notifier_call_chain(&module_notify_list,
3148 MODULE_STATE_GOING, mod);
3149 free_module(mod);
3150 wake_up_all(&module_wq);
3151 return ret;
3152 }
3153 if (ret > 0) {
3154 printk(KERN_WARNING
3155"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n"
3156"%s: loading module anyway...\n",
3157 __func__, mod->name, ret,
3158 __func__);
3159 dump_stack();
3160 }
3161 3329
3162 /* Now it's a first class citizen! */ 3330 pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags);
3163 mod->state = MODULE_STATE_LIVE;
3164 blocking_notifier_call_chain(&module_notify_list,
3165 MODULE_STATE_LIVE, mod);
3166 3331
3167 /* We need to finish all async code before the module init sequence is done */ 3332 if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS
3168 async_synchronize_full(); 3333 |MODULE_INIT_IGNORE_VERMAGIC))
3334 return -EINVAL;
3169 3335
3170 mutex_lock(&module_mutex); 3336 err = copy_module_from_fd(fd, &info);
3171 /* Drop initial reference. */ 3337 if (err)
3172 module_put(mod); 3338 return err;
3173 trim_init_extable(mod);
3174#ifdef CONFIG_KALLSYMS
3175 mod->num_symtab = mod->core_num_syms;
3176 mod->symtab = mod->core_symtab;
3177 mod->strtab = mod->core_strtab;
3178#endif
3179 unset_module_init_ro_nx(mod);
3180 module_free(mod, mod->module_init);
3181 mod->module_init = NULL;
3182 mod->init_size = 0;
3183 mod->init_ro_size = 0;
3184 mod->init_text_size = 0;
3185 mutex_unlock(&module_mutex);
3186 wake_up_all(&module_wq);
3187 3339
3188 return 0; 3340 return load_module(&info, uargs, flags);
3189} 3341}
3190 3342
3191static inline int within(unsigned long addr, void *start, unsigned long size) 3343static inline int within(unsigned long addr, void *start, unsigned long size)
@@ -3261,6 +3413,8 @@ const char *module_address_lookup(unsigned long addr,
3261 3413
3262 preempt_disable(); 3414 preempt_disable();
3263 list_for_each_entry_rcu(mod, &modules, list) { 3415 list_for_each_entry_rcu(mod, &modules, list) {
3416 if (mod->state == MODULE_STATE_UNFORMED)
3417 continue;
3264 if (within_module_init(addr, mod) || 3418 if (within_module_init(addr, mod) ||
3265 within_module_core(addr, mod)) { 3419 within_module_core(addr, mod)) {
3266 if (modname) 3420 if (modname)
@@ -3284,6 +3438,8 @@ int lookup_module_symbol_name(unsigned long addr, char *symname)
3284 3438
3285 preempt_disable(); 3439 preempt_disable();
3286 list_for_each_entry_rcu(mod, &modules, list) { 3440 list_for_each_entry_rcu(mod, &modules, list) {
3441 if (mod->state == MODULE_STATE_UNFORMED)
3442 continue;
3287 if (within_module_init(addr, mod) || 3443 if (within_module_init(addr, mod) ||
3288 within_module_core(addr, mod)) { 3444 within_module_core(addr, mod)) {
3289 const char *sym; 3445 const char *sym;
@@ -3308,6 +3464,8 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size,
3308 3464
3309 preempt_disable(); 3465 preempt_disable();
3310 list_for_each_entry_rcu(mod, &modules, list) { 3466 list_for_each_entry_rcu(mod, &modules, list) {
3467 if (mod->state == MODULE_STATE_UNFORMED)
3468 continue;
3311 if (within_module_init(addr, mod) || 3469 if (within_module_init(addr, mod) ||
3312 within_module_core(addr, mod)) { 3470 within_module_core(addr, mod)) {
3313 const char *sym; 3471 const char *sym;
@@ -3335,6 +3493,8 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
3335 3493
3336 preempt_disable(); 3494 preempt_disable();
3337 list_for_each_entry_rcu(mod, &modules, list) { 3495 list_for_each_entry_rcu(mod, &modules, list) {
3496 if (mod->state == MODULE_STATE_UNFORMED)
3497 continue;
3338 if (symnum < mod->num_symtab) { 3498 if (symnum < mod->num_symtab) {
3339 *value = mod->symtab[symnum].st_value; 3499 *value = mod->symtab[symnum].st_value;
3340 *type = mod->symtab[symnum].st_info; 3500 *type = mod->symtab[symnum].st_info;
@@ -3377,9 +3537,12 @@ unsigned long module_kallsyms_lookup_name(const char *name)
3377 ret = mod_find_symname(mod, colon+1); 3537 ret = mod_find_symname(mod, colon+1);
3378 *colon = ':'; 3538 *colon = ':';
3379 } else { 3539 } else {
3380 list_for_each_entry_rcu(mod, &modules, list) 3540 list_for_each_entry_rcu(mod, &modules, list) {
3541 if (mod->state == MODULE_STATE_UNFORMED)
3542 continue;
3381 if ((ret = mod_find_symname(mod, name)) != 0) 3543 if ((ret = mod_find_symname(mod, name)) != 0)
3382 break; 3544 break;
3545 }
3383 } 3546 }
3384 preempt_enable(); 3547 preempt_enable();
3385 return ret; 3548 return ret;
@@ -3394,6 +3557,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
3394 int ret; 3557 int ret;
3395 3558
3396 list_for_each_entry(mod, &modules, list) { 3559 list_for_each_entry(mod, &modules, list) {
3560 if (mod->state == MODULE_STATE_UNFORMED)
3561 continue;
3397 for (i = 0; i < mod->num_symtab; i++) { 3562 for (i = 0; i < mod->num_symtab; i++) {
3398 ret = fn(data, mod->strtab + mod->symtab[i].st_name, 3563 ret = fn(data, mod->strtab + mod->symtab[i].st_name,
3399 mod, mod->symtab[i].st_value); 3564 mod, mod->symtab[i].st_value);
@@ -3409,6 +3574,7 @@ static char *module_flags(struct module *mod, char *buf)
3409{ 3574{
3410 int bx = 0; 3575 int bx = 0;
3411 3576
3577 BUG_ON(mod->state == MODULE_STATE_UNFORMED);
3412 if (mod->taints || 3578 if (mod->taints ||
3413 mod->state == MODULE_STATE_GOING || 3579 mod->state == MODULE_STATE_GOING ||
3414 mod->state == MODULE_STATE_COMING) { 3580 mod->state == MODULE_STATE_COMING) {
@@ -3450,6 +3616,10 @@ static int m_show(struct seq_file *m, void *p)
3450 struct module *mod = list_entry(p, struct module, list); 3616 struct module *mod = list_entry(p, struct module, list);
3451 char buf[8]; 3617 char buf[8];
3452 3618
3619 /* We always ignore unformed modules. */
3620 if (mod->state == MODULE_STATE_UNFORMED)
3621 return 0;
3622
3453 seq_printf(m, "%s %u", 3623 seq_printf(m, "%s %u",
3454 mod->name, mod->init_size + mod->core_size); 3624 mod->name, mod->init_size + mod->core_size);
3455 print_unload_info(m, mod); 3625 print_unload_info(m, mod);
@@ -3510,6 +3680,8 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
3510 3680
3511 preempt_disable(); 3681 preempt_disable();
3512 list_for_each_entry_rcu(mod, &modules, list) { 3682 list_for_each_entry_rcu(mod, &modules, list) {
3683 if (mod->state == MODULE_STATE_UNFORMED)
3684 continue;
3513 if (mod->num_exentries == 0) 3685 if (mod->num_exentries == 0)
3514 continue; 3686 continue;
3515 3687
@@ -3558,10 +3730,13 @@ struct module *__module_address(unsigned long addr)
3558 if (addr < module_addr_min || addr > module_addr_max) 3730 if (addr < module_addr_min || addr > module_addr_max)
3559 return NULL; 3731 return NULL;
3560 3732
3561 list_for_each_entry_rcu(mod, &modules, list) 3733 list_for_each_entry_rcu(mod, &modules, list) {
3734 if (mod->state == MODULE_STATE_UNFORMED)
3735 continue;
3562 if (within_module_core(addr, mod) 3736 if (within_module_core(addr, mod)
3563 || within_module_init(addr, mod)) 3737 || within_module_init(addr, mod))
3564 return mod; 3738 return mod;
3739 }
3565 return NULL; 3740 return NULL;
3566} 3741}
3567EXPORT_SYMBOL_GPL(__module_address); 3742EXPORT_SYMBOL_GPL(__module_address);
@@ -3614,8 +3789,11 @@ void print_modules(void)
3614 printk(KERN_DEFAULT "Modules linked in:"); 3789 printk(KERN_DEFAULT "Modules linked in:");
3615 /* Most callers should already have preempt disabled, but make sure */ 3790 /* Most callers should already have preempt disabled, but make sure */
3616 preempt_disable(); 3791 preempt_disable();
3617 list_for_each_entry_rcu(mod, &modules, list) 3792 list_for_each_entry_rcu(mod, &modules, list) {
3793 if (mod->state == MODULE_STATE_UNFORMED)
3794 continue;
3618 printk(" %s%s", mod->name, module_flags(mod, buf)); 3795 printk(" %s%s", mod->name, module_flags(mod, buf));
3796 }
3619 preempt_enable(); 3797 preempt_enable();
3620 if (last_unloaded_module[0]) 3798 if (last_unloaded_module[0])
3621 printk(" [last unloaded: %s]", last_unloaded_module); 3799 printk(" [last unloaded: %s]", last_unloaded_module);
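
The headline addition above is finit_module(2): load_module() now takes a prebuilt load_info, and the new syscall fills it from a file descriptor instead of a userspace blob, so the kernel can tie module signature checks to an origin file. A minimal userspace sketch of a caller, assuming a kernel with this patch and headers that define __NR_finit_module; the flag values mirror the uapi <linux/module.h> additions:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

/* uapi <linux/module.h> values added alongside this diff */
#define MODULE_INIT_IGNORE_MODVERSIONS 1
#define MODULE_INIT_IGNORE_VERMAGIC    2

int main(int argc, char **argv)
{
	int fd;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <module.ko> [params]\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* flags 0 keeps vermagic and modversion checking enabled; pass
	 * MODULE_INIT_IGNORE_VERMAGIC to force past a vermagic mismatch. */
	if (syscall(__NR_finit_module, fd, argc > 2 ? argv[2] : "", 0)) {
		perror("finit_module");
		return 1;
	}
	close(fd);
	return 0;
}
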
diff --git a/kernel/pid.c b/kernel/pid.c
index 36aa02ff17d6..de9af600006f 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -270,7 +270,6 @@ void free_pid(struct pid *pid)
270 wake_up_process(ns->child_reaper); 270 wake_up_process(ns->child_reaper);
271 break; 271 break;
272 case 0: 272 case 0:
273 ns->nr_hashed = -1;
274 schedule_work(&ns->proc_work); 273 schedule_work(&ns->proc_work);
275 break; 274 break;
276 } 275 }
@@ -319,7 +318,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
319 318
320 upid = pid->numbers + ns->level; 319 upid = pid->numbers + ns->level;
321 spin_lock_irq(&pidmap_lock); 320 spin_lock_irq(&pidmap_lock);
322 if (ns->nr_hashed < 0) 321 if (!(ns->nr_hashed & PIDNS_HASH_ADDING))
323 goto out_unlock; 322 goto out_unlock;
324 for ( ; upid >= pid->numbers; --upid) { 323 for ( ; upid >= pid->numbers; --upid) {
325 hlist_add_head_rcu(&upid->pid_chain, 324 hlist_add_head_rcu(&upid->pid_chain,
@@ -342,6 +341,13 @@ out_free:
342 goto out; 341 goto out;
343} 342}
344 343
344void disable_pid_allocation(struct pid_namespace *ns)
345{
346 spin_lock_irq(&pidmap_lock);
347 ns->nr_hashed &= ~PIDNS_HASH_ADDING;
348 spin_unlock_irq(&pidmap_lock);
349}
350
345struct pid *find_pid_ns(int nr, struct pid_namespace *ns) 351struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
346{ 352{
347 struct hlist_node *elem; 353 struct hlist_node *elem;
@@ -573,6 +579,9 @@ void __init pidhash_init(void)
573 579
574void __init pidmap_init(void) 580void __init pidmap_init(void)
575{ 581{
582 /* Verify no one has done anything silly */
583 BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);
584
576 /* bump default and minimum pid_max based on number of cpus */ 585 /* bump default and minimum pid_max based on number of cpus */
577 pid_max = min(pid_max_max, max_t(int, pid_max, 586 pid_max = min(pid_max_max, max_t(int, pid_max,
578 PIDS_PER_CPU_DEFAULT * num_possible_cpus())); 587 PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
@@ -584,7 +593,7 @@ void __init pidmap_init(void)
584 /* Reserve PID 0. We never call free_pidmap(0) */ 593 /* Reserve PID 0. We never call free_pidmap(0) */
585 set_bit(0, init_pid_ns.pidmap[0].page); 594 set_bit(0, init_pid_ns.pidmap[0].page);
586 atomic_dec(&init_pid_ns.pidmap[0].nr_free); 595 atomic_dec(&init_pid_ns.pidmap[0].nr_free);
587 init_pid_ns.nr_hashed = 1; 596 init_pid_ns.nr_hashed = PIDNS_HASH_ADDING;
588 597
589 init_pid_ns.pid_cachep = KMEM_CACHE(pid, 598 init_pid_ns.pid_cachep = KMEM_CACHE(pid,
590 SLAB_HWCACHE_ALIGN | SLAB_PANIC); 599 SLAB_HWCACHE_ALIGN | SLAB_PANIC);
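
PIDNS_HASH_ADDING replaces the old nr_hashed = -1 sentinel: the count of hashed pids keeps the low bits and the permission-to-allocate flag keeps the top bit, which is why pidmap_init() now asserts that PID_MAX_LIMIT stays below the flag. A self-contained model of the encoding, with the constant copied from include/linux/pid_namespace.h:

#include <assert.h>
#include <stdio.h>

#define PIDNS_HASH_ADDING (1U << 31)

int main(void)
{
	unsigned int nr_hashed = PIDNS_HASH_ADDING;	/* fresh namespace */

	nr_hashed += 1;					/* hash one pid */
	assert(nr_hashed & PIDNS_HASH_ADDING);		/* allocation allowed */

	nr_hashed &= ~PIDNS_HASH_ADDING;	/* disable_pid_allocation() */
	assert(!(nr_hashed & PIDNS_HASH_ADDING));	/* alloc_pid() now fails */
	printf("%u pid(s) still hashed\n", nr_hashed);
	return 0;
}
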
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 560da0dab230..c1c3dc1c6023 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -115,6 +115,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
115 ns->level = level; 115 ns->level = level;
116 ns->parent = get_pid_ns(parent_pid_ns); 116 ns->parent = get_pid_ns(parent_pid_ns);
117 ns->user_ns = get_user_ns(user_ns); 117 ns->user_ns = get_user_ns(user_ns);
118 ns->nr_hashed = PIDNS_HASH_ADDING;
118 INIT_WORK(&ns->proc_work, proc_cleanup_work); 119 INIT_WORK(&ns->proc_work, proc_cleanup_work);
119 120
120 set_bit(0, ns->pidmap[0].page); 121 set_bit(0, ns->pidmap[0].page);
@@ -181,6 +182,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
181 int rc; 182 int rc;
182 struct task_struct *task, *me = current; 183 struct task_struct *task, *me = current;
183 184
185 /* Don't allow any more processes into the pid namespace */
186 disable_pid_allocation(pid_ns);
187
184 /* Ignore SIGCHLD causing any terminated children to autoreap */ 188 /* Ignore SIGCHLD causing any terminated children to autoreap */
185 spin_lock_irq(&me->sighand->siglock); 189 spin_lock_irq(&me->sighand->siglock);
186 me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN; 190 me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN;
@@ -325,7 +329,8 @@ static int pidns_install(struct nsproxy *nsproxy, void *ns)
325 struct pid_namespace *active = task_active_pid_ns(current); 329 struct pid_namespace *active = task_active_pid_ns(current);
326 struct pid_namespace *ancestor, *new = ns; 330 struct pid_namespace *ancestor, *new = ns;
327 331
328 if (!ns_capable(new->user_ns, CAP_SYS_ADMIN)) 332 if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) ||
333 !nsown_capable(CAP_SYS_ADMIN))
329 return -EPERM; 334 return -EPERM;
330 335
331 /* 336 /*
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index d73840271dce..a278cad1d5d6 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -9,6 +9,7 @@
9#include <asm/uaccess.h> 9#include <asm/uaccess.h>
10#include <linux/kernel_stat.h> 10#include <linux/kernel_stat.h>
11#include <trace/events/timer.h> 11#include <trace/events/timer.h>
12#include <linux/random.h>
12 13
13/* 14/*
14 * Called after updating RLIMIT_CPU to run cpu timer and update 15 * Called after updating RLIMIT_CPU to run cpu timer and update
@@ -470,6 +471,8 @@ static void cleanup_timers(struct list_head *head,
470 */ 471 */
471void posix_cpu_timers_exit(struct task_struct *tsk) 472void posix_cpu_timers_exit(struct task_struct *tsk)
472{ 473{
474 add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
475 sizeof(unsigned long long));
473 cleanup_timers(tsk->cpu_timers, 476 cleanup_timers(tsk->cpu_timers,
474 tsk->utime, tsk->stime, tsk->se.sum_exec_runtime); 477 tsk->utime, tsk->stime, tsk->se.sum_exec_runtime);
475 478
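
add_device_randomness() mixes non-secret, machine-specific bytes into the input pool without crediting any entropy; the hunk above feeds each task's final sum_exec_runtime in at exit. A hedged in-kernel sketch of the same helper used from a hypothetical driver:

#include <linux/random.h>

/* Hypothetical example: stir a board serial number into the pool at
 * probe time.  No entropy is credited; the bytes only perturb the
 * pool state, exactly as with the exit-runtime feed above. */
static void example_seed_pool(const char *serial, size_t len)
{
	add_device_randomness(serial, len);
}
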
diff --git a/kernel/printk.c b/kernel/printk.c
index 19c0d7bcf24a..357f714ddd49 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -870,10 +870,11 @@ static size_t print_time(u64 ts, char *buf)
870 if (!printk_time) 870 if (!printk_time)
871 return 0; 871 return 0;
872 872
873 rem_nsec = do_div(ts, 1000000000);
874
873 if (!buf) 875 if (!buf)
874 return 15; 876 return snprintf(NULL, 0, "[%5lu.000000] ", (unsigned long)ts);
875 877
876 rem_nsec = do_div(ts, 1000000000);
877 return sprintf(buf, "[%5lu.%06lu] ", 878 return sprintf(buf, "[%5lu.%06lu] ",
878 (unsigned long)ts, rem_nsec / 1000); 879 (unsigned long)ts, rem_nsec / 1000);
879} 880}
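
The print_time() fix replaces the hard-coded 15-byte answer with the classic measure-then-format idiom: snprintf() given a NULL buffer and size 0 returns the length the output would need. A standalone userspace illustration of the idiom:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned long ts = 5;	/* seconds, as in the prefix above */
	/* pass 1: NULL buffer, size 0, returns required length only */
	int need = snprintf(NULL, 0, "[%5lu.000000] ", ts);
	char *buf = malloc(need + 1);

	if (!buf)
		return 1;
	/* pass 2: format for real into an exactly-sized buffer */
	snprintf(buf, need + 1, "[%5lu.000000] ", ts);
	printf("%d byte prefix: '%s'\n", need, buf);
	free(buf);
	return 0;
}
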
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 1599157336a6..6cbeaae4406d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -117,11 +117,45 @@ void __ptrace_unlink(struct task_struct *child)
117 * TASK_KILLABLE sleeps. 117 * TASK_KILLABLE sleeps.
118 */ 118 */
119 if (child->jobctl & JOBCTL_STOP_PENDING || task_is_traced(child)) 119 if (child->jobctl & JOBCTL_STOP_PENDING || task_is_traced(child))
120 signal_wake_up(child, task_is_traced(child)); 120 ptrace_signal_wake_up(child, true);
121 121
122 spin_unlock(&child->sighand->siglock); 122 spin_unlock(&child->sighand->siglock);
123} 123}
124 124
125/* Ensure that nothing can wake it up, even SIGKILL */
126static bool ptrace_freeze_traced(struct task_struct *task)
127{
128 bool ret = false;
129
130 /* Lockless, nobody but us can set this flag */
131 if (task->jobctl & JOBCTL_LISTENING)
132 return ret;
133
134 spin_lock_irq(&task->sighand->siglock);
135 if (task_is_traced(task) && !__fatal_signal_pending(task)) {
136 task->state = __TASK_TRACED;
137 ret = true;
138 }
139 spin_unlock_irq(&task->sighand->siglock);
140
141 return ret;
142}
143
144static void ptrace_unfreeze_traced(struct task_struct *task)
145{
146 if (task->state != __TASK_TRACED)
147 return;
148
149 WARN_ON(!task->ptrace || task->parent != current);
150
151 spin_lock_irq(&task->sighand->siglock);
152 if (__fatal_signal_pending(task))
153 wake_up_state(task, __TASK_TRACED);
154 else
155 task->state = TASK_TRACED;
156 spin_unlock_irq(&task->sighand->siglock);
157}
158
125/** 159/**
126 * ptrace_check_attach - check whether ptracee is ready for ptrace operation 160 * ptrace_check_attach - check whether ptracee is ready for ptrace operation
127 * @child: ptracee to check for 161 * @child: ptracee to check for
@@ -139,7 +173,7 @@ void __ptrace_unlink(struct task_struct *child)
139 * RETURNS: 173 * RETURNS:
140 * 0 on success, -ESRCH if %child is not ready. 174 * 0 on success, -ESRCH if %child is not ready.
141 */ 175 */
142int ptrace_check_attach(struct task_struct *child, bool ignore_state) 176static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
143{ 177{
144 int ret = -ESRCH; 178 int ret = -ESRCH;
145 179
@@ -151,24 +185,29 @@ int ptrace_check_attach(struct task_struct *child, bool ignore_state)
151 * be changed by us so it's not changing right after this. 185 * be changed by us so it's not changing right after this.
152 */ 186 */
153 read_lock(&tasklist_lock); 187 read_lock(&tasklist_lock);
154 if ((child->ptrace & PT_PTRACED) && child->parent == current) { 188 if (child->ptrace && child->parent == current) {
189 WARN_ON(child->state == __TASK_TRACED);
155 /* 190 /*
156 * child->sighand can't be NULL, release_task() 191 * child->sighand can't be NULL, release_task()
157 * does ptrace_unlink() before __exit_signal(). 192 * does ptrace_unlink() before __exit_signal().
158 */ 193 */
159 spin_lock_irq(&child->sighand->siglock); 194 if (ignore_state || ptrace_freeze_traced(child))
160 WARN_ON_ONCE(task_is_stopped(child));
161 if (ignore_state || (task_is_traced(child) &&
162 !(child->jobctl & JOBCTL_LISTENING)))
163 ret = 0; 195 ret = 0;
164 spin_unlock_irq(&child->sighand->siglock);
165 } 196 }
166 read_unlock(&tasklist_lock); 197 read_unlock(&tasklist_lock);
167 198
168 if (!ret && !ignore_state) 199 if (!ret && !ignore_state) {
169 ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH; 200 if (!wait_task_inactive(child, __TASK_TRACED)) {
201 /*
202 * This can only happen if may_ptrace_stop() fails and
203 * ptrace_stop() changes ->state back to TASK_RUNNING,
204 * so we should not worry about leaking __TASK_TRACED.
205 */
206 WARN_ON(child->state == __TASK_TRACED);
207 ret = -ESRCH;
208 }
209 }
170 210
171 /* All systems go.. */
172 return ret; 211 return ret;
173} 212}
174 213
@@ -317,7 +356,7 @@ static int ptrace_attach(struct task_struct *task, long request,
317 */ 356 */
318 if (task_is_stopped(task) && 357 if (task_is_stopped(task) &&
319 task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) 358 task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING))
320 signal_wake_up(task, 1); 359 signal_wake_up_state(task, __TASK_STOPPED);
321 360
322 spin_unlock(&task->sighand->siglock); 361 spin_unlock(&task->sighand->siglock);
323 362
@@ -737,7 +776,7 @@ int ptrace_request(struct task_struct *child, long request,
737 * tracee into STOP. 776 * tracee into STOP.
738 */ 777 */
739 if (likely(task_set_jobctl_pending(child, JOBCTL_TRAP_STOP))) 778 if (likely(task_set_jobctl_pending(child, JOBCTL_TRAP_STOP)))
740 signal_wake_up(child, child->jobctl & JOBCTL_LISTENING); 779 ptrace_signal_wake_up(child, child->jobctl & JOBCTL_LISTENING);
741 780
742 unlock_task_sighand(child, &flags); 781 unlock_task_sighand(child, &flags);
743 ret = 0; 782 ret = 0;
@@ -763,7 +802,7 @@ int ptrace_request(struct task_struct *child, long request,
763 * start of this trap and now. Trigger re-trap. 802 * start of this trap and now. Trigger re-trap.
764 */ 803 */
765 if (child->jobctl & JOBCTL_TRAP_NOTIFY) 804 if (child->jobctl & JOBCTL_TRAP_NOTIFY)
766 signal_wake_up(child, true); 805 ptrace_signal_wake_up(child, true);
767 ret = 0; 806 ret = 0;
768 } 807 }
769 unlock_task_sighand(child, &flags); 808 unlock_task_sighand(child, &flags);
@@ -900,6 +939,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
900 goto out_put_task_struct; 939 goto out_put_task_struct;
901 940
902 ret = arch_ptrace(child, request, addr, data); 941 ret = arch_ptrace(child, request, addr, data);
942 if (ret || request != PTRACE_DETACH)
943 ptrace_unfreeze_traced(child);
903 944
904 out_put_task_struct: 945 out_put_task_struct:
905 put_task_struct(child); 946 put_task_struct(child);
@@ -1039,8 +1080,11 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
1039 1080
1040 ret = ptrace_check_attach(child, request == PTRACE_KILL || 1081 ret = ptrace_check_attach(child, request == PTRACE_KILL ||
1041 request == PTRACE_INTERRUPT); 1082 request == PTRACE_INTERRUPT);
1042 if (!ret) 1083 if (!ret) {
1043 ret = compat_arch_ptrace(child, request, addr, data); 1084 ret = compat_arch_ptrace(child, request, addr, data);
1085 if (ret || request != PTRACE_DETACH)
1086 ptrace_unfreeze_traced(child);
1087 }
1044 1088
1045 out_put_task_struct: 1089 out_put_task_struct:
1046 put_task_struct(child); 1090 put_task_struct(child);
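
The freeze/unfreeze pair pins a tracee in __TASK_TRACED for the span of one request so nothing, not even SIGKILL delivery, can wake it while the tracer inspects it; note that both syscall entry points now unfreeze on every path except a successful PTRACE_DETACH. From userspace the contract is unchanged: attach, wait for the stop, then issue requests. A minimal sketch of that sequence:

#include <stdio.h>
#include <stdlib.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>

/* Attach and wait until the tracee reports the stop; only then is it
 * in a state ptrace_check_attach() will accept for further requests. */
static int attach_and_wait(pid_t pid)
{
	int status;

	if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) == -1) {
		perror("PTRACE_ATTACH");
		return -1;
	}
	if (waitpid(pid, &status, 0) == -1 || !WIFSTOPPED(status)) {
		perror("waitpid");
		return -1;
	}
	return 0;
}

int main(int argc, char **argv)
{
	return argc > 1 && attach_and_wait(atoi(argv[1])) == 0 ? 0 : 1;
}
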
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index 3920d593e63c..ff55247e7049 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -86,33 +86,39 @@ int res_counter_charge_nofail(struct res_counter *counter, unsigned long val,
86 return __res_counter_charge(counter, val, limit_fail_at, true); 86 return __res_counter_charge(counter, val, limit_fail_at, true);
87} 87}
88 88
89void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val) 89u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
90{ 90{
91 if (WARN_ON(counter->usage < val)) 91 if (WARN_ON(counter->usage < val))
92 val = counter->usage; 92 val = counter->usage;
93 93
94 counter->usage -= val; 94 counter->usage -= val;
95 return counter->usage;
95} 96}
96 97
97void res_counter_uncharge_until(struct res_counter *counter, 98u64 res_counter_uncharge_until(struct res_counter *counter,
98 struct res_counter *top, 99 struct res_counter *top,
99 unsigned long val) 100 unsigned long val)
100{ 101{
101 unsigned long flags; 102 unsigned long flags;
102 struct res_counter *c; 103 struct res_counter *c;
104 u64 ret = 0;
103 105
104 local_irq_save(flags); 106 local_irq_save(flags);
105 for (c = counter; c != top; c = c->parent) { 107 for (c = counter; c != top; c = c->parent) {
108 u64 r;
106 spin_lock(&c->lock); 109 spin_lock(&c->lock);
107 res_counter_uncharge_locked(c, val); 110 r = res_counter_uncharge_locked(c, val);
111 if (c == counter)
112 ret = r;
108 spin_unlock(&c->lock); 113 spin_unlock(&c->lock);
109 } 114 }
110 local_irq_restore(flags); 115 local_irq_restore(flags);
116 return ret;
111} 117}
112 118
113void res_counter_uncharge(struct res_counter *counter, unsigned long val) 119u64 res_counter_uncharge(struct res_counter *counter, unsigned long val)
114{ 120{
115 res_counter_uncharge_until(counter, NULL, val); 121 return res_counter_uncharge_until(counter, NULL, val);
116} 122}
117 123
118static inline unsigned long long * 124static inline unsigned long long *
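
Returning the post-uncharge usage lets the caller learn whether this uncharge drained the counter without re-taking the spinlock to re-read it; memcg is the intended consumer. A self-contained model of the new contract:

#include <stdio.h>

struct counter { unsigned long long usage; };

/* Mirrors res_counter_uncharge_locked(): clamp, subtract, and report
 * what remains so the caller can act on the zero crossing atomically. */
static unsigned long long uncharge(struct counter *c, unsigned long long val)
{
	if (c->usage < val)
		val = c->usage;
	c->usage -= val;
	return c->usage;
}

int main(void)
{
	struct counter c = { .usage = 4096 };

	if (uncharge(&c, 4096) == 0)
		printf("counter drained: safe to drop the last reference\n");
	return 0;
}
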
diff --git a/kernel/rwsem.c b/kernel/rwsem.c
index 6850f53e02d8..b3c6c3fcd847 100644
--- a/kernel/rwsem.c
+++ b/kernel/rwsem.c
@@ -116,6 +116,16 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
116 116
117EXPORT_SYMBOL(down_read_nested); 117EXPORT_SYMBOL(down_read_nested);
118 118
119void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
120{
121 might_sleep();
122 rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
123
124 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
125}
126
127EXPORT_SYMBOL(_down_write_nest_lock);
128
119void down_write_nested(struct rw_semaphore *sem, int subclass) 129void down_write_nested(struct rw_semaphore *sem, int subclass)
120{ 130{
121 might_sleep(); 131 might_sleep();
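
_down_write_nest_lock() backs the down_write_nest_lock() wrapper in <linux/rwsem.h>: it tells lockdep that write-locking many rwsems of a single class is deadlock-free because an outer lock already serializes all takers (mm's anon_vma chains were the motivating user). A hedged kernel-side sketch with hypothetical names:

#include <linux/mutex.h>
#include <linux/rwsem.h>

/* Hypothetical: grab every rwsem in a group for write.  The outer
 * mutex is what makes this AA-looking pattern safe, and passing it as
 * the nest lock is what keeps lockdep from flagging it. */
static void lock_group(struct rw_semaphore **sems, int n, struct mutex *outer)
{
	int i;

	mutex_lock(outer);
	for (i = 0; i < n; i++)
		down_write_nest_lock(sems[i], outer);
}
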
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c6737f4fb63b..bfe8ae22f710 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1523,7 +1523,8 @@ out:
1523 */ 1523 */
1524int wake_up_process(struct task_struct *p) 1524int wake_up_process(struct task_struct *p)
1525{ 1525{
1526 return try_to_wake_up(p, TASK_ALL, 0); 1526 WARN_ON(task_is_stopped_or_traced(p));
1527 return try_to_wake_up(p, TASK_NORMAL, 0);
1527} 1528}
1528EXPORT_SYMBOL(wake_up_process); 1529EXPORT_SYMBOL(wake_up_process);
1529 1530
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4603d6cb9e25..5eea8707234a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -793,8 +793,11 @@ unsigned int sysctl_numa_balancing_scan_delay = 1000;
793 793
794static void task_numa_placement(struct task_struct *p) 794static void task_numa_placement(struct task_struct *p)
795{ 795{
796 int seq = ACCESS_ONCE(p->mm->numa_scan_seq); 796 int seq;
797 797
798 if (!p->mm) /* for example, ksmd faulting in a user's mm */
799 return;
800 seq = ACCESS_ONCE(p->mm->numa_scan_seq);
798 if (p->numa_scan_seq == seq) 801 if (p->numa_scan_seq == seq)
799 return; 802 return;
800 p->numa_scan_seq = seq; 803 p->numa_scan_seq = seq;
diff --git a/kernel/signal.c b/kernel/signal.c
index 580a91e63471..3d09cf6cde75 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -31,6 +31,7 @@
31#include <linux/nsproxy.h> 31#include <linux/nsproxy.h>
32#include <linux/user_namespace.h> 32#include <linux/user_namespace.h>
33#include <linux/uprobes.h> 33#include <linux/uprobes.h>
34#include <linux/compat.h>
34#define CREATE_TRACE_POINTS 35#define CREATE_TRACE_POINTS
35#include <trace/events/signal.h> 36#include <trace/events/signal.h>
36 37
@@ -679,23 +680,17 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
679 * No need to set need_resched since signal event passing 680 * No need to set need_resched since signal event passing
680 * goes through ->blocked 681 * goes through ->blocked
681 */ 682 */
682void signal_wake_up(struct task_struct *t, int resume) 683void signal_wake_up_state(struct task_struct *t, unsigned int state)
683{ 684{
684 unsigned int mask;
685
686 set_tsk_thread_flag(t, TIF_SIGPENDING); 685 set_tsk_thread_flag(t, TIF_SIGPENDING);
687
688 /* 686 /*
689 * For SIGKILL, we want to wake it up in the stopped/traced/killable 687 * TASK_WAKEKILL also means wake it up in the stopped/traced/killable
690 * case. We don't check t->state here because there is a race with it 688 * case. We don't check t->state here because there is a race with it
691 * executing another processor and just now entering stopped state. 689 * executing another processor and just now entering stopped state.
692 * By using wake_up_state, we ensure the process will wake up and 690 * By using wake_up_state, we ensure the process will wake up and
693 * handle its death signal. 691 * handle its death signal.
694 */ 692 */
695 mask = TASK_INTERRUPTIBLE; 693 if (!wake_up_state(t, state | TASK_INTERRUPTIBLE))
696 if (resume)
697 mask |= TASK_WAKEKILL;
698 if (!wake_up_state(t, mask))
699 kick_process(t); 694 kick_process(t);
700} 695}
701 696
@@ -843,7 +838,7 @@ static void ptrace_trap_notify(struct task_struct *t)
843 assert_spin_locked(&t->sighand->siglock); 838 assert_spin_locked(&t->sighand->siglock);
844 839
845 task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY); 840 task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY);
846 signal_wake_up(t, t->jobctl & JOBCTL_LISTENING); 841 ptrace_signal_wake_up(t, t->jobctl & JOBCTL_LISTENING);
847} 842}
848 843
849/* 844/*
@@ -1799,6 +1794,10 @@ static inline int may_ptrace_stop(void)
1799 * If SIGKILL was already sent before the caller unlocked 1794 * If SIGKILL was already sent before the caller unlocked
1800 * ->siglock we must see ->core_state != NULL. Otherwise it 1795 * ->siglock we must see ->core_state != NULL. Otherwise it
1801 * is safe to enter schedule(). 1796 * is safe to enter schedule().
1797 *
1798 * This is almost outdated, a task with the pending SIGKILL can't
1799 * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported
1800 * after SIGKILL was already dequeued.
1802 */ 1801 */
1803 if (unlikely(current->mm->core_state) && 1802 if (unlikely(current->mm->core_state) &&
1804 unlikely(current->mm == current->parent->mm)) 1803 unlikely(current->mm == current->parent->mm))
@@ -1924,6 +1923,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
1924 if (gstop_done) 1923 if (gstop_done)
1925 do_notify_parent_cldstop(current, false, why); 1924 do_notify_parent_cldstop(current, false, why);
1926 1925
1926 /* tasklist protects us from ptrace_freeze_traced() */
1927 __set_current_state(TASK_RUNNING); 1927 __set_current_state(TASK_RUNNING);
1928 if (clear_code) 1928 if (clear_code)
1929 current->exit_code = 0; 1929 current->exit_code = 0;
@@ -2527,11 +2527,8 @@ static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset)
2527 */ 2527 */
2528void set_current_blocked(sigset_t *newset) 2528void set_current_blocked(sigset_t *newset)
2529{ 2529{
2530 struct task_struct *tsk = current;
2531 sigdelsetmask(newset, sigmask(SIGKILL) | sigmask(SIGSTOP)); 2530 sigdelsetmask(newset, sigmask(SIGKILL) | sigmask(SIGSTOP));
2532 spin_lock_irq(&tsk->sighand->siglock); 2531 __set_current_blocked(newset);
2533 __set_task_blocked(tsk, newset);
2534 spin_unlock_irq(&tsk->sighand->siglock);
2535} 2532}
2536 2533
2537void __set_current_blocked(const sigset_t *newset) 2534void __set_current_blocked(const sigset_t *newset)
@@ -3094,6 +3091,80 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s
3094out: 3091out:
3095 return error; 3092 return error;
3096} 3093}
3094#ifdef CONFIG_GENERIC_SIGALTSTACK
3095SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss)
3096{
3097 return do_sigaltstack(uss, uoss, current_user_stack_pointer());
3098}
3099#endif
3100
3101int restore_altstack(const stack_t __user *uss)
3102{
3103 int err = do_sigaltstack(uss, NULL, current_user_stack_pointer());
3104 /* squash all but EFAULT for now */
3105 return err == -EFAULT ? err : 0;
3106}
3107
3108int __save_altstack(stack_t __user *uss, unsigned long sp)
3109{
3110 struct task_struct *t = current;
3111 return __put_user((void __user *)t->sas_ss_sp, &uss->ss_sp) |
3112 __put_user(sas_ss_flags(sp), &uss->ss_flags) |
3113 __put_user(t->sas_ss_size, &uss->ss_size);
3114}
3115
3116#ifdef CONFIG_COMPAT
3117#ifdef CONFIG_GENERIC_SIGALTSTACK
3118COMPAT_SYSCALL_DEFINE2(sigaltstack,
3119 const compat_stack_t __user *, uss_ptr,
3120 compat_stack_t __user *, uoss_ptr)
3121{
3122 stack_t uss, uoss;
3123 int ret;
3124 mm_segment_t seg;
3125
3126 if (uss_ptr) {
3127 compat_stack_t uss32;
3128
3129 memset(&uss, 0, sizeof(stack_t));
3130 if (copy_from_user(&uss32, uss_ptr, sizeof(compat_stack_t)))
3131 return -EFAULT;
3132 uss.ss_sp = compat_ptr(uss32.ss_sp);
3133 uss.ss_flags = uss32.ss_flags;
3134 uss.ss_size = uss32.ss_size;
3135 }
3136 seg = get_fs();
3137 set_fs(KERNEL_DS);
3138 ret = do_sigaltstack((stack_t __force __user *) (uss_ptr ? &uss : NULL),
3139 (stack_t __force __user *) &uoss,
3140 compat_user_stack_pointer());
3141 set_fs(seg);
3142 if (ret >= 0 && uoss_ptr) {
3143 if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(compat_stack_t)) ||
3144 __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) ||
3145 __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) ||
3146 __put_user(uoss.ss_size, &uoss_ptr->ss_size))
3147 ret = -EFAULT;
3148 }
3149 return ret;
3150}
3151
3152int compat_restore_altstack(const compat_stack_t __user *uss)
3153{
3154 int err = compat_sys_sigaltstack(uss, NULL);
3155 /* squash all but -EFAULT for now */
3156 return err == -EFAULT ? err : 0;
3157}
3158
3159int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp)
3160{
3161 struct task_struct *t = current;
3162 return __put_user(ptr_to_compat((void __user *)t->sas_ss_sp), &uss->ss_sp) |
3163 __put_user(sas_ss_flags(sp), &uss->ss_flags) |
3164 __put_user(t->sas_ss_size, &uss->ss_size);
3165}
3166#endif
3167#endif
3097 3168
3098#ifdef __ARCH_WANT_SYS_SIGPENDING 3169#ifdef __ARCH_WANT_SYS_SIGPENDING
3099 3170
@@ -3130,7 +3201,6 @@ SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset,
3130 if (nset) { 3201 if (nset) {
3131 if (copy_from_user(&new_set, nset, sizeof(*nset))) 3202 if (copy_from_user(&new_set, nset, sizeof(*nset)))
3132 return -EFAULT; 3203 return -EFAULT;
3133 new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP));
3134 3204
3135 new_blocked = current->blocked; 3205 new_blocked = current->blocked;
3136 3206
@@ -3148,7 +3218,7 @@ SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset,
3148 return -EINVAL; 3218 return -EINVAL;
3149 } 3219 }
3150 3220
3151 __set_current_blocked(&new_blocked); 3221 set_current_blocked(&new_blocked);
3152 } 3222 }
3153 3223
3154 if (oset) { 3224 if (oset) {
@@ -3212,6 +3282,7 @@ SYSCALL_DEFINE1(ssetmask, int, newmask)
3212 int old = current->blocked.sig[0]; 3282 int old = current->blocked.sig[0];
3213 sigset_t newset; 3283 sigset_t newset;
3214 3284
3285 siginitset(&newset, newmask);
3215 set_current_blocked(&newset); 3286 set_current_blocked(&newset);
3216 3287
3217 return old; 3288 return old;
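
The new CONFIG_GENERIC_SIGALTSTACK path lets architectures drop their private sigaltstack wrappers; the userspace-visible behavior stays the standard one. For reference, the call the new SYSCALL_DEFINE2 services:

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* Install an alternate stack so SA_ONSTACK handlers can still
	 * run after the main stack is exhausted. */
	stack_t ss = {
		.ss_sp    = malloc(SIGSTKSZ),
		.ss_size  = SIGSTKSZ,
		.ss_flags = 0,
	};

	if (!ss.ss_sp || sigaltstack(&ss, NULL) == -1) {
		perror("sigaltstack");
		return 1;
	}
	puts("alternate signal stack installed");
	return 0;
}
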
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index dbff751e4086..395084d4ce16 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -25,6 +25,7 @@ cond_syscall(sys_swapoff);
25cond_syscall(sys_kexec_load); 25cond_syscall(sys_kexec_load);
26cond_syscall(compat_sys_kexec_load); 26cond_syscall(compat_sys_kexec_load);
27cond_syscall(sys_init_module); 27cond_syscall(sys_init_module);
28cond_syscall(sys_finit_module);
28cond_syscall(sys_delete_module); 29cond_syscall(sys_delete_module);
29cond_syscall(sys_socketpair); 30cond_syscall(sys_socketpair);
30cond_syscall(sys_bind); 31cond_syscall(sys_bind);
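
cond_syscall() is how an optional syscall like the new sys_finit_module still links when its code is configured out: the symbol becomes a weak alias for sys_ni_syscall(), which simply returns -ENOSYS. Roughly, simplified from <linux/linkage.h>:

#define cond_syscall(x) \
	asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
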
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 3ffe4c5ad3f3..41473b4ad7a4 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3998,7 +3998,7 @@ static int ftrace_module_notify(struct notifier_block *self,
3998 3998
3999struct notifier_block ftrace_module_nb = { 3999struct notifier_block ftrace_module_nb = {
4000 .notifier_call = ftrace_module_notify, 4000 .notifier_call = ftrace_module_notify,
4001 .priority = 0, 4001 .priority = INT_MAX, /* Run before anything that can use kprobes */
4002}; 4002};
4003 4003
4004extern unsigned long __start_mcount_loc[]; 4004extern unsigned long __start_mcount_loc[];
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 61e081b4ba11..3c13e46d7d24 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2899,6 +2899,8 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2899 if (copy_from_user(&buf, ubuf, cnt)) 2899 if (copy_from_user(&buf, ubuf, cnt))
2900 return -EFAULT; 2900 return -EFAULT;
2901 2901
2902 buf[cnt] = 0;
2903
2902 trace_set_options(buf); 2904 trace_set_options(buf);
2903 2905
2904 *ppos += cnt; 2906 *ppos += cnt;
@@ -3034,6 +3036,31 @@ static void set_buffer_entries(struct trace_array *tr, unsigned long val)
3034 tr->data[cpu]->entries = val; 3036 tr->data[cpu]->entries = val;
3035} 3037}
3036 3038
3039/* resize @tr's buffer to the size of @size_tr's entries */
3040static int resize_buffer_duplicate_size(struct trace_array *tr,
3041 struct trace_array *size_tr, int cpu_id)
3042{
3043 int cpu, ret = 0;
3044
3045 if (cpu_id == RING_BUFFER_ALL_CPUS) {
3046 for_each_tracing_cpu(cpu) {
3047 ret = ring_buffer_resize(tr->buffer,
3048 size_tr->data[cpu]->entries, cpu);
3049 if (ret < 0)
3050 break;
3051 tr->data[cpu]->entries = size_tr->data[cpu]->entries;
3052 }
3053 } else {
3054 ret = ring_buffer_resize(tr->buffer,
3055 size_tr->data[cpu_id]->entries, cpu_id);
3056 if (ret == 0)
3057 tr->data[cpu_id]->entries =
3058 size_tr->data[cpu_id]->entries;
3059 }
3060
3061 return ret;
3062}
3063
3037static int __tracing_resize_ring_buffer(unsigned long size, int cpu) 3064static int __tracing_resize_ring_buffer(unsigned long size, int cpu)
3038{ 3065{
3039 int ret; 3066 int ret;
@@ -3058,23 +3085,8 @@ static int __tracing_resize_ring_buffer(unsigned long size, int cpu)
3058 3085
3059 ret = ring_buffer_resize(max_tr.buffer, size, cpu); 3086 ret = ring_buffer_resize(max_tr.buffer, size, cpu);
3060 if (ret < 0) { 3087 if (ret < 0) {
3061 int r = 0; 3088 int r = resize_buffer_duplicate_size(&global_trace,
3062 3089 &global_trace, cpu);
3063 if (cpu == RING_BUFFER_ALL_CPUS) {
3064 int i;
3065 for_each_tracing_cpu(i) {
3066 r = ring_buffer_resize(global_trace.buffer,
3067 global_trace.data[i]->entries,
3068 i);
3069 if (r < 0)
3070 break;
3071 }
3072 } else {
3073 r = ring_buffer_resize(global_trace.buffer,
3074 global_trace.data[cpu]->entries,
3075 cpu);
3076 }
3077
3078 if (r < 0) { 3090 if (r < 0) {
3079 /* 3091 /*
3080 * AARGH! We are left with different 3092 * AARGH! We are left with different
@@ -3212,17 +3224,11 @@ static int tracing_set_tracer(const char *buf)
3212 3224
3213 topts = create_trace_option_files(t); 3225 topts = create_trace_option_files(t);
3214 if (t->use_max_tr) { 3226 if (t->use_max_tr) {
3215 int cpu;
3216 /* we need to make per cpu buffer sizes equivalent */ 3227 /* we need to make per cpu buffer sizes equivalent */
3217 for_each_tracing_cpu(cpu) { 3228 ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
3218 ret = ring_buffer_resize(max_tr.buffer, 3229 RING_BUFFER_ALL_CPUS);
3219 global_trace.data[cpu]->entries, 3230 if (ret < 0)
3220 cpu); 3231 goto out;
3221 if (ret < 0)
3222 goto out;
3223 max_tr.data[cpu]->entries =
3224 global_trace.data[cpu]->entries;
3225 }
3226 } 3232 }
3227 3233
3228 if (t->init) { 3234 if (t->init) {
@@ -3448,7 +3454,7 @@ static int tracing_wait_pipe(struct file *filp)
3448 return -EINTR; 3454 return -EINTR;
3449 3455
3450 /* 3456 /*
3451 * We block until we read something and tracing is enabled. 3457 * We block until we read something and tracing is disabled.
3452 * We still block if tracing is disabled, but we have never 3458 * We still block if tracing is disabled, but we have never
3453 * read anything. This allows a user to cat this file, and 3459 * read anything. This allows a user to cat this file, and
3454 * then enable tracing. But after we have read something, 3460 * then enable tracing. But after we have read something,
@@ -3456,7 +3462,7 @@ static int tracing_wait_pipe(struct file *filp)
3456 * 3462 *
3457 * iter->pos will be 0 if we haven't read anything. 3463 * iter->pos will be 0 if we haven't read anything.
3458 */ 3464 */
3459 if (tracing_is_enabled() && iter->pos) 3465 if (!tracing_is_enabled() && iter->pos)
3460 break; 3466 break;
3461 } 3467 }
3462 3468
@@ -4271,13 +4277,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4271 return -ENOMEM; 4277 return -ENOMEM;
4272 4278
4273 if (*ppos & (PAGE_SIZE - 1)) { 4279 if (*ppos & (PAGE_SIZE - 1)) {
4274 WARN_ONCE(1, "Ftrace: previous read must page-align\n");
4275 ret = -EINVAL; 4280 ret = -EINVAL;
4276 goto out; 4281 goto out;
4277 } 4282 }
4278 4283
4279 if (len & (PAGE_SIZE - 1)) { 4284 if (len & (PAGE_SIZE - 1)) {
4280 WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
4281 if (len < PAGE_SIZE) { 4285 if (len < PAGE_SIZE) {
4282 ret = -EINVAL; 4286 ret = -EINVAL;
4283 goto out; 4287 goto out;
@@ -4813,10 +4817,17 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
4813 return ret; 4817 return ret;
4814 4818
4815 if (buffer) { 4819 if (buffer) {
4816 if (val) 4820 mutex_lock(&trace_types_lock);
4821 if (val) {
4817 ring_buffer_record_on(buffer); 4822 ring_buffer_record_on(buffer);
4818 else 4823 if (current_trace->start)
4824 current_trace->start(tr);
4825 } else {
4819 ring_buffer_record_off(buffer); 4826 ring_buffer_record_off(buffer);
4827 if (current_trace->stop)
4828 current_trace->stop(tr);
4829 }
4830 mutex_unlock(&trace_types_lock);
4820 } 4831 }
4821 4832
4822 (*ppos)++; 4833 (*ppos)++;
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 0c1b165778e5..42ca822fc701 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -33,7 +33,6 @@ static unsigned long max_stack_size;
33static arch_spinlock_t max_stack_lock = 33static arch_spinlock_t max_stack_lock =
34 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 34 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
35 35
36static int stack_trace_disabled __read_mostly;
37static DEFINE_PER_CPU(int, trace_active); 36static DEFINE_PER_CPU(int, trace_active);
38static DEFINE_MUTEX(stack_sysctl_mutex); 37static DEFINE_MUTEX(stack_sysctl_mutex);
39 38
@@ -116,9 +115,6 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip,
116{ 115{
117 int cpu; 116 int cpu;
118 117
119 if (unlikely(!ftrace_enabled || stack_trace_disabled))
120 return;
121
122 preempt_disable_notrace(); 118 preempt_disable_notrace();
123 119
124 cpu = raw_smp_processor_id(); 120 cpu = raw_smp_processor_id();
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index f5975ccf9348..2b042c42fbc4 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -799,7 +799,7 @@ static int userns_install(struct nsproxy *nsproxy, void *ns)
799 if (user_ns == current_user_ns()) 799 if (user_ns == current_user_ns())
800 return -EINVAL; 800 return -EINVAL;
801 801
802 /* Threaded many not enter a different user namespace */ 802 /* Threaded processes may not enter a different user namespace */
803 if (atomic_read(&current->mm->mm_users) > 1) 803 if (atomic_read(&current->mm->mm_users) > 1)
804 return -EINVAL; 804 return -EINVAL;
805 805
diff --git a/kernel/utsname.c b/kernel/utsname.c
index f6336d51d64c..08b197e8c485 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -113,7 +113,8 @@ static int utsns_install(struct nsproxy *nsproxy, void *new)
113{ 113{
114 struct uts_namespace *ns = new; 114 struct uts_namespace *ns = new;
115 115
116 if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) 116 if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
117 !nsown_capable(CAP_SYS_ADMIN))
117 return -EPERM; 118 return -EPERM;
118 119
119 get_uts_ns(ns); 120 get_uts_ns(ns);
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 997c6a16ec22..75a2ab3d0b02 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -344,6 +344,10 @@ static void watchdog_enable(unsigned int cpu)
344{ 344{
345 struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); 345 struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
346 346
347 /* kick off the timer for the hardlockup detector */
348 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
349 hrtimer->function = watchdog_timer_fn;
350
347 if (!watchdog_enabled) { 351 if (!watchdog_enabled) {
348 kthread_park(current); 352 kthread_park(current);
349 return; 353 return;
@@ -352,10 +356,6 @@ static void watchdog_enable(unsigned int cpu)
352 /* Enable the perf event */ 356 /* Enable the perf event */
353 watchdog_nmi_enable(cpu); 357 watchdog_nmi_enable(cpu);
354 358
355 /* kick off the timer for the hardlockup detector */
356 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
357 hrtimer->function = watchdog_timer_fn;
358
359 /* done here because hrtimer_start can only pin to smp_processor_id() */ 359 /* done here because hrtimer_start can only pin to smp_processor_id() */
360 hrtimer_start(hrtimer, ns_to_ktime(sample_period), 360 hrtimer_start(hrtimer, ns_to_ktime(sample_period),
361 HRTIMER_MODE_REL_PINNED); 361 HRTIMER_MODE_REL_PINNED);
@@ -369,9 +369,6 @@ static void watchdog_disable(unsigned int cpu)
369{ 369{
370 struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); 370 struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
371 371
372 if (!watchdog_enabled)
373 return;
374
375 watchdog_set_prio(SCHED_NORMAL, 0); 372 watchdog_set_prio(SCHED_NORMAL, 0);
376 hrtimer_cancel(hrtimer); 373 hrtimer_cancel(hrtimer);
377 /* disable the perf event */ 374 /* disable the perf event */
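
The watchdog hunks encode one ordering rule: initialize the hrtimer unconditionally and early, because watchdog_disable() may hrtimer_cancel() it even when the watchdog was never started, and cancelling an uninitialized timer is not safe. A hedged kernel-style sketch of the pattern, with hypothetical names:

#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/types.h>

static struct hrtimer example_timer;

static enum hrtimer_restart example_fn(struct hrtimer *t)
{
	return HRTIMER_NORESTART;
}

/* Init is unconditional so the disable path can always cancel ... */
static void example_enable(bool enabled)
{
	hrtimer_init(&example_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	example_timer.function = example_fn;
	if (!enabled)
		return;
	hrtimer_start(&example_timer, ns_to_ktime(NSEC_PER_SEC),
		      HRTIMER_MODE_REL);
}

/* ... and this never touches an uninitialized timer. */
static void example_disable(void)
{
	hrtimer_cancel(&example_timer);
}
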