author	Tejun Heo <tj@kernel.org>	2013-01-23 12:31:01 -0500
committer	Tejun Heo <tj@kernel.org>	2013-01-23 12:31:01 -0500
commit	c14afb82ffff5903a701a9fb737ac20f36d1f755
tree	304dcc7b1d7b9a5f564f7e978228e61ef41fbef2 /kernel
parent	0fdff3ec6d87856cdcc99e69cf42143fdd6c56b4
parent	1d8549085377674224bf30a368284c391a3ce40e
Merge branch 'master' into for-3.9-async
To receive f56c3196f251012de9b3ebaff55732a9074fdaae ("async: fix
__lowest_in_progress()").
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel')
34 files changed, 832 insertions(+), 389 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index ac0d533eb7de..6c072b6da239 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -54,7 +54,7 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
 obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
 obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o
+obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o modsign_certificate.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
@@ -137,10 +137,14 @@ ifeq ($(CONFIG_MODULE_SIG),y)
 #
 # Pull the signing certificate and any extra certificates into the kernel
 #
+
+quiet_cmd_touch = TOUCH   $@
+      cmd_touch = touch   $@
+
 extra_certificates:
-	touch $@
+	$(call cmd,touch)
 
-kernel/modsign_pubkey.o: signing_key.x509 extra_certificates
+kernel/modsign_certificate.o: signing_key.x509 extra_certificates
 
 ###############################################################################
 #
diff --git a/kernel/async.c b/kernel/async.c
index d9bf2a9b5cee..6c68fc3fae7b 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -88,18 +88,27 @@ static atomic_t entry_count;
  */
 static async_cookie_t __lowest_in_progress(struct async_domain *running)
 {
+	async_cookie_t first_running = next_cookie;	/* infinity value */
+	async_cookie_t first_pending = next_cookie;	/* ditto */
 	struct async_entry *entry;
 
+	/*
+	 * Both running and pending lists are sorted but not disjoint.
+	 * Take the first cookies from both and return the min.
+	 */
 	if (!list_empty(&running->domain)) {
 		entry = list_first_entry(&running->domain, typeof(*entry), list);
-		return entry->cookie;
+		first_running = entry->cookie;
 	}
 
-	list_for_each_entry(entry, &async_pending, list)
-		if (entry->running == running)
-			return entry->cookie;
+	list_for_each_entry(entry, &async_pending, list) {
+		if (entry->running == running) {
+			first_pending = entry->cookie;
+			break;
+		}
+	}
 
-	return next_cookie;	/* "infinity" value */
+	return min(first_running, first_pending);
 }
 
 static async_cookie_t lowest_in_progress(struct async_domain *running)
@@ -120,13 +129,17 @@ static void async_run_entry_fn(struct work_struct *work)
 {
 	struct async_entry *entry =
 		container_of(work, struct async_entry, work);
+	struct async_entry *pos;
 	unsigned long flags;
 	ktime_t uninitialized_var(calltime), delta, rettime;
 	struct async_domain *running = entry->running;
 
-	/* 1) move self to the running queue */
+	/* 1) move self to the running queue, make sure it stays sorted */
 	spin_lock_irqsave(&async_lock, flags);
-	list_move_tail(&entry->list, &running->domain);
+	list_for_each_entry_reverse(pos, &running->domain, list)
+		if (entry->cookie < pos->cookie)
+			break;
+	list_move_tail(&entry->list, &pos->list);
 	spin_unlock_irqrestore(&async_lock, flags);
 
 	/* 2) run (and print duration) */
@@ -198,6 +211,9 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a
 	atomic_inc(&entry_count);
 	spin_unlock_irqrestore(&async_lock, flags);
 
+	/* mark that this task has queued an async job, used by module init */
+	current->flags |= PF_USED_ASYNC;
+
 	/* schedule for execution */
 	queue_work(system_unbound_wq, &entry->work);
 
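The rewritten __lowest_in_progress() relies on both the pending list and the per-domain running list staying sorted by cookie, which the new insertion loop in async_run_entry_fn() preserves. A minimal userspace sketch of the resulting lookup (plain C with arrays standing in for the kernel lists; names are hypothetical, not kernel code):

#include <stdio.h>

typedef unsigned long long cookie_t;

/*
 * Sketch: with both queues sorted by cookie, the lowest cookie still
 * in progress is the smaller of the two queue heads; an empty queue
 * contributes the "infinity" value (the next cookie to be handed out).
 */
static cookie_t lowest_in_progress(const cookie_t *running, int nrunning,
				   const cookie_t *pending, int npending,
				   cookie_t next_cookie)
{
	cookie_t first_running = next_cookie;	/* infinity value */
	cookie_t first_pending = next_cookie;	/* ditto */

	if (nrunning > 0)
		first_running = running[0];
	if (npending > 0)
		first_pending = pending[0];

	return first_running < first_pending ? first_running : first_pending;
}

int main(void)
{
	cookie_t running[] = { 3, 5 };
	cookie_t pending[] = { 4, 6, 7 };

	/* prints 3: cookie 3 is still running, nothing older is pending */
	printf("%llu\n", lowest_in_progress(running, 2, pending, 3, 8));
	return 0;
}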
diff --git a/kernel/audit.c b/kernel/audit.c
index 40414e9143db..d596e5355f15 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -272,6 +272,8 @@ static int audit_log_config_change(char *function_name, int new, int old,
 	int rc = 0;
 
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+	if (unlikely(!ab))
+		return rc;
 	audit_log_format(ab, "%s=%d old=%d auid=%u ses=%u", function_name, new,
 			 old, from_kuid(&init_user_ns, loginuid), sessionid);
 	if (sid) {
@@ -619,6 +621,8 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type,
 	}
 
 	*ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
+	if (unlikely(!*ab))
+		return rc;
 	audit_log_format(*ab, "pid=%d uid=%u auid=%u ses=%u",
 			 task_tgid_vnr(current),
 			 from_kuid(&init_user_ns, current_uid()),
@@ -1097,6 +1101,23 @@ static inline void audit_get_stamp(struct audit_context *ctx,
 	}
 }
 
+/*
+ * Wait for auditd to drain the queue a little
+ */
+static void wait_for_auditd(unsigned long sleep_time)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	set_current_state(TASK_INTERRUPTIBLE);
+	add_wait_queue(&audit_backlog_wait, &wait);
+
+	if (audit_backlog_limit &&
+	    skb_queue_len(&audit_skb_queue) > audit_backlog_limit)
+		schedule_timeout(sleep_time);
+
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(&audit_backlog_wait, &wait);
+}
+
 /* Obtain an audit buffer. This routine does locking to obtain the
  * audit buffer, but then no locking is required for calls to
  * audit_log_*format. If the tsk is a task that is currently in a
@@ -1142,20 +1163,13 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
 
 	while (audit_backlog_limit
 	       && skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) {
-		if (gfp_mask & __GFP_WAIT && audit_backlog_wait_time
-		    && time_before(jiffies, timeout_start + audit_backlog_wait_time)) {
+		if (gfp_mask & __GFP_WAIT && audit_backlog_wait_time) {
+			unsigned long sleep_time;
 
-			/* Wait for auditd to drain the queue a little */
-			DECLARE_WAITQUEUE(wait, current);
-			set_current_state(TASK_INTERRUPTIBLE);
-			add_wait_queue(&audit_backlog_wait, &wait);
-
-			if (audit_backlog_limit &&
-			    skb_queue_len(&audit_skb_queue) > audit_backlog_limit)
-				schedule_timeout(timeout_start + audit_backlog_wait_time - jiffies);
-
-			__set_current_state(TASK_RUNNING);
-			remove_wait_queue(&audit_backlog_wait, &wait);
+			sleep_time = timeout_start + audit_backlog_wait_time -
+					jiffies;
+			if ((long)sleep_time > 0)
+				wait_for_auditd(sleep_time);
 			continue;
 		}
 		if (audit_rate_check() && printk_ratelimit())
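The audit_log_start() change also fixes the timeout arithmetic: the old code tested time_before() and could still hand schedule_timeout() a negative (i.e. huge unsigned) value in a race; the new code computes the remaining time once and only sleeps when the signed view of the difference is positive. A sketch of that deadline idiom with plain fixed-width counters (not the kernel's jiffies machinery):

#include <stdint.h>
#include <stdio.h>

/*
 * Compute "remaining = deadline - now" in unsigned arithmetic, then
 * interpret the result as signed.  Because the counter wraps, "now >
 * deadline" cannot be compared directly; the signed difference goes
 * negative exactly when the deadline has passed.
 */
static int32_t remaining(uint32_t deadline, uint32_t now)
{
	return (int32_t)(deadline - now);
}

int main(void)
{
	uint32_t now = UINT32_MAX - 15;	/* counter about to wrap */
	uint32_t deadline = now + 32;	/* wraps past zero */

	printf("%d\n", remaining(deadline, now));		/* 32: still time */
	printf("%d\n", remaining(deadline, deadline + 5));	/* -5: expired */
	return 0;
}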
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index ed206fd88cca..642a89c4f3d6 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -249,7 +249,7 @@ static void untag_chunk(struct node *p)
 		list_del_rcu(&chunk->hash);
 		spin_unlock(&hash_lock);
 		spin_unlock(&entry->lock);
-		fsnotify_destroy_mark(entry);
+		fsnotify_destroy_mark(entry, audit_tree_group);
 		goto out;
 	}
 
@@ -291,7 +291,7 @@ static void untag_chunk(struct node *p)
 		owner->root = new;
 	spin_unlock(&hash_lock);
 	spin_unlock(&entry->lock);
-	fsnotify_destroy_mark(entry);
+	fsnotify_destroy_mark(entry, audit_tree_group);
 	fsnotify_put_mark(&new->mark);	/* drop initial reference */
 	goto out;
 
@@ -331,7 +331,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
 		spin_unlock(&hash_lock);
 		chunk->dead = 1;
 		spin_unlock(&entry->lock);
-		fsnotify_destroy_mark(entry);
+		fsnotify_destroy_mark(entry, audit_tree_group);
 		fsnotify_put_mark(entry);
 		return 0;
 	}
@@ -412,7 +412,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 		spin_unlock(&chunk_entry->lock);
 		spin_unlock(&old_entry->lock);
 
-		fsnotify_destroy_mark(chunk_entry);
+		fsnotify_destroy_mark(chunk_entry, audit_tree_group);
 
 		fsnotify_put_mark(chunk_entry);
 		fsnotify_put_mark(old_entry);
@@ -443,17 +443,32 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	spin_unlock(&hash_lock);
 	spin_unlock(&chunk_entry->lock);
 	spin_unlock(&old_entry->lock);
-	fsnotify_destroy_mark(old_entry);
+	fsnotify_destroy_mark(old_entry, audit_tree_group);
 	fsnotify_put_mark(chunk_entry);	/* drop initial reference */
 	fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
 	return 0;
 }
 
+static void audit_log_remove_rule(struct audit_krule *rule)
+{
+	struct audit_buffer *ab;
+
+	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+	if (unlikely(!ab))
+		return;
+	audit_log_format(ab, "op=");
+	audit_log_string(ab, "remove rule");
+	audit_log_format(ab, " dir=");
+	audit_log_untrustedstring(ab, rule->tree->pathname);
+	audit_log_key(ab, rule->filterkey);
+	audit_log_format(ab, " list=%d res=1", rule->listnr);
+	audit_log_end(ab);
+}
+
 static void kill_rules(struct audit_tree *tree)
 {
 	struct audit_krule *rule, *next;
 	struct audit_entry *entry;
-	struct audit_buffer *ab;
 
 	list_for_each_entry_safe(rule, next, &tree->rules, rlist) {
 		entry = container_of(rule, struct audit_entry, rule);
@@ -461,14 +476,7 @@ static void kill_rules(struct audit_tree *tree)
 		list_del_init(&rule->rlist);
 		if (rule->tree) {
 			/* not a half-baked one */
-			ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
-			audit_log_format(ab, "op=");
-			audit_log_string(ab, "remove rule");
-			audit_log_format(ab, " dir=");
-			audit_log_untrustedstring(ab, rule->tree->pathname);
-			audit_log_key(ab, rule->filterkey);
-			audit_log_format(ab, " list=%d res=1", rule->listnr);
-			audit_log_end(ab);
+			audit_log_remove_rule(rule);
 			rule->tree = NULL;
 			list_del_rcu(&entry->list);
 			list_del(&entry->rule.list);
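The recurring theme across these audit hunks is that audit_log_start() can return NULL under memory pressure or a full backlog, and every caller must bail out before formatting; hoisting the "remove rule" sequence into audit_log_remove_rule() puts that check in exactly one place. A userspace sketch of the pattern (hypothetical stand-in API, not the kernel's):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct log_buf { char text[256]; };

static struct log_buf *log_start(void)
{
	/* may fail, like audit_log_start() under memory pressure */
	return calloc(1, sizeof(struct log_buf));
}

static void log_append(struct log_buf *b, const char *s)
{
	strncat(b->text, s, sizeof(b->text) - strlen(b->text) - 1);
}

static void log_end(struct log_buf *b)
{
	puts(b->text);
	free(b);
}

/*
 * The shape of the fix: one helper owns the start/NULL-check/end
 * sequence, so no caller can format into a NULL buffer.
 */
static void log_remove_rule(const char *dir)
{
	struct log_buf *b = log_start();

	if (!b)		/* allocation failed: drop the record, don't crash */
		return;
	log_append(b, "op=remove rule dir=");
	log_append(b, dir);
	log_end(b);
}

int main(void)
{
	log_remove_rule("/etc");
	return 0;
}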
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 9a9ae6e3d290..22831c4d369c 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -240,6 +240,8 @@ static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watc
 	if (audit_enabled) {
 		struct audit_buffer *ab;
 		ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
+		if (unlikely(!ab))
+			return;
 		audit_log_format(ab, "auid=%u ses=%u op=",
 				 from_kuid(&init_user_ns, audit_get_loginuid(current)),
 				 audit_get_sessionid(current));
@@ -350,7 +352,7 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
 	}
 	mutex_unlock(&audit_filter_mutex);
 
-	fsnotify_destroy_mark(&parent->mark);
+	fsnotify_destroy_mark(&parent->mark, audit_watch_group);
 }
 
 /* Get path information necessary for adding watches. */
@@ -457,7 +459,7 @@ void audit_remove_watch_rule(struct audit_krule *krule)
 
 	if (list_empty(&parent->watches)) {
 		audit_get_parent(parent);
-		fsnotify_destroy_mark(&parent->mark);
+		fsnotify_destroy_mark(&parent->mark, audit_watch_group);
 		audit_put_parent(parent);
 	}
 }
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 7f19f23d38a3..f9fc54bbe06f 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1144,7 +1144,6 @@ static void audit_log_rule_change(kuid_t loginuid, u32 sessionid, u32 sid,
  * audit_receive_filter - apply all rules to the specified message type
  * @type: audit message type
  * @pid: target pid for netlink audit messages
- * @uid: target uid for netlink audit messages
  * @seq: netlink audit message sequence (serial) number
  * @data: payload data
  * @datasz: size of payload data
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e37e6a12c5e3..a371f857a0a9 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1464,14 +1464,14 @@ static void show_special(struct audit_context *context, int *call_panic)
 			audit_log_end(ab);
 			ab = audit_log_start(context, GFP_KERNEL,
 					     AUDIT_IPC_SET_PERM);
+			if (unlikely(!ab))
+				return;
 			audit_log_format(ab,
 				"qbytes=%lx ouid=%u ogid=%u mode=%#ho",
 				context->ipc.qbytes,
 				context->ipc.perm_uid,
 				context->ipc.perm_gid,
 				context->ipc.perm_mode);
-			if (!ab)
-				return;
 		}
 		break; }
 	case AUDIT_MQ_OPEN: {
@@ -2675,7 +2675,7 @@ void __audit_mmap_fd(int fd, int flags)
 	context->type = AUDIT_MMAP;
 }
 
-static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
+static void audit_log_task(struct audit_buffer *ab)
 {
 	kuid_t auid, uid;
 	kgid_t gid;
@@ -2693,6 +2693,11 @@ static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
 	audit_log_task_context(ab);
 	audit_log_format(ab, " pid=%d comm=", current->pid);
 	audit_log_untrustedstring(ab, current->comm);
+}
+
+static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
+{
+	audit_log_task(ab);
 	audit_log_format(ab, " reason=");
 	audit_log_string(ab, reason);
 	audit_log_format(ab, " sig=%ld", signr);
@@ -2715,6 +2720,8 @@ void audit_core_dumps(long signr)
 		return;
 
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
+	if (unlikely(!ab))
+		return;
 	audit_log_abend(ab, "memory violation", signr);
 	audit_log_end(ab);
 }
@@ -2723,8 +2730,11 @@ void __audit_seccomp(unsigned long syscall, long signr, int code)
 {
 	struct audit_buffer *ab;
 
-	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
-	audit_log_abend(ab, "seccomp", signr);
+	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_SECCOMP);
+	if (unlikely(!ab))
+		return;
+	audit_log_task(ab);
+	audit_log_format(ab, " sig=%ld", signr);
 	audit_log_format(ab, " syscall=%ld", syscall);
 	audit_log_format(ab, " compat=%d", is_compat_task());
 	audit_log_format(ab, " ip=0x%lx", KSTK_EIP(current));
diff --git a/kernel/compat.c b/kernel/compat.c
index f6150e92dfc9..36700e9e2be9 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -535,9 +535,11 @@ asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru)
 	return 0;
 }
 
-asmlinkage long
-compat_sys_wait4(compat_pid_t pid, compat_uint_t __user *stat_addr, int options,
-	struct compat_rusage __user *ru)
+COMPAT_SYSCALL_DEFINE4(wait4,
+	compat_pid_t, pid,
+	compat_uint_t __user *, stat_addr,
+	int, options,
+	struct compat_rusage __user *, ru)
 {
 	if (!ru) {
 		return sys_wait4(pid, stat_addr, options, NULL);
@@ -564,9 +566,10 @@ compat_sys_wait4(compat_pid_t pid, compat_uint_t __user *stat_addr, int options,
 	}
 }
 
-asmlinkage long compat_sys_waitid(int which, compat_pid_t pid,
-		struct compat_siginfo __user *uinfo, int options,
-		struct compat_rusage __user *uru)
+COMPAT_SYSCALL_DEFINE5(waitid,
+		int, which, compat_pid_t, pid,
+		struct compat_siginfo __user *, uinfo, int, options,
+		struct compat_rusage __user *, uru)
 {
 	siginfo_t info;
 	struct rusage ru;
@@ -584,7 +587,11 @@ asmlinkage long compat_sys_waitid(int which, compat_pid_t pid,
 		return ret;
 
 	if (uru) {
-		ret = put_compat_rusage(&ru, uru);
+		/* sys_waitid() overwrites everything in ru */
+		if (COMPAT_USE_64BIT_TIME)
+			ret = copy_to_user(uru, &ru, sizeof(ru));
+		else
+			ret = put_compat_rusage(&ru, uru);
 		if (ret)
 			return ret;
 	}
@@ -994,7 +1001,7 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
 	sigset_from_compat(&s, &s32);
 
 	if (uts) {
-		if (get_compat_timespec(&t, uts))
+		if (compat_get_timespec(&t, uts))
 			return -EFAULT;
 	}
 
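The COMPAT_SYSCALL_DEFINEn wrappers exist because a 32-bit task's syscall arguments arrive in 64-bit registers whose upper halves cannot be trusted; the macro truncates and re-extends each argument before the body sees it. A sketch of why the re-extension matters (plain C, illustrating only the conversions):

#include <stdint.h>
#include <stdio.h>

/*
 * A 32-bit task passes -1 as an int argument; the 64-bit register may
 * carry it zero-extended (0x00000000ffffffff).  Using the register
 * value directly as a long yields 4294967295, not -1.
 */
int main(void)
{
	uint64_t reg = 0x00000000ffffffffULL;	/* raw register contents */

	long wrong = (long)reg;			/* 4294967295 on LP64 */
	long right = (long)(int32_t)reg;	/* truncate, then sign-extend: -1 */

	printf("wrong=%ld right=%ld\n", wrong, right);
	return 0;
}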
diff --git a/kernel/cred.c b/kernel/cred.c
index 8888afb846e9..e0573a43c7df 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -372,6 +372,31 @@ error_put:
 	return ret;
 }
 
+static bool cred_cap_issubset(const struct cred *set, const struct cred *subset)
+{
+	const struct user_namespace *set_ns = set->user_ns;
+	const struct user_namespace *subset_ns = subset->user_ns;
+
+	/* If the two credentials are in the same user namespace see if
+	 * the capabilities of subset are a subset of set.
+	 */
+	if (set_ns == subset_ns)
+		return cap_issubset(subset->cap_permitted, set->cap_permitted);
+
+	/* The credentials are in different user namespaces,
+	 * therefore one is a subset of the other only if set is an
+	 * ancestor of subset and set->euid is the owner of subset or one
+	 * of subset's ancestors.
+	 */
+	for (; subset_ns != &init_user_ns; subset_ns = subset_ns->parent) {
+		if ((set_ns == subset_ns->parent) &&
+		    uid_eq(subset_ns->owner, set->euid))
+			return true;
+	}
+
+	return false;
+}
+
 /**
  * commit_creds - Install new credentials upon the current task
  * @new: The credentials to be assigned
@@ -410,7 +435,7 @@ int commit_creds(struct cred *new)
 	    !gid_eq(old->egid, new->egid) ||
 	    !uid_eq(old->fsuid, new->fsuid) ||
 	    !gid_eq(old->fsgid, new->fsgid) ||
-	    !cap_issubset(new->cap_permitted, old->cap_permitted)) {
+	    !cred_cap_issubset(old, new)) {
 		if (task->mm)
 			set_dumpable(task->mm, suid_dumpable);
 		task->pdeath_signal = 0;
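cred_cap_issubset() treats capabilities held in a child user namespace as dominated by a parent's credentials when the parent's euid owns the child namespace (or one of its ancestors). A userspace sketch of that ancestry walk (hypothetical struct, not the kernel's user_namespace):

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for struct user_namespace. */
struct ns {
	struct ns *parent;
	unsigned owner_uid;	/* euid that created the namespace */
};

static struct ns init_ns = { NULL, 0 };

/*
 * Sketch of the loop in cred_cap_issubset(): "set" dominates "subset"
 * if set's namespace is the direct parent of some ancestor of
 * subset's namespace and set's euid owns that ancestor.
 */
static bool ns_dominates(const struct ns *set_ns, unsigned set_euid,
			 const struct ns *subset_ns)
{
	for (; subset_ns != &init_ns; subset_ns = subset_ns->parent) {
		if (subset_ns->parent == set_ns &&
		    subset_ns->owner_uid == set_euid)
			return true;
	}
	return false;
}

int main(void)
{
	struct ns child = { &init_ns, 1000 };	/* created by uid 1000 */

	printf("%d\n", ns_dominates(&init_ns, 1000, &child));	/* 1 */
	printf("%d\n", ns_dominates(&init_ns, 0, &child));	/* 0 */
	return 0;
}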
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 4d5f8d5612f3..8875254120b6 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1970,6 +1970,8 @@ static int kdb_lsmod(int argc, const char **argv)
 
 	kdb_printf("Module                  Size  modstruct     Used by\n");
 	list_for_each_entry(mod, kdb_modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 
 		kdb_printf("%-20s%8u  0x%p ", mod->name,
 			   mod->core_size, (void *)mod);
diff --git a/kernel/fork.c b/kernel/fork.c
index c36c4e301efe..c535f33bbb9c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -146,7 +146,7 @@ void __weak arch_release_thread_info(struct thread_info *ti)
 static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
 						  int node)
 {
-	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
+	struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
 					     THREAD_SIZE_ORDER);
 
 	return page ? page_address(page) : NULL;
@@ -154,7 +154,7 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
 
 static inline void free_thread_info(struct thread_info *ti)
 {
-	free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+	free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
 }
 # else
 static struct kmem_cache *thread_info_cache;
@@ -1166,6 +1166,14 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 			current->signal->flags & SIGNAL_UNKILLABLE)
 		return ERR_PTR(-EINVAL);
 
+	/*
+	 * If the new process will be in a different pid namespace
+	 * don't allow the creation of threads.
+	 */
+	if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) &&
+	    (task_active_pid_ns(current) != current->nsproxy->pid_ns))
+		return ERR_PTR(-EINVAL);
+
 	retval = security_task_create(clone_flags);
 	if (retval)
 		goto fork_out;
@@ -1613,7 +1621,6 @@ long do_fork(unsigned long clone_flags,
 	return nr;
 }
 
-#ifdef CONFIG_GENERIC_KERNEL_THREAD
 /*
  * Create a kernel thread.
  */
@@ -1622,7 +1629,6 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 	return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
 		(unsigned long)arg, NULL, NULL);
 }
-#endif
 
 #ifdef __ARCH_WANT_SYS_FORK
 SYSCALL_DEFINE0(fork)
@@ -1662,8 +1668,10 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
 		 int, tls_val)
 #endif
 {
-	return do_fork(clone_flags, newsp, 0,
-		parent_tidptr, child_tidptr);
+	long ret = do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
+	asmlinkage_protect(5, ret, clone_flags, newsp,
+			   parent_tidptr, child_tidptr, tls_val);
+	return ret;
 }
 #endif
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 35c70c9e24d8..e49a288fa479 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -818,7 +818,7 @@ static void irq_thread_dtor(struct callback_head *unused)
 	action = kthread_data(tsk);
 
 	pr_err("exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
-	       tsk->comm ? tsk->comm : "", tsk->pid, action->irq);
+	       tsk->comm, tsk->pid, action->irq);
 
 
 	desc = irq_to_desc(action->irq);
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
index 30b7b225306c..e30ac0fe61c3 100644
--- a/kernel/kcmp.c
+++ b/kernel/kcmp.c
@@ -4,6 +4,7 @@
 #include <linux/string.h>
 #include <linux/random.h>
 #include <linux/module.h>
+#include <linux/ptrace.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/cache.h>
diff --git a/kernel/kmod.c b/kernel/kmod.c
index ecd42b484db8..56dd34976d7b 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -228,9 +228,9 @@ static int ____call_usermodehelper(void *data)
 
 	commit_creds(new);
 
-	retval = kernel_execve(sub_info->path,
-			       (const char *const *)sub_info->argv,
-			       (const char *const *)sub_info->envp);
+	retval = do_execve(sub_info->path,
+			   (const char __user *const __user *)sub_info->argv,
+			   (const char __user *const __user *)sub_info->envp);
 	if (!retval)
 		return 0;
 
diff --git a/kernel/modsign_certificate.S b/kernel/modsign_certificate.S
new file mode 100644
index 000000000000..246b4c6e6135
--- /dev/null
+++ b/kernel/modsign_certificate.S
@@ -0,0 +1,19 @@
+/* SYMBOL_PREFIX defined on commandline from CONFIG_SYMBOL_PREFIX */
+#ifndef SYMBOL_PREFIX
+#define ASM_SYMBOL(sym) sym
+#else
+#define PASTE2(x,y) x##y
+#define PASTE(x,y) PASTE2(x,y)
+#define ASM_SYMBOL(sym) PASTE(SYMBOL_PREFIX, sym)
+#endif
+
+#define GLOBAL(name)		\
+	.globl ASM_SYMBOL(name);	\
+	ASM_SYMBOL(name):
+
+	.section ".init.data","aw"
+
+GLOBAL(modsign_certificate_list)
+.incbin "signing_key.x509"
+.incbin "extra_certificates"
+GLOBAL(modsign_certificate_list_end)
diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c
index 767e559dfb10..2b6e69909c39 100644
--- a/kernel/modsign_pubkey.c
+++ b/kernel/modsign_pubkey.c
@@ -20,12 +20,6 @@ struct key *modsign_keyring;
 
 extern __initdata const u8 modsign_certificate_list[];
 extern __initdata const u8 modsign_certificate_list_end[];
-asm(".section .init.data,\"aw\"\n"
-    SYMBOL_PREFIX "modsign_certificate_list:\n"
-    ".incbin \"signing_key.x509\"\n"
-    ".incbin \"extra_certificates\"\n"
-    SYMBOL_PREFIX "modsign_certificate_list_end:"
-    );
 
 /*
  * We need to make sure ccache doesn't cache the .o file as it doesn't notice
@@ -40,18 +34,15 @@ static __init int module_verify_init(void)
 {
 	pr_notice("Initialise module verification\n");
 
-	modsign_keyring = key_alloc(&key_type_keyring, ".module_sign",
-				    KUIDT_INIT(0), KGIDT_INIT(0),
-				    current_cred(),
-				    (KEY_POS_ALL & ~KEY_POS_SETATTR) |
-				    KEY_USR_VIEW | KEY_USR_READ,
-				    KEY_ALLOC_NOT_IN_QUOTA);
+	modsign_keyring = keyring_alloc(".module_sign",
+					KUIDT_INIT(0), KGIDT_INIT(0),
+					current_cred(),
+					((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+					 KEY_USR_VIEW | KEY_USR_READ),
+					KEY_ALLOC_NOT_IN_QUOTA, NULL);
 	if (IS_ERR(modsign_keyring))
 		panic("Can't allocate module signing keyring\n");
 
-	if (key_instantiate_and_link(modsign_keyring, NULL, 0, NULL, NULL) < 0)
-		panic("Can't instantiate module signing keyring\n");
-
 	return 0;
 }
 
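Moving the .incbin directives into modsign_certificate.S takes the certificate blob out of the C translation unit (sidestepping the ccache problem noted above); C code still sees it only through the bracketing start/end symbols. A sketch of how such a symbol pair is consumed, emulated with an array standing in for the assembled blob (names hypothetical):

#include <stdio.h>

/*
 * An .incbin'd blob bracketed by GLOBAL(...) labels is used in C
 * purely via two symbols; the usable length is the difference of
 * their addresses.
 */
static const unsigned char certificate_list[] = { 0x30, 0x82, 0x01, 0x0a };
static const unsigned char *const certificate_list_end =
	certificate_list + sizeof(certificate_list);

int main(void)
{
	size_t len = (size_t)(certificate_list_end - certificate_list);

	/* the kernel iterates over [start, end) to parse each X.509 cert */
	printf("blob length: %zu bytes\n", len);
	return 0;
}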
diff --git a/kernel/module.c b/kernel/module.c
index 808bd62e1723..eab08274ec9b 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -21,6 +21,7 @@
 #include <linux/ftrace_event.h>
 #include <linux/init.h>
 #include <linux/kallsyms.h>
+#include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/sysfs.h>
 #include <linux/kernel.h>
@@ -28,6 +29,7 @@
 #include <linux/vmalloc.h>
 #include <linux/elf.h>
 #include <linux/proc_fs.h>
+#include <linux/security.h>
 #include <linux/seq_file.h>
 #include <linux/syscalls.h>
 #include <linux/fcntl.h>
@@ -59,6 +61,7 @@
 #include <linux/pfn.h>
 #include <linux/bsearch.h>
 #include <linux/fips.h>
+#include <uapi/linux/module.h>
 #include "module-internal.h"
 
 #define CREATE_TRACE_POINTS
@@ -185,6 +188,7 @@ struct load_info {
    ongoing or failed initialization etc. */
 static inline int strong_try_module_get(struct module *mod)
 {
+	BUG_ON(mod && mod->state == MODULE_STATE_UNFORMED);
 	if (mod && mod->state == MODULE_STATE_COMING)
 		return -EBUSY;
 	if (try_module_get(mod))
@@ -340,6 +344,9 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
 #endif
 		};
 
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
+
 		if (each_symbol_in_section(arr, ARRAY_SIZE(arr), mod, fn, data))
 			return true;
 	}
@@ -447,16 +454,24 @@ const struct kernel_symbol *find_symbol(const char *name,
 EXPORT_SYMBOL_GPL(find_symbol);
 
 /* Search for module by name: must hold module_mutex. */
-struct module *find_module(const char *name)
+static struct module *find_module_all(const char *name,
+				      bool even_unformed)
 {
 	struct module *mod;
 
 	list_for_each_entry(mod, &modules, list) {
+		if (!even_unformed && mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if (strcmp(mod->name, name) == 0)
 			return mod;
 	}
 	return NULL;
 }
+
+struct module *find_module(const char *name)
+{
+	return find_module_all(name, false);
+}
 EXPORT_SYMBOL_GPL(find_module);
 
 #ifdef CONFIG_SMP
@@ -522,6 +537,8 @@ bool is_module_percpu_address(unsigned long addr)
 	preempt_disable();
 
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if (!mod->percpu_size)
 			continue;
 		for_each_possible_cpu(cpu) {
@@ -1045,6 +1062,8 @@ static ssize_t show_initstate(struct module_attribute *mattr,
 	case MODULE_STATE_GOING:
 		state = "going";
 		break;
+	default:
+		BUG();
 	}
 	return sprintf(buffer, "%s\n", state);
 }
@@ -1783,6 +1802,8 @@ void set_all_modules_text_rw(void)
 
 	mutex_lock(&module_mutex);
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if ((mod->module_core) && (mod->core_text_size)) {
 			set_page_attributes(mod->module_core,
 						mod->module_core + mod->core_text_size,
@@ -1804,6 +1825,8 @@ void set_all_modules_text_ro(void)
 
 	mutex_lock(&module_mutex);
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if ((mod->module_core) && (mod->core_text_size)) {
 			set_page_attributes(mod->module_core,
 						mod->module_core + mod->core_text_size,
@@ -2279,7 +2302,7 @@ static void layout_symtab(struct module *mod, struct load_info *info)
 	Elf_Shdr *symsect = info->sechdrs + info->index.sym;
 	Elf_Shdr *strsect = info->sechdrs + info->index.str;
 	const Elf_Sym *src;
-	unsigned int i, nsrc, ndst, strtab_size;
+	unsigned int i, nsrc, ndst, strtab_size = 0;
 
 	/* Put symbol section at end of init part of module. */
 	symsect->sh_flags |= SHF_ALLOC;
@@ -2290,9 +2313,6 @@ static void layout_symtab(struct module *mod, struct load_info *info)
 	src = (void *)info->hdr + symsect->sh_offset;
 	nsrc = symsect->sh_size / sizeof(*src);
 
-	/* strtab always starts with a nul, so offset 0 is the empty string. */
-	strtab_size = 1;
-
 	/* Compute total space required for the core symbols' strtab. */
 	for (ndst = i = 0; i < nsrc; i++) {
 		if (i == 0 ||
@@ -2334,7 +2354,6 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
 	mod->core_symtab = dst = mod->module_core + info->symoffs;
 	mod->core_strtab = s = mod->module_core + info->stroffs;
 	src = mod->symtab;
-	*s++ = 0;
 	for (ndst = i = 0; i < mod->num_symtab; i++) {
 		if (i == 0 ||
 		    is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
@@ -2375,7 +2394,7 @@ static void dynamic_debug_remove(struct _ddebug *debug)
 
 void * __weak module_alloc(unsigned long size)
 {
-	return size == 0 ? NULL : vmalloc_exec(size);
+	return vmalloc_exec(size);
 }
 
 static void *module_alloc_update_bounds(unsigned long size)
@@ -2422,18 +2441,17 @@ static inline void kmemleak_load_module(const struct module *mod,
 #endif
 
 #ifdef CONFIG_MODULE_SIG
-static int module_sig_check(struct load_info *info,
-			    const void *mod, unsigned long *_len)
+static int module_sig_check(struct load_info *info)
 {
 	int err = -ENOKEY;
-	unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
-	unsigned long len = *_len;
+	const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
+	const void *mod = info->hdr;
 
-	if (len > markerlen &&
-	    memcmp(mod + len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
+	if (info->len > markerlen &&
+	    memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
 		/* We truncate the module to discard the signature */
-		*_len -= markerlen;
-		err = mod_verify_sig(mod, _len);
+		info->len -= markerlen;
+		err = mod_verify_sig(mod, &info->len);
 	}
 
 	if (!err) {
@@ -2451,59 +2469,114 @@ static int module_sig_check(struct load_info *info,
 	return err;
 }
 #else /* !CONFIG_MODULE_SIG */
-static int module_sig_check(struct load_info *info,
-			    void *mod, unsigned long *len)
+static int module_sig_check(struct load_info *info)
 {
 	return 0;
 }
 #endif /* !CONFIG_MODULE_SIG */
 
-/* Sets info->hdr, info->len and info->sig_ok. */
-static int copy_and_check(struct load_info *info,
-			  const void __user *umod, unsigned long len,
-			  const char __user *uargs)
+/* Sanity checks against invalid binaries, wrong arch, weird elf version. */
+static int elf_header_check(struct load_info *info)
+{
+	if (info->len < sizeof(*(info->hdr)))
+		return -ENOEXEC;
+
+	if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0
+	    || info->hdr->e_type != ET_REL
+	    || !elf_check_arch(info->hdr)
+	    || info->hdr->e_shentsize != sizeof(Elf_Shdr))
+		return -ENOEXEC;
+
+	if (info->hdr->e_shoff >= info->len
+	    || (info->hdr->e_shnum * sizeof(Elf_Shdr) >
+		info->len - info->hdr->e_shoff))
+		return -ENOEXEC;
+
+	return 0;
+}
+
+/* Sets info->hdr and info->len. */
+static int copy_module_from_user(const void __user *umod, unsigned long len,
+				  struct load_info *info)
 {
 	int err;
-	Elf_Ehdr *hdr;
 
-	if (len < sizeof(*hdr))
+	info->len = len;
+	if (info->len < sizeof(*(info->hdr)))
 		return -ENOEXEC;
 
+	err = security_kernel_module_from_file(NULL);
+	if (err)
+		return err;
+
 	/* Suck in entire file: we'll want most of it. */
-	if ((hdr = vmalloc(len)) == NULL)
+	info->hdr = vmalloc(info->len);
+	if (!info->hdr)
 		return -ENOMEM;
 
-	if (copy_from_user(hdr, umod, len) != 0) {
-		err = -EFAULT;
-		goto free_hdr;
+	if (copy_from_user(info->hdr, umod, info->len) != 0) {
+		vfree(info->hdr);
+		return -EFAULT;
 	}
 
-	err = module_sig_check(info, hdr, &len);
+	return 0;
+}
+
+/* Sets info->hdr and info->len. */
+static int copy_module_from_fd(int fd, struct load_info *info)
+{
+	struct file *file;
+	int err;
+	struct kstat stat;
+	loff_t pos;
+	ssize_t bytes = 0;
+
+	file = fget(fd);
+	if (!file)
+		return -ENOEXEC;
+
+	err = security_kernel_module_from_file(file);
+	if (err)
+		goto out;
+
+	err = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat);
 	if (err)
-		goto free_hdr;
+		goto out;
 
-	/* Sanity checks against insmoding binaries or wrong arch,
-	   weird elf version */
-	if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
-	    || hdr->e_type != ET_REL
-	    || !elf_check_arch(hdr)
-	    || hdr->e_shentsize != sizeof(Elf_Shdr)) {
-		err = -ENOEXEC;
-		goto free_hdr;
+	if (stat.size > INT_MAX) {
+		err = -EFBIG;
+		goto out;
 	}
 
-	if (hdr->e_shoff >= len ||
-	    hdr->e_shnum * sizeof(Elf_Shdr) > len - hdr->e_shoff) {
-		err = -ENOEXEC;
-		goto free_hdr;
+	/* Don't hand 0 to vmalloc, it whines. */
+	if (stat.size == 0) {
+		err = -EINVAL;
+		goto out;
 	}
 
-	info->hdr = hdr;
-	info->len = len;
-	return 0;
+	info->hdr = vmalloc(stat.size);
+	if (!info->hdr) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	pos = 0;
+	while (pos < stat.size) {
+		bytes = kernel_read(file, pos, (char *)(info->hdr) + pos,
+				    stat.size - pos);
+		if (bytes < 0) {
+			vfree(info->hdr);
+			err = bytes;
+			goto out;
+		}
+		if (bytes == 0)
+			break;
+		pos += bytes;
+	}
+	info->len = pos;
 
-free_hdr:
-	vfree(hdr);
+out:
+	fput(file);
 	return err;
 }
 
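copy_module_from_fd() cannot assume kernel_read() returns everything it asked for, so it accumulates into pos until the stat size is reached or EOF hits early. The same loop shape in userspace with read(2) (a sketch with error handling trimmed):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/*
 * Sketch of the read loop in copy_module_from_fd(): short reads are
 * legal, so keep reading until "size" bytes arrived or EOF/error.
 * Returns the number of bytes actually read, or -1 on read error.
 */
static ssize_t read_all(int fd, char *buf, size_t size)
{
	size_t pos = 0;

	while (pos < size) {
		ssize_t bytes = read(fd, buf + pos, size - pos);

		if (bytes < 0)
			return -1;
		if (bytes == 0)		/* EOF: file shorter than expected */
			break;
		pos += bytes;
	}
	return (ssize_t)pos;
}

int main(int argc, char **argv)
{
	char buf[4096];
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;
	printf("read %zd bytes\n", read_all(fd, buf, sizeof(buf)));
	close(fd);
	return 0;
}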
@@ -2512,7 +2585,7 @@ static void free_copy(struct load_info *info)
 	vfree(info->hdr);
 }
 
-static int rewrite_section_headers(struct load_info *info)
+static int rewrite_section_headers(struct load_info *info, int flags)
 {
 	unsigned int i;
 
@@ -2540,7 +2613,10 @@ static int rewrite_section_headers(struct load_info *info)
 	}
 
 	/* Track but don't keep modinfo and version sections. */
-	info->index.vers = find_sec(info, "__versions");
+	if (flags & MODULE_INIT_IGNORE_MODVERSIONS)
+		info->index.vers = 0; /* Pretend no __versions section! */
+	else
+		info->index.vers = find_sec(info, "__versions");
 	info->index.info = find_sec(info, ".modinfo");
 	info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
 	info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -2555,7 +2631,7 @@ static int rewrite_section_headers(struct load_info *info)
  * Return the temporary module pointer (we'll replace it with the final
  * one when we move the module sections around).
  */
-static struct module *setup_load_info(struct load_info *info)
+static struct module *setup_load_info(struct load_info *info, int flags)
 {
 	unsigned int i;
 	int err;
@@ -2566,7 +2642,7 @@ static struct module *setup_load_info(struct load_info *info)
 	info->secstrings = (void *)info->hdr
 		+ info->sechdrs[info->hdr->e_shstrndx].sh_offset;
 
-	err = rewrite_section_headers(info);
+	err = rewrite_section_headers(info, flags);
 	if (err)
 		return ERR_PTR(err);
 
@@ -2604,11 +2680,14 @@ static struct module *setup_load_info(struct load_info *info)
 	return mod;
 }
 
-static int check_modinfo(struct module *mod, struct load_info *info)
+static int check_modinfo(struct module *mod, struct load_info *info, int flags)
 {
 	const char *modmagic = get_modinfo(info, "vermagic");
 	int err;
 
+	if (flags & MODULE_INIT_IGNORE_VERMAGIC)
+		modmagic = NULL;
+
 	/* This is allowed: modprobe --force will invalidate it. */
 	if (!modmagic) {
 		err = try_to_force_load(mod, "bad vermagic");
@@ -2738,20 +2817,23 @@ static int move_module(struct module *mod, struct load_info *info)
 	memset(ptr, 0, mod->core_size);
 	mod->module_core = ptr;
 
-	ptr = module_alloc_update_bounds(mod->init_size);
-	/*
-	 * The pointer to this block is stored in the module structure
-	 * which is inside the block. This block doesn't need to be
-	 * scanned as it contains data and code that will be freed
-	 * after the module is initialized.
-	 */
-	kmemleak_ignore(ptr);
-	if (!ptr && mod->init_size) {
-		module_free(mod, mod->module_core);
-		return -ENOMEM;
-	}
-	memset(ptr, 0, mod->init_size);
-	mod->module_init = ptr;
+	if (mod->init_size) {
+		ptr = module_alloc_update_bounds(mod->init_size);
+		/*
+		 * The pointer to this block is stored in the module structure
+		 * which is inside the block. This block doesn't need to be
+		 * scanned as it contains data and code that will be freed
+		 * after the module is initialized.
+		 */
+		kmemleak_ignore(ptr);
+		if (!ptr) {
+			module_free(mod, mod->module_core);
+			return -ENOMEM;
+		}
+		memset(ptr, 0, mod->init_size);
+		mod->module_init = ptr;
+	} else
+		mod->module_init = NULL;
 
 	/* Transfer each section which specifies SHF_ALLOC */
 	pr_debug("final section addresses:\n");
@@ -2844,18 +2926,18 @@ int __weak module_frob_arch_sections(Elf_Ehdr *hdr,
 	return 0;
 }
 
-static struct module *layout_and_allocate(struct load_info *info)
+static struct module *layout_and_allocate(struct load_info *info, int flags)
 {
 	/* Module within temporary copy. */
 	struct module *mod;
 	Elf_Shdr *pcpusec;
 	int err;
 
-	mod = setup_load_info(info);
+	mod = setup_load_info(info, flags);
 	if (IS_ERR(mod))
 		return mod;
 
-	err = check_modinfo(mod, info);
+	err = check_modinfo(mod, info, flags);
 	if (err)
 		return ERR_PTR(err);
 
@@ -2935,40 +3017,181 @@ static bool finished_loading(const char *name) | |||
2935 | bool ret; | 3017 | bool ret; |
2936 | 3018 | ||
2937 | mutex_lock(&module_mutex); | 3019 | mutex_lock(&module_mutex); |
2938 | mod = find_module(name); | 3020 | mod = find_module_all(name, true); |
2939 | ret = !mod || mod->state != MODULE_STATE_COMING; | 3021 | ret = !mod || mod->state == MODULE_STATE_LIVE |
3022 | || mod->state == MODULE_STATE_GOING; | ||
2940 | mutex_unlock(&module_mutex); | 3023 | mutex_unlock(&module_mutex); |
2941 | 3024 | ||
2942 | return ret; | 3025 | return ret; |
2943 | } | 3026 | } |
2944 | 3027 | ||
3028 | /* Call module constructors. */ | ||
3029 | static void do_mod_ctors(struct module *mod) | ||
3030 | { | ||
3031 | #ifdef CONFIG_CONSTRUCTORS | ||
3032 | unsigned long i; | ||
3033 | |||
3034 | for (i = 0; i < mod->num_ctors; i++) | ||
3035 | mod->ctors[i](); | ||
3036 | #endif | ||
3037 | } | ||
3038 | |||
3039 | /* This is where the real work happens */ | ||
3040 | static int do_init_module(struct module *mod) | ||
3041 | { | ||
3042 | int ret = 0; | ||
3043 | |||
3044 | /* | ||
3045 | * We want to find out whether @mod uses async during init. Clear | ||
3046 | * PF_USED_ASYNC. async_schedule*() will set it. | ||
3047 | */ | ||
3048 | current->flags &= ~PF_USED_ASYNC; | ||
3049 | |||
3050 | blocking_notifier_call_chain(&module_notify_list, | ||
3051 | MODULE_STATE_COMING, mod); | ||
3052 | |||
3053 | /* Set RO and NX regions for core */ | ||
3054 | set_section_ro_nx(mod->module_core, | ||
3055 | mod->core_text_size, | ||
3056 | mod->core_ro_size, | ||
3057 | mod->core_size); | ||
3058 | |||
3059 | /* Set RO and NX regions for init */ | ||
3060 | set_section_ro_nx(mod->module_init, | ||
3061 | mod->init_text_size, | ||
3062 | mod->init_ro_size, | ||
3063 | mod->init_size); | ||
3064 | |||
3065 | do_mod_ctors(mod); | ||
3066 | /* Start the module */ | ||
3067 | if (mod->init != NULL) | ||
3068 | ret = do_one_initcall(mod->init); | ||
3069 | if (ret < 0) { | ||
3070 | /* Init routine failed: abort. Try to protect us from | ||
3071 | buggy refcounters. */ | ||
3072 | mod->state = MODULE_STATE_GOING; | ||
3073 | synchronize_sched(); | ||
3074 | module_put(mod); | ||
3075 | blocking_notifier_call_chain(&module_notify_list, | ||
3076 | MODULE_STATE_GOING, mod); | ||
3077 | free_module(mod); | ||
3078 | wake_up_all(&module_wq); | ||
3079 | return ret; | ||
3080 | } | ||
3081 | if (ret > 0) { | ||
3082 | printk(KERN_WARNING | ||
3083 | "%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n" | ||
3084 | "%s: loading module anyway...\n", | ||
3085 | __func__, mod->name, ret, | ||
3086 | __func__); | ||
3087 | dump_stack(); | ||
3088 | } | ||
3089 | |||
3090 | /* Now it's a first class citizen! */ | ||
3091 | mod->state = MODULE_STATE_LIVE; | ||
3092 | blocking_notifier_call_chain(&module_notify_list, | ||
3093 | MODULE_STATE_LIVE, mod); | ||
3094 | |||
3095 | /* | ||
3096 | * We need to finish all async code before the module init sequence | ||
3097 | * is done. This has potential to deadlock. For example, a newly | ||
3098 | * detected block device can trigger request_module() of the | ||
3099 | * default iosched from async probing task. Once userland helper | ||
3100 | * reaches here, async_synchronize_full() will wait on the async | ||
3101 | * task waiting on request_module() and deadlock. | ||
3102 | * | ||
3103 | * This deadlock is avoided by performing async_synchronize_full() | ||
3104 | * iff module init queued any async jobs. This isn't a full | ||
3105 | * solution as it will deadlock the same if module loading from | ||
3106 | * async jobs nests more than once; however, due to the various | ||
3107 | * constraints, this hack seems to be the best option for now. | ||
3108 | * Please refer to the following thread for details. | ||
3109 | * | ||
3110 | * http://thread.gmane.org/gmane.linux.kernel/1420814 | ||
3111 | */ | ||
3112 | if (current->flags & PF_USED_ASYNC) | ||
3113 | async_synchronize_full(); | ||
3114 | |||
3115 | mutex_lock(&module_mutex); | ||
3116 | /* Drop initial reference. */ | ||
3117 | module_put(mod); | ||
3118 | trim_init_extable(mod); | ||
3119 | #ifdef CONFIG_KALLSYMS | ||
3120 | mod->num_symtab = mod->core_num_syms; | ||
3121 | mod->symtab = mod->core_symtab; | ||
3122 | mod->strtab = mod->core_strtab; | ||
3123 | #endif | ||
3124 | unset_module_init_ro_nx(mod); | ||
3125 | module_free(mod, mod->module_init); | ||
3126 | mod->module_init = NULL; | ||
3127 | mod->init_size = 0; | ||
3128 | mod->init_ro_size = 0; | ||
3129 | mod->init_text_size = 0; | ||
3130 | mutex_unlock(&module_mutex); | ||
3131 | wake_up_all(&module_wq); | ||
3132 | |||
3133 | return 0; | ||
3134 | } | ||
3135 | |||
3136 | static int may_init_module(void) | ||
3137 | { | ||
3138 | if (!capable(CAP_SYS_MODULE) || modules_disabled) | ||
3139 | return -EPERM; | ||
3140 | |||
3141 | return 0; | ||
3142 | } | ||
3143 | |||
2945 | /* Allocate and load the module: note that size of section 0 is always | 3144 | /* Allocate and load the module: note that size of section 0 is always |
2946 | zero, and we rely on this for optional sections. */ | 3145 | zero, and we rely on this for optional sections. */ |
2947 | static struct module *load_module(void __user *umod, | 3146 | static int load_module(struct load_info *info, const char __user *uargs, |
2948 | unsigned long len, | 3147 | int flags) |
2949 | const char __user *uargs) | ||
2950 | { | 3148 | { |
2951 | struct load_info info = { NULL, }; | ||
2952 | struct module *mod, *old; | 3149 | struct module *mod, *old; |
2953 | long err; | 3150 | long err; |
2954 | 3151 | ||
2955 | pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n", | 3152 | err = module_sig_check(info); |
2956 | umod, len, uargs); | 3153 | if (err) |
3154 | goto free_copy; | ||
2957 | 3155 | ||
2958 | /* Copy in the blobs from userspace, check they are vaguely sane. */ | 3156 | err = elf_header_check(info); |
2959 | err = copy_and_check(&info, umod, len, uargs); | ||
2960 | if (err) | 3157 | if (err) |
2961 | return ERR_PTR(err); | 3158 | goto free_copy; |
2962 | 3159 | ||
2963 | /* Figure out module layout, and allocate all the memory. */ | 3160 | /* Figure out module layout, and allocate all the memory. */ |
2964 | mod = layout_and_allocate(&info); | 3161 | mod = layout_and_allocate(info, flags); |
2965 | if (IS_ERR(mod)) { | 3162 | if (IS_ERR(mod)) { |
2966 | err = PTR_ERR(mod); | 3163 | err = PTR_ERR(mod); |
2967 | goto free_copy; | 3164 | goto free_copy; |
2968 | } | 3165 | } |
2969 | 3166 | ||
3167 | /* | ||
3168 | * We try to place it in the list now to make sure it's unique | ||
3169 | * before we dedicate too many resources. In particular, | ||
3170 | * temporary percpu memory exhaustion. | ||
3171 | */ | ||
3172 | mod->state = MODULE_STATE_UNFORMED; | ||
3173 | again: | ||
3174 | mutex_lock(&module_mutex); | ||
3175 | if ((old = find_module_all(mod->name, true)) != NULL) { | ||
3176 | if (old->state == MODULE_STATE_COMING | ||
3177 | || old->state == MODULE_STATE_UNFORMED) { | ||
3178 | /* Wait in case it fails to load. */ | ||
3179 | mutex_unlock(&module_mutex); | ||
3180 | err = wait_event_interruptible(module_wq, | ||
3181 | finished_loading(mod->name)); | ||
3182 | if (err) | ||
3183 | goto free_module; | ||
3184 | goto again; | ||
3185 | } | ||
3186 | err = -EEXIST; | ||
3187 | mutex_unlock(&module_mutex); | ||
3188 | goto free_module; | ||
3189 | } | ||
3190 | list_add_rcu(&mod->list, &modules); | ||
3191 | mutex_unlock(&module_mutex); | ||
3192 | |||
2970 | #ifdef CONFIG_MODULE_SIG | 3193 | #ifdef CONFIG_MODULE_SIG |
2971 | mod->sig_ok = info.sig_ok; | 3194 | mod->sig_ok = info->sig_ok; |
2972 | if (!mod->sig_ok) | 3195 | if (!mod->sig_ok) |
2973 | add_taint_module(mod, TAINT_FORCED_MODULE); | 3196 | add_taint_module(mod, TAINT_FORCED_MODULE); |
2974 | #endif | 3197 | #endif |
@@ -2976,29 +3199,29 @@ static struct module *load_module(void __user *umod, | |||
2976 | /* Now module is in final location, initialize linked lists, etc. */ | 3199 | /* Now module is in final location, initialize linked lists, etc. */ |
2977 | err = module_unload_init(mod); | 3200 | err = module_unload_init(mod); |
2978 | if (err) | 3201 | if (err) |
2979 | goto free_module; | 3202 | goto unlink_mod; |
2980 | 3203 | ||
2981 | /* Now we've got everything in the final locations, we can | 3204 | /* Now we've got everything in the final locations, we can |
2982 | * find optional sections. */ | 3205 | * find optional sections. */ |
2983 | find_module_sections(mod, &info); | 3206 | find_module_sections(mod, info); |
2984 | 3207 | ||
2985 | err = check_module_license_and_versions(mod); | 3208 | err = check_module_license_and_versions(mod); |
2986 | if (err) | 3209 | if (err) |
2987 | goto free_unload; | 3210 | goto free_unload; |
2988 | 3211 | ||
2989 | /* Set up MODINFO_ATTR fields */ | 3212 | /* Set up MODINFO_ATTR fields */ |
2990 | setup_modinfo(mod, &info); | 3213 | setup_modinfo(mod, info); |
2991 | 3214 | ||
2992 | /* Fix up syms, so that st_value is a pointer to location. */ | 3215 | /* Fix up syms, so that st_value is a pointer to location. */ |
2993 | err = simplify_symbols(mod, &info); | 3216 | err = simplify_symbols(mod, info); |
2994 | if (err < 0) | 3217 | if (err < 0) |
2995 | goto free_modinfo; | 3218 | goto free_modinfo; |
2996 | 3219 | ||
2997 | err = apply_relocations(mod, &info); | 3220 | err = apply_relocations(mod, info); |
2998 | if (err < 0) | 3221 | if (err < 0) |
2999 | goto free_modinfo; | 3222 | goto free_modinfo; |
3000 | 3223 | ||
3001 | err = post_relocation(mod, &info); | 3224 | err = post_relocation(mod, info); |
3002 | if (err < 0) | 3225 | if (err < 0) |
3003 | goto free_modinfo; | 3226 | goto free_modinfo; |
3004 | 3227 | ||
@@ -3011,72 +3234,49 @@ static struct module *load_module(void __user *umod, | |||
3011 | goto free_arch_cleanup; | 3234 | goto free_arch_cleanup; |
3012 | } | 3235 | } |
3013 | 3236 | ||
3014 | /* Mark state as coming so strong_try_module_get() ignores us. */ | 3237 | dynamic_debug_setup(info->debug, info->num_debug); |
3015 | mod->state = MODULE_STATE_COMING; | ||
3016 | 3238 | ||
3017 | /* Now sew it into the lists so we can get lockdep and oops | ||
3018 | * info during argument parsing. No one should access us, since | ||
3019 | * strong_try_module_get() will fail. | ||
3020 | * lockdep/oops can run asynchronous, so use the RCU list insertion | ||
3021 | * function to insert in a way safe to concurrent readers. | ||
3022 | * The mutex protects against concurrent writers. | ||
3023 | */ | ||
3024 | again: | ||
3025 | mutex_lock(&module_mutex); | 3239 | mutex_lock(&module_mutex); |
3026 | if ((old = find_module(mod->name)) != NULL) { | 3240 | /* Find duplicate symbols (must be called under lock). */ |
3027 | if (old->state == MODULE_STATE_COMING) { | ||
3028 | /* Wait in case it fails to load. */ | ||
3029 | mutex_unlock(&module_mutex); | ||
3030 | err = wait_event_interruptible(module_wq, | ||
3031 | finished_loading(mod->name)); | ||
3032 | if (err) | ||
3033 | goto free_arch_cleanup; | ||
3034 | goto again; | ||
3035 | } | ||
3036 | err = -EEXIST; | ||
3037 | goto unlock; | ||
3038 | } | ||
3039 | |||
3040 | /* This has to be done once we're sure module name is unique. */ | ||
3041 | dynamic_debug_setup(info.debug, info.num_debug); | ||
3042 | |||
3043 | /* Find duplicate symbols */ | ||
3044 | err = verify_export_symbols(mod); | 3241 | err = verify_export_symbols(mod); |
3045 | if (err < 0) | 3242 | if (err < 0) |
3046 | goto ddebug; | 3243 | goto ddebug_cleanup; |
3244 | |||
3245 | /* This relies on module_mutex for list integrity. */ | ||
3246 | module_bug_finalize(info->hdr, info->sechdrs, mod); | ||
3247 | |||
3248 | /* Mark state as coming so strong_try_module_get() ignores us, | ||
3249 | * but kallsyms etc. can see us. */ | ||
3250 | mod->state = MODULE_STATE_COMING; | ||
3047 | 3251 | ||
3048 | module_bug_finalize(info.hdr, info.sechdrs, mod); | ||
3049 | list_add_rcu(&mod->list, &modules); | ||
3050 | mutex_unlock(&module_mutex); | 3252 | mutex_unlock(&module_mutex); |
3051 | 3253 | ||
3052 | /* Module is ready to execute: parsing args may do that. */ | 3254 | /* Module is ready to execute: parsing args may do that. */ |
3053 | err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, | 3255 | err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, |
3054 | -32768, 32767, &ddebug_dyndbg_module_param_cb); | 3256 | -32768, 32767, &ddebug_dyndbg_module_param_cb); |
3055 | if (err < 0) | 3257 | if (err < 0) |
3056 | goto unlink; | 3258 | goto bug_cleanup; |
3057 | 3259 | ||
3058 | /* Link in to sysfs. */ | 3260 | /* Link in to sysfs. */ |
3059 | err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp); | 3261 | err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp); |
3060 | if (err < 0) | 3262 | if (err < 0) |
3061 | goto unlink; | 3263 | goto bug_cleanup; |
3062 | 3264 | ||
3063 | /* Get rid of temporary copy. */ | 3265 | /* Get rid of temporary copy. */ |
3064 | free_copy(&info); | 3266 | free_copy(info); |
3065 | 3267 | ||
3066 | /* Done! */ | 3268 | /* Done! */ |
3067 | trace_module_load(mod); | 3269 | trace_module_load(mod); |
3068 | return mod; | ||
3069 | 3270 | ||
3070 | unlink: | 3271 | return do_init_module(mod); |
3272 | |||
3273 | bug_cleanup: | ||
3274 | /* module_bug_cleanup needs module_mutex protection */ | ||
3071 | mutex_lock(&module_mutex); | 3275 | mutex_lock(&module_mutex); |
3072 | /* Unlink carefully: kallsyms could be walking list. */ | ||
3073 | list_del_rcu(&mod->list); | ||
3074 | module_bug_cleanup(mod); | 3276 | module_bug_cleanup(mod); |
3075 | wake_up_all(&module_wq); | 3277 | ddebug_cleanup: |
3076 | ddebug: | ||
3077 | dynamic_debug_remove(info.debug); | ||
3078 | unlock: | ||
3079 | mutex_unlock(&module_mutex); | 3278 | mutex_unlock(&module_mutex); |
3279 | dynamic_debug_remove(info->debug); | ||
3080 | synchronize_sched(); | 3280 | synchronize_sched(); |
3081 | kfree(mod->args); | 3281 | kfree(mod->args); |
3082 | free_arch_cleanup: | 3282 | free_arch_cleanup: |
@@ -3085,107 +3285,59 @@ again: | |||
3085 | free_modinfo(mod); | 3285 | free_modinfo(mod); |
3086 | free_unload: | 3286 | free_unload: |
3087 | module_unload_free(mod); | 3287 | module_unload_free(mod); |
3288 | unlink_mod: | ||
3289 | mutex_lock(&module_mutex); | ||
3290 | /* Unlink carefully: kallsyms could be walking list. */ | ||
3291 | list_del_rcu(&mod->list); | ||
3292 | wake_up_all(&module_wq); | ||
3293 | mutex_unlock(&module_mutex); | ||
3088 | free_module: | 3294 | free_module: |
3089 | module_deallocate(mod, &info); | 3295 | module_deallocate(mod, info); |
3090 | free_copy: | 3296 | free_copy: |
3091 | free_copy(&info); | 3297 | free_copy(info); |
3092 | return ERR_PTR(err); | 3298 | return err; |
3093 | } | ||
3094 | |||
3095 | /* Call module constructors. */ | ||
3096 | static void do_mod_ctors(struct module *mod) | ||
3097 | { | ||
3098 | #ifdef CONFIG_CONSTRUCTORS | ||
3099 | unsigned long i; | ||
3100 | |||
3101 | for (i = 0; i < mod->num_ctors; i++) | ||
3102 | mod->ctors[i](); | ||
3103 | #endif | ||
3104 | } | 3299 | } |
3105 | 3300 | ||
3106 | /* This is where the real work happens */ | ||
3107 | SYSCALL_DEFINE3(init_module, void __user *, umod, | 3301 | SYSCALL_DEFINE3(init_module, void __user *, umod, |
3108 | unsigned long, len, const char __user *, uargs) | 3302 | unsigned long, len, const char __user *, uargs) |
3109 | { | 3303 | { |
3110 | struct module *mod; | 3304 | int err; |
3111 | int ret = 0; | 3305 | struct load_info info = { }; |
3112 | 3306 | ||
3113 | /* Must have permission */ | 3307 | err = may_init_module(); |
3114 | if (!capable(CAP_SYS_MODULE) || modules_disabled) | 3308 | if (err) |
3115 | return -EPERM; | 3309 | return err; |
3116 | 3310 | ||
3117 | /* Do all the hard work */ | 3311 | pr_debug("init_module: umod=%p, len=%lu, uargs=%p\n", |
3118 | mod = load_module(umod, len, uargs); | 3312 | umod, len, uargs); |
3119 | if (IS_ERR(mod)) | ||
3120 | return PTR_ERR(mod); | ||
3121 | 3313 | ||
3122 | blocking_notifier_call_chain(&module_notify_list, | 3314 | err = copy_module_from_user(umod, len, &info); |
3123 | MODULE_STATE_COMING, mod); | 3315 | if (err) |
3316 | return err; | ||
3124 | 3317 | ||
3125 | /* Set RO and NX regions for core */ | 3318 | return load_module(&info, uargs, 0); |
3126 | set_section_ro_nx(mod->module_core, | 3319 | } |
3127 | mod->core_text_size, | ||
3128 | mod->core_ro_size, | ||
3129 | mod->core_size); | ||
3130 | 3320 | ||
3131 | /* Set RO and NX regions for init */ | 3321 | SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) |
3132 | set_section_ro_nx(mod->module_init, | 3322 | { |
3133 | mod->init_text_size, | 3323 | int err; |
3134 | mod->init_ro_size, | 3324 | struct load_info info = { }; |
3135 | mod->init_size); | ||
3136 | 3325 | ||
3137 | do_mod_ctors(mod); | 3326 | err = may_init_module(); |
3138 | /* Start the module */ | 3327 | if (err) |
3139 | if (mod->init != NULL) | 3328 | return err; |
3140 | ret = do_one_initcall(mod->init); | ||
3141 | if (ret < 0) { | ||
3142 | /* Init routine failed: abort. Try to protect us from | ||
3143 | buggy refcounters. */ | ||
3144 | mod->state = MODULE_STATE_GOING; | ||
3145 | synchronize_sched(); | ||
3146 | module_put(mod); | ||
3147 | blocking_notifier_call_chain(&module_notify_list, | ||
3148 | MODULE_STATE_GOING, mod); | ||
3149 | free_module(mod); | ||
3150 | wake_up_all(&module_wq); | ||
3151 | return ret; | ||
3152 | } | ||
3153 | if (ret > 0) { | ||
3154 | printk(KERN_WARNING | ||
3155 | "%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n" | ||
3156 | "%s: loading module anyway...\n", | ||
3157 | __func__, mod->name, ret, | ||
3158 | __func__); | ||
3159 | dump_stack(); | ||
3160 | } | ||
3161 | 3329 | ||
3162 | /* Now it's a first class citizen! */ | 3330 | pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags); |
3163 | mod->state = MODULE_STATE_LIVE; | ||
3164 | blocking_notifier_call_chain(&module_notify_list, | ||
3165 | MODULE_STATE_LIVE, mod); | ||
3166 | 3331 | ||
3167 | /* We need to finish all async code before the module init sequence is done */ | 3332 | if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS |
3168 | async_synchronize_full(); | 3333 | |MODULE_INIT_IGNORE_VERMAGIC)) |
3334 | return -EINVAL; | ||
3169 | 3335 | ||
3170 | mutex_lock(&module_mutex); | 3336 | err = copy_module_from_fd(fd, &info); |
3171 | /* Drop initial reference. */ | 3337 | if (err) |
3172 | module_put(mod); | 3338 | return err; |
3173 | trim_init_extable(mod); | ||
3174 | #ifdef CONFIG_KALLSYMS | ||
3175 | mod->num_symtab = mod->core_num_syms; | ||
3176 | mod->symtab = mod->core_symtab; | ||
3177 | mod->strtab = mod->core_strtab; | ||
3178 | #endif | ||
3179 | unset_module_init_ro_nx(mod); | ||
3180 | module_free(mod, mod->module_init); | ||
3181 | mod->module_init = NULL; | ||
3182 | mod->init_size = 0; | ||
3183 | mod->init_ro_size = 0; | ||
3184 | mod->init_text_size = 0; | ||
3185 | mutex_unlock(&module_mutex); | ||
3186 | wake_up_all(&module_wq); | ||
3187 | 3339 | ||
3188 | return 0; | 3340 | return load_module(&info, uargs, flags); |
3189 | } | 3341 | } |
3190 | 3342 | ||
3191 | static inline int within(unsigned long addr, void *start, unsigned long size) | 3343 | static inline int within(unsigned long addr, void *start, unsigned long size) |
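
The two syscalls above differ only in how the module image reaches the kernel: init_module() copies a blob from a user pointer, while the new finit_module() reads it from a file descriptor and accepts the two MODULE_INIT_* flags. A minimal userspace sketch of the fd-based path (hedged: it assumes headers that define __NR_finit_module; the fallback flag values mirror this series' uapi definitions):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef MODULE_INIT_IGNORE_MODVERSIONS
#define MODULE_INIT_IGNORE_MODVERSIONS 1	/* per this series' uapi header */
#endif
#ifndef MODULE_INIT_IGNORE_VERMAGIC
#define MODULE_INIT_IGNORE_VERMAGIC 2
#endif

int main(int argc, char **argv)
{
	if (argc < 2) {
		fprintf(stderr, "usage: %s module.ko\n", argv[0]);
		return 1;
	}
	int fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Any flag outside the two MODULE_INIT_* bits draws -EINVAL. */
	if (syscall(__NR_finit_module, fd, "", 0) != 0)
		perror("finit_module");
	close(fd);
	return 0;
}
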
@@ -3261,6 +3413,8 @@ const char *module_address_lookup(unsigned long addr, | |||
3261 | 3413 | ||
3262 | preempt_disable(); | 3414 | preempt_disable(); |
3263 | list_for_each_entry_rcu(mod, &modules, list) { | 3415 | list_for_each_entry_rcu(mod, &modules, list) { |
3416 | if (mod->state == MODULE_STATE_UNFORMED) | ||
3417 | continue; | ||
3264 | if (within_module_init(addr, mod) || | 3418 | if (within_module_init(addr, mod) || |
3265 | within_module_core(addr, mod)) { | 3419 | within_module_core(addr, mod)) { |
3266 | if (modname) | 3420 | if (modname) |
@@ -3284,6 +3438,8 @@ int lookup_module_symbol_name(unsigned long addr, char *symname) | |||
3284 | 3438 | ||
3285 | preempt_disable(); | 3439 | preempt_disable(); |
3286 | list_for_each_entry_rcu(mod, &modules, list) { | 3440 | list_for_each_entry_rcu(mod, &modules, list) { |
3441 | if (mod->state == MODULE_STATE_UNFORMED) | ||
3442 | continue; | ||
3287 | if (within_module_init(addr, mod) || | 3443 | if (within_module_init(addr, mod) || |
3288 | within_module_core(addr, mod)) { | 3444 | within_module_core(addr, mod)) { |
3289 | const char *sym; | 3445 | const char *sym; |
@@ -3308,6 +3464,8 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, | |||
3308 | 3464 | ||
3309 | preempt_disable(); | 3465 | preempt_disable(); |
3310 | list_for_each_entry_rcu(mod, &modules, list) { | 3466 | list_for_each_entry_rcu(mod, &modules, list) { |
3467 | if (mod->state == MODULE_STATE_UNFORMED) | ||
3468 | continue; | ||
3311 | if (within_module_init(addr, mod) || | 3469 | if (within_module_init(addr, mod) || |
3312 | within_module_core(addr, mod)) { | 3470 | within_module_core(addr, mod)) { |
3313 | const char *sym; | 3471 | const char *sym; |
@@ -3335,6 +3493,8 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, | |||
3335 | 3493 | ||
3336 | preempt_disable(); | 3494 | preempt_disable(); |
3337 | list_for_each_entry_rcu(mod, &modules, list) { | 3495 | list_for_each_entry_rcu(mod, &modules, list) { |
3496 | if (mod->state == MODULE_STATE_UNFORMED) | ||
3497 | continue; | ||
3338 | if (symnum < mod->num_symtab) { | 3498 | if (symnum < mod->num_symtab) { |
3339 | *value = mod->symtab[symnum].st_value; | 3499 | *value = mod->symtab[symnum].st_value; |
3340 | *type = mod->symtab[symnum].st_info; | 3500 | *type = mod->symtab[symnum].st_info; |
@@ -3377,9 +3537,12 @@ unsigned long module_kallsyms_lookup_name(const char *name) | |||
3377 | ret = mod_find_symname(mod, colon+1); | 3537 | ret = mod_find_symname(mod, colon+1); |
3378 | *colon = ':'; | 3538 | *colon = ':'; |
3379 | } else { | 3539 | } else { |
3380 | list_for_each_entry_rcu(mod, &modules, list) | 3540 | list_for_each_entry_rcu(mod, &modules, list) { |
3541 | if (mod->state == MODULE_STATE_UNFORMED) | ||
3542 | continue; | ||
3381 | if ((ret = mod_find_symname(mod, name)) != 0) | 3543 | if ((ret = mod_find_symname(mod, name)) != 0) |
3382 | break; | 3544 | break; |
3545 | } | ||
3383 | } | 3546 | } |
3384 | preempt_enable(); | 3547 | preempt_enable(); |
3385 | return ret; | 3548 | return ret; |
@@ -3394,6 +3557,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, | |||
3394 | int ret; | 3557 | int ret; |
3395 | 3558 | ||
3396 | list_for_each_entry(mod, &modules, list) { | 3559 | list_for_each_entry(mod, &modules, list) { |
3560 | if (mod->state == MODULE_STATE_UNFORMED) | ||
3561 | continue; | ||
3397 | for (i = 0; i < mod->num_symtab; i++) { | 3562 | for (i = 0; i < mod->num_symtab; i++) { |
3398 | ret = fn(data, mod->strtab + mod->symtab[i].st_name, | 3563 | ret = fn(data, mod->strtab + mod->symtab[i].st_name, |
3399 | mod, mod->symtab[i].st_value); | 3564 | mod, mod->symtab[i].st_value); |
@@ -3409,6 +3574,7 @@ static char *module_flags(struct module *mod, char *buf) | |||
3409 | { | 3574 | { |
3410 | int bx = 0; | 3575 | int bx = 0; |
3411 | 3576 | ||
3577 | BUG_ON(mod->state == MODULE_STATE_UNFORMED); | ||
3412 | if (mod->taints || | 3578 | if (mod->taints || |
3413 | mod->state == MODULE_STATE_GOING || | 3579 | mod->state == MODULE_STATE_GOING || |
3414 | mod->state == MODULE_STATE_COMING) { | 3580 | mod->state == MODULE_STATE_COMING) { |
@@ -3450,6 +3616,10 @@ static int m_show(struct seq_file *m, void *p) | |||
3450 | struct module *mod = list_entry(p, struct module, list); | 3616 | struct module *mod = list_entry(p, struct module, list); |
3451 | char buf[8]; | 3617 | char buf[8]; |
3452 | 3618 | ||
3619 | /* We always ignore unformed modules. */ | ||
3620 | if (mod->state == MODULE_STATE_UNFORMED) | ||
3621 | return 0; | ||
3622 | |||
3453 | seq_printf(m, "%s %u", | 3623 | seq_printf(m, "%s %u", |
3454 | mod->name, mod->init_size + mod->core_size); | 3624 | mod->name, mod->init_size + mod->core_size); |
3455 | print_unload_info(m, mod); | 3625 | print_unload_info(m, mod); |
@@ -3510,6 +3680,8 @@ const struct exception_table_entry *search_module_extables(unsigned long addr) | |||
3510 | 3680 | ||
3511 | preempt_disable(); | 3681 | preempt_disable(); |
3512 | list_for_each_entry_rcu(mod, &modules, list) { | 3682 | list_for_each_entry_rcu(mod, &modules, list) { |
3683 | if (mod->state == MODULE_STATE_UNFORMED) | ||
3684 | continue; | ||
3513 | if (mod->num_exentries == 0) | 3685 | if (mod->num_exentries == 0) |
3514 | continue; | 3686 | continue; |
3515 | 3687 | ||
@@ -3558,10 +3730,13 @@ struct module *__module_address(unsigned long addr) | |||
3558 | if (addr < module_addr_min || addr > module_addr_max) | 3730 | if (addr < module_addr_min || addr > module_addr_max) |
3559 | return NULL; | 3731 | return NULL; |
3560 | 3732 | ||
3561 | list_for_each_entry_rcu(mod, &modules, list) | 3733 | list_for_each_entry_rcu(mod, &modules, list) { |
3734 | if (mod->state == MODULE_STATE_UNFORMED) | ||
3735 | continue; | ||
3562 | if (within_module_core(addr, mod) | 3736 | if (within_module_core(addr, mod) |
3563 | || within_module_init(addr, mod)) | 3737 | || within_module_init(addr, mod)) |
3564 | return mod; | 3738 | return mod; |
3739 | } | ||
3565 | return NULL; | 3740 | return NULL; |
3566 | } | 3741 | } |
3567 | EXPORT_SYMBOL_GPL(__module_address); | 3742 | EXPORT_SYMBOL_GPL(__module_address); |
@@ -3614,8 +3789,11 @@ void print_modules(void) | |||
3614 | printk(KERN_DEFAULT "Modules linked in:"); | 3789 | printk(KERN_DEFAULT "Modules linked in:"); |
3615 | /* Most callers should already have preempt disabled, but make sure */ | 3790 | /* Most callers should already have preempt disabled, but make sure */ |
3616 | preempt_disable(); | 3791 | preempt_disable(); |
3617 | list_for_each_entry_rcu(mod, &modules, list) | 3792 | list_for_each_entry_rcu(mod, &modules, list) { |
3793 | if (mod->state == MODULE_STATE_UNFORMED) | ||
3794 | continue; | ||
3618 | printk(" %s%s", mod->name, module_flags(mod, buf)); | 3795 | printk(" %s%s", mod->name, module_flags(mod, buf)); |
3796 | } | ||
3619 | preempt_enable(); | 3797 | preempt_enable(); |
3620 | if (last_unloaded_module[0]) | 3798 | if (last_unloaded_module[0]) |
3621 | printk(" [last unloaded: %s]", last_unloaded_module); | 3799 | printk(" [last unloaded: %s]", last_unloaded_module); |
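
Every remaining module.c hunk repeats one pattern: list walkers skip entries still in MODULE_STATE_UNFORMED, because load_module() now puts a module on the list early (to reserve its name) before it is fit for consumers such as kallsyms or /proc/modules. A toy standalone sketch of the idea, with a plain singly linked list standing in for the RCU list:

#include <stdio.h>

enum state { UNFORMED, COMING, LIVE, GOING };

struct mod {
	const char *name;
	enum state state;
	struct mod *next;
};

/* Mimics m_show()/print_modules(): unformed entries are invisible. */
static void print_modules(const struct mod *head)
{
	for (const struct mod *m = head; m; m = m->next) {
		if (m->state == UNFORMED)
			continue;	/* name reserved, not ready yet */
		printf(" %s", m->name);
	}
	printf("\n");
}

int main(void)
{
	struct mod done = { "fully_loaded", LIVE, NULL };
	struct mod racy = { "still_loading", UNFORMED, &done };

	print_modules(&racy);	/* prints only " fully_loaded" */
	return 0;
}
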
diff --git a/kernel/pid.c b/kernel/pid.c index 36aa02ff17d6..de9af600006f 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -270,7 +270,6 @@ void free_pid(struct pid *pid) | |||
270 | wake_up_process(ns->child_reaper); | 270 | wake_up_process(ns->child_reaper); |
271 | break; | 271 | break; |
272 | case 0: | 272 | case 0: |
273 | ns->nr_hashed = -1; | ||
274 | schedule_work(&ns->proc_work); | 273 | schedule_work(&ns->proc_work); |
275 | break; | 274 | break; |
276 | } | 275 | } |
@@ -319,7 +318,7 @@ struct pid *alloc_pid(struct pid_namespace *ns) | |||
319 | 318 | ||
320 | upid = pid->numbers + ns->level; | 319 | upid = pid->numbers + ns->level; |
321 | spin_lock_irq(&pidmap_lock); | 320 | spin_lock_irq(&pidmap_lock); |
322 | if (ns->nr_hashed < 0) | 321 | if (!(ns->nr_hashed & PIDNS_HASH_ADDING)) |
323 | goto out_unlock; | 322 | goto out_unlock; |
324 | for ( ; upid >= pid->numbers; --upid) { | 323 | for ( ; upid >= pid->numbers; --upid) { |
325 | hlist_add_head_rcu(&upid->pid_chain, | 324 | hlist_add_head_rcu(&upid->pid_chain, |
@@ -342,6 +341,13 @@ out_free: | |||
342 | goto out; | 341 | goto out; |
343 | } | 342 | } |
344 | 343 | ||
344 | void disable_pid_allocation(struct pid_namespace *ns) | ||
345 | { | ||
346 | spin_lock_irq(&pidmap_lock); | ||
347 | ns->nr_hashed &= ~PIDNS_HASH_ADDING; | ||
348 | spin_unlock_irq(&pidmap_lock); | ||
349 | } | ||
350 | |||
345 | struct pid *find_pid_ns(int nr, struct pid_namespace *ns) | 351 | struct pid *find_pid_ns(int nr, struct pid_namespace *ns) |
346 | { | 352 | { |
347 | struct hlist_node *elem; | 353 | struct hlist_node *elem; |
@@ -573,6 +579,9 @@ void __init pidhash_init(void) | |||
573 | 579 | ||
574 | void __init pidmap_init(void) | 580 | void __init pidmap_init(void) |
575 | { | 581 | { |
582 | /* Verify no one has done anything silly */ | ||
583 | BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING); | ||
584 | |||
576 | /* bump default and minimum pid_max based on number of cpus */ | 585 | /* bump default and minimum pid_max based on number of cpus */ |
577 | pid_max = min(pid_max_max, max_t(int, pid_max, | 586 | pid_max = min(pid_max_max, max_t(int, pid_max, |
578 | PIDS_PER_CPU_DEFAULT * num_possible_cpus())); | 587 | PIDS_PER_CPU_DEFAULT * num_possible_cpus())); |
@@ -584,7 +593,7 @@ void __init pidmap_init(void) | |||
584 | /* Reserve PID 0. We never call free_pidmap(0) */ | 593 | /* Reserve PID 0. We never call free_pidmap(0) */ |
585 | set_bit(0, init_pid_ns.pidmap[0].page); | 594 | set_bit(0, init_pid_ns.pidmap[0].page); |
586 | atomic_dec(&init_pid_ns.pidmap[0].nr_free); | 595 | atomic_dec(&init_pid_ns.pidmap[0].nr_free); |
587 | init_pid_ns.nr_hashed = 1; | 596 | init_pid_ns.nr_hashed = PIDNS_HASH_ADDING; |
588 | 597 | ||
589 | init_pid_ns.pid_cachep = KMEM_CACHE(pid, | 598 | init_pid_ns.pid_cachep = KMEM_CACHE(pid, |
590 | SLAB_HWCACHE_ALIGN | SLAB_PANIC); | 599 | SLAB_HWCACHE_ALIGN | SLAB_PANIC); |
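
The pid.c change retires the nr_hashed = -1 shutdown sentinel in favor of a dedicated flag bit, PIDNS_HASH_ADDING (taken here as the top bit, consistent with the BUILD_BUG_ON above), so one field keeps counting hashed pids while simultaneously gating allocation. The encoding in miniature:

#include <assert.h>
#include <stdio.h>

#define PIDNS_HASH_ADDING (1U << 31)	/* high bit gates allocation */
#define PID_MAX_LIMIT	  (4 * 1024 * 1024)

static unsigned int nr_hashed = PIDNS_HASH_ADDING;	/* flag set, count 0 */

static int alloc_pid(void)
{
	if (!(nr_hashed & PIDNS_HASH_ADDING))
		return -1;		/* namespace is shutting down */
	nr_hashed++;			/* count lives in the low bits */
	return 0;
}

int main(void)
{
	assert(PID_MAX_LIMIT < PIDNS_HASH_ADDING);	/* the BUILD_BUG_ON */

	alloc_pid();
	printf("hashed pids: %u\n", nr_hashed & ~PIDNS_HASH_ADDING);

	nr_hashed &= ~PIDNS_HASH_ADDING;	/* disable_pid_allocation() */
	printf("alloc after disable: %d\n", alloc_pid());
	return 0;
}
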
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 560da0dab230..c1c3dc1c6023 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c | |||
@@ -115,6 +115,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns | |||
115 | ns->level = level; | 115 | ns->level = level; |
116 | ns->parent = get_pid_ns(parent_pid_ns); | 116 | ns->parent = get_pid_ns(parent_pid_ns); |
117 | ns->user_ns = get_user_ns(user_ns); | 117 | ns->user_ns = get_user_ns(user_ns); |
118 | ns->nr_hashed = PIDNS_HASH_ADDING; | ||
118 | INIT_WORK(&ns->proc_work, proc_cleanup_work); | 119 | INIT_WORK(&ns->proc_work, proc_cleanup_work); |
119 | 120 | ||
120 | set_bit(0, ns->pidmap[0].page); | 121 | set_bit(0, ns->pidmap[0].page); |
@@ -181,6 +182,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
181 | int rc; | 182 | int rc; |
182 | struct task_struct *task, *me = current; | 183 | struct task_struct *task, *me = current; |
183 | 184 | ||
185 | /* Don't allow any more processes into the pid namespace */ | ||
186 | disable_pid_allocation(pid_ns); | ||
187 | |||
184 | /* Ignore SIGCHLD causing any terminated children to autoreap */ | 188 | /* Ignore SIGCHLD causing any terminated children to autoreap */ |
185 | spin_lock_irq(&me->sighand->siglock); | 189 | spin_lock_irq(&me->sighand->siglock); |
186 | me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN; | 190 | me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN; |
@@ -325,7 +329,8 @@ static int pidns_install(struct nsproxy *nsproxy, void *ns) | |||
325 | struct pid_namespace *active = task_active_pid_ns(current); | 329 | struct pid_namespace *active = task_active_pid_ns(current); |
326 | struct pid_namespace *ancestor, *new = ns; | 330 | struct pid_namespace *ancestor, *new = ns; |
327 | 331 | ||
328 | if (!ns_capable(new->user_ns, CAP_SYS_ADMIN)) | 332 | if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) || |
333 | !nsown_capable(CAP_SYS_ADMIN)) | ||
329 | return -EPERM; | 334 | return -EPERM; |
330 | 335 | ||
331 | /* | 336 | /* |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index d73840271dce..a278cad1d5d6 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <asm/uaccess.h> | 9 | #include <asm/uaccess.h> |
10 | #include <linux/kernel_stat.h> | 10 | #include <linux/kernel_stat.h> |
11 | #include <trace/events/timer.h> | 11 | #include <trace/events/timer.h> |
12 | #include <linux/random.h> | ||
12 | 13 | ||
13 | /* | 14 | /* |
14 | * Called after updating RLIMIT_CPU to run cpu timer and update | 15 | * Called after updating RLIMIT_CPU to run cpu timer and update |
@@ -470,6 +471,8 @@ static void cleanup_timers(struct list_head *head, | |||
470 | */ | 471 | */ |
471 | void posix_cpu_timers_exit(struct task_struct *tsk) | 472 | void posix_cpu_timers_exit(struct task_struct *tsk) |
472 | { | 473 | { |
474 | add_device_randomness((const void*) &tsk->se.sum_exec_runtime, | ||
475 | sizeof(unsigned long long)); | ||
473 | cleanup_timers(tsk->cpu_timers, | 476 | cleanup_timers(tsk->cpu_timers, |
474 | tsk->utime, tsk->stime, tsk->se.sum_exec_runtime); | 477 | tsk->utime, tsk->stime, tsk->se.sum_exec_runtime); |
475 | 478 | ||
diff --git a/kernel/printk.c b/kernel/printk.c index 19c0d7bcf24a..357f714ddd49 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -870,10 +870,11 @@ static size_t print_time(u64 ts, char *buf) | |||
870 | if (!printk_time) | 870 | if (!printk_time) |
871 | return 0; | 871 | return 0; |
872 | 872 | ||
873 | rem_nsec = do_div(ts, 1000000000); | ||
874 | |||
873 | if (!buf) | 875 | if (!buf) |
874 | return 15; | 876 | return snprintf(NULL, 0, "[%5lu.000000] ", (unsigned long)ts); |
875 | 877 | ||
876 | rem_nsec = do_div(ts, 1000000000); | ||
877 | return sprintf(buf, "[%5lu.%06lu] ", | 878 | return sprintf(buf, "[%5lu.%06lu] ", |
878 | (unsigned long)ts, rem_nsec / 1000); | 879 | (unsigned long)ts, rem_nsec / 1000); |
879 | } | 880 | } |
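
The print_time() fix moves do_div() ahead of the !buf sizing query and replaces the hard-coded answer of 15 with snprintf(NULL, 0, ...), which returns the exact number of bytes the record would need, so wide timestamps no longer get undersized. The sizing trick in isolation:

#include <stdio.h>

int main(void)
{
	unsigned long long ts = 12345678901234ULL;	/* nanoseconds */
	unsigned long rem_nsec = ts % 1000000000;	/* do_div() stand-in */
	unsigned long sec = ts / 1000000000;

	/* snprintf with a NULL buffer only measures the output... */
	int width = snprintf(NULL, 0, "[%5lu.%06lu] ", sec, rem_nsec / 1000);
	printf("timestamp needs %d bytes\n", width);

	/* ...and a second call with a real buffer renders it. */
	char buf[32];
	snprintf(buf, sizeof(buf), "[%5lu.%06lu] ", sec, rem_nsec / 1000);
	printf("%s\n", buf);
	return 0;
}
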
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 1599157336a6..6cbeaae4406d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -117,11 +117,45 @@ void __ptrace_unlink(struct task_struct *child) | |||
117 | * TASK_KILLABLE sleeps. | 117 | * TASK_KILLABLE sleeps. |
118 | */ | 118 | */ |
119 | if (child->jobctl & JOBCTL_STOP_PENDING || task_is_traced(child)) | 119 | if (child->jobctl & JOBCTL_STOP_PENDING || task_is_traced(child)) |
120 | signal_wake_up(child, task_is_traced(child)); | 120 | ptrace_signal_wake_up(child, true); |
121 | 121 | ||
122 | spin_unlock(&child->sighand->siglock); | 122 | spin_unlock(&child->sighand->siglock); |
123 | } | 123 | } |
124 | 124 | ||
125 | /* Ensure that nothing can wake it up, even SIGKILL */ | ||
126 | static bool ptrace_freeze_traced(struct task_struct *task) | ||
127 | { | ||
128 | bool ret = false; | ||
129 | |||
130 | /* Lockless, nobody but us can set this flag */ | ||
131 | if (task->jobctl & JOBCTL_LISTENING) | ||
132 | return ret; | ||
133 | |||
134 | spin_lock_irq(&task->sighand->siglock); | ||
135 | if (task_is_traced(task) && !__fatal_signal_pending(task)) { | ||
136 | task->state = __TASK_TRACED; | ||
137 | ret = true; | ||
138 | } | ||
139 | spin_unlock_irq(&task->sighand->siglock); | ||
140 | |||
141 | return ret; | ||
142 | } | ||
143 | |||
144 | static void ptrace_unfreeze_traced(struct task_struct *task) | ||
145 | { | ||
146 | if (task->state != __TASK_TRACED) | ||
147 | return; | ||
148 | |||
149 | WARN_ON(!task->ptrace || task->parent != current); | ||
150 | |||
151 | spin_lock_irq(&task->sighand->siglock); | ||
152 | if (__fatal_signal_pending(task)) | ||
153 | wake_up_state(task, __TASK_TRACED); | ||
154 | else | ||
155 | task->state = TASK_TRACED; | ||
156 | spin_unlock_irq(&task->sighand->siglock); | ||
157 | } | ||
158 | |||
125 | /** | 159 | /** |
126 | * ptrace_check_attach - check whether ptracee is ready for ptrace operation | 160 | * ptrace_check_attach - check whether ptracee is ready for ptrace operation |
127 | * @child: ptracee to check for | 161 | * @child: ptracee to check for |
@@ -139,7 +173,7 @@ void __ptrace_unlink(struct task_struct *child) | |||
139 | * RETURNS: | 173 | * RETURNS: |
140 | * 0 on success, -ESRCH if %child is not ready. | 174 | * 0 on success, -ESRCH if %child is not ready. |
141 | */ | 175 | */ |
142 | int ptrace_check_attach(struct task_struct *child, bool ignore_state) | 176 | static int ptrace_check_attach(struct task_struct *child, bool ignore_state) |
143 | { | 177 | { |
144 | int ret = -ESRCH; | 178 | int ret = -ESRCH; |
145 | 179 | ||
@@ -151,24 +185,29 @@ int ptrace_check_attach(struct task_struct *child, bool ignore_state) | |||
151 | * be changed by us so it's not changing right after this. | 185 | * be changed by us so it's not changing right after this. |
152 | */ | 186 | */ |
153 | read_lock(&tasklist_lock); | 187 | read_lock(&tasklist_lock); |
154 | if ((child->ptrace & PT_PTRACED) && child->parent == current) { | 188 | if (child->ptrace && child->parent == current) { |
189 | WARN_ON(child->state == __TASK_TRACED); | ||
155 | /* | 190 | /* |
156 | * child->sighand can't be NULL, release_task() | 191 | * child->sighand can't be NULL, release_task() |
157 | * does ptrace_unlink() before __exit_signal(). | 192 | * does ptrace_unlink() before __exit_signal(). |
158 | */ | 193 | */ |
159 | spin_lock_irq(&child->sighand->siglock); | 194 | if (ignore_state || ptrace_freeze_traced(child)) |
160 | WARN_ON_ONCE(task_is_stopped(child)); | ||
161 | if (ignore_state || (task_is_traced(child) && | ||
162 | !(child->jobctl & JOBCTL_LISTENING))) | ||
163 | ret = 0; | 195 | ret = 0; |
164 | spin_unlock_irq(&child->sighand->siglock); | ||
165 | } | 196 | } |
166 | read_unlock(&tasklist_lock); | 197 | read_unlock(&tasklist_lock); |
167 | 198 | ||
168 | if (!ret && !ignore_state) | 199 | if (!ret && !ignore_state) { |
169 | ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH; | 200 | if (!wait_task_inactive(child, __TASK_TRACED)) { |
201 | /* | ||
202 | * This can only happen if may_ptrace_stop() fails and | ||
203 | * ptrace_stop() changes ->state back to TASK_RUNNING, | ||
204 | * so we should not worry about leaking __TASK_TRACED. | ||
205 | */ | ||
206 | WARN_ON(child->state == __TASK_TRACED); | ||
207 | ret = -ESRCH; | ||
208 | } | ||
209 | } | ||
170 | 210 | ||
171 | /* All systems go.. */ | ||
172 | return ret; | 211 | return ret; |
173 | } | 212 | } |
174 | 213 | ||
@@ -317,7 +356,7 @@ static int ptrace_attach(struct task_struct *task, long request, | |||
317 | */ | 356 | */ |
318 | if (task_is_stopped(task) && | 357 | if (task_is_stopped(task) && |
319 | task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) | 358 | task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) |
320 | signal_wake_up(task, 1); | 359 | signal_wake_up_state(task, __TASK_STOPPED); |
321 | 360 | ||
322 | spin_unlock(&task->sighand->siglock); | 361 | spin_unlock(&task->sighand->siglock); |
323 | 362 | ||
@@ -737,7 +776,7 @@ int ptrace_request(struct task_struct *child, long request, | |||
737 | * tracee into STOP. | 776 | * tracee into STOP. |
738 | */ | 777 | */ |
739 | if (likely(task_set_jobctl_pending(child, JOBCTL_TRAP_STOP))) | 778 | if (likely(task_set_jobctl_pending(child, JOBCTL_TRAP_STOP))) |
740 | signal_wake_up(child, child->jobctl & JOBCTL_LISTENING); | 779 | ptrace_signal_wake_up(child, child->jobctl & JOBCTL_LISTENING); |
741 | 780 | ||
742 | unlock_task_sighand(child, &flags); | 781 | unlock_task_sighand(child, &flags); |
743 | ret = 0; | 782 | ret = 0; |
@@ -763,7 +802,7 @@ int ptrace_request(struct task_struct *child, long request, | |||
763 | * start of this trap and now. Trigger re-trap. | 802 | * start of this trap and now. Trigger re-trap. |
764 | */ | 803 | */ |
765 | if (child->jobctl & JOBCTL_TRAP_NOTIFY) | 804 | if (child->jobctl & JOBCTL_TRAP_NOTIFY) |
766 | signal_wake_up(child, true); | 805 | ptrace_signal_wake_up(child, true); |
767 | ret = 0; | 806 | ret = 0; |
768 | } | 807 | } |
769 | unlock_task_sighand(child, &flags); | 808 | unlock_task_sighand(child, &flags); |
@@ -900,6 +939,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, | |||
900 | goto out_put_task_struct; | 939 | goto out_put_task_struct; |
901 | 940 | ||
902 | ret = arch_ptrace(child, request, addr, data); | 941 | ret = arch_ptrace(child, request, addr, data); |
942 | if (ret || request != PTRACE_DETACH) | ||
943 | ptrace_unfreeze_traced(child); | ||
903 | 944 | ||
904 | out_put_task_struct: | 945 | out_put_task_struct: |
905 | put_task_struct(child); | 946 | put_task_struct(child); |
@@ -1039,8 +1080,11 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, | |||
1039 | 1080 | ||
1040 | ret = ptrace_check_attach(child, request == PTRACE_KILL || | 1081 | ret = ptrace_check_attach(child, request == PTRACE_KILL || |
1041 | request == PTRACE_INTERRUPT); | 1082 | request == PTRACE_INTERRUPT); |
1042 | if (!ret) | 1083 | if (!ret) { |
1043 | ret = compat_arch_ptrace(child, request, addr, data); | 1084 | ret = compat_arch_ptrace(child, request, addr, data); |
1085 | if (ret || request != PTRACE_DETACH) | ||
1086 | ptrace_unfreeze_traced(child); | ||
1087 | } | ||
1044 | 1088 | ||
1045 | out_put_task_struct: | 1089 | out_put_task_struct: |
1046 | put_task_struct(child); | 1090 | put_task_struct(child); |
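
ptrace_freeze_traced()/ptrace_unfreeze_traced() close a window where a fatal signal could wake the tracee while the tracer is mid-request; note the unfreeze is skipped only after a successful PTRACE_DETACH, which has already released the child. For orientation, a hedged userspace sketch of the tracer-side sequence whose kernel half is being hardened:

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t child = fork();

	if (child == 0) {	/* tracee: just idle until signaled */
		pause();
		_exit(0);
	}

	if (ptrace(PTRACE_ATTACH, child, NULL, NULL) < 0) {
		perror("PTRACE_ATTACH");
		return 1;
	}
	waitpid(child, NULL, 0);	/* tracee now sits in TASK_TRACED */

	/* Requests below run with the tracee frozen in __TASK_TRACED,
	 * so not even SIGKILL can yank it away mid-operation. */
	errno = 0;
	long word = ptrace(PTRACE_PEEKUSER, child, NULL, NULL);
	if (errno)
		perror("PTRACE_PEEKUSER");
	else
		printf("user-area word 0: %ld\n", word);

	ptrace(PTRACE_DETACH, child, NULL, NULL);	/* thaws the child */
	kill(child, SIGTERM);
	return 0;
}
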
diff --git a/kernel/res_counter.c b/kernel/res_counter.c index 3920d593e63c..ff55247e7049 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c | |||
@@ -86,33 +86,39 @@ int res_counter_charge_nofail(struct res_counter *counter, unsigned long val, | |||
86 | return __res_counter_charge(counter, val, limit_fail_at, true); | 86 | return __res_counter_charge(counter, val, limit_fail_at, true); |
87 | } | 87 | } |
88 | 88 | ||
89 | void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val) | 89 | u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val) |
90 | { | 90 | { |
91 | if (WARN_ON(counter->usage < val)) | 91 | if (WARN_ON(counter->usage < val)) |
92 | val = counter->usage; | 92 | val = counter->usage; |
93 | 93 | ||
94 | counter->usage -= val; | 94 | counter->usage -= val; |
95 | return counter->usage; | ||
95 | } | 96 | } |
96 | 97 | ||
97 | void res_counter_uncharge_until(struct res_counter *counter, | 98 | u64 res_counter_uncharge_until(struct res_counter *counter, |
98 | struct res_counter *top, | 99 | struct res_counter *top, |
99 | unsigned long val) | 100 | unsigned long val) |
100 | { | 101 | { |
101 | unsigned long flags; | 102 | unsigned long flags; |
102 | struct res_counter *c; | 103 | struct res_counter *c; |
104 | u64 ret = 0; | ||
103 | 105 | ||
104 | local_irq_save(flags); | 106 | local_irq_save(flags); |
105 | for (c = counter; c != top; c = c->parent) { | 107 | for (c = counter; c != top; c = c->parent) { |
108 | u64 r; | ||
106 | spin_lock(&c->lock); | 109 | spin_lock(&c->lock); |
107 | res_counter_uncharge_locked(c, val); | 110 | r = res_counter_uncharge_locked(c, val); |
111 | if (c == counter) | ||
112 | ret = r; | ||
108 | spin_unlock(&c->lock); | 113 | spin_unlock(&c->lock); |
109 | } | 114 | } |
110 | local_irq_restore(flags); | 115 | local_irq_restore(flags); |
116 | return ret; | ||
111 | } | 117 | } |
112 | 118 | ||
113 | void res_counter_uncharge(struct res_counter *counter, unsigned long val) | 119 | u64 res_counter_uncharge(struct res_counter *counter, unsigned long val) |
114 | { | 120 | { |
115 | res_counter_uncharge_until(counter, NULL, val); | 121 | return res_counter_uncharge_until(counter, NULL, val); |
116 | } | 122 | } |
117 | 123 | ||
118 | static inline unsigned long long * | 124 | static inline unsigned long long * |
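
Returning the post-uncharge usage lets a caller learn, without re-taking the spinlock, whether the counter it charged has drained to zero. The hierarchy walk, modeled without locking:

#include <stdio.h>

struct counter {
	unsigned long long usage;
	struct counter *parent;
};

/* Walks from c up to (but excluding) top, uncharging each level and
 * remembering the remaining usage at the level that was charged. */
static unsigned long long
uncharge_until(struct counter *c, struct counter *top, unsigned long val)
{
	unsigned long long ret = 0;

	for (struct counter *p = c; p != top; p = p->parent) {
		p->usage -= val;
		if (p == c)
			ret = p->usage;
	}
	return ret;
}

int main(void)
{
	struct counter root = { 300, NULL };
	struct counter leaf = { 100, &root };

	if (uncharge_until(&leaf, NULL, 100) == 0)
		printf("leaf drained; root still holds %llu\n", root.usage);
	return 0;
}
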
diff --git a/kernel/rwsem.c b/kernel/rwsem.c index 6850f53e02d8..b3c6c3fcd847 100644 --- a/kernel/rwsem.c +++ b/kernel/rwsem.c | |||
@@ -116,6 +116,16 @@ void down_read_nested(struct rw_semaphore *sem, int subclass) | |||
116 | 116 | ||
117 | EXPORT_SYMBOL(down_read_nested); | 117 | EXPORT_SYMBOL(down_read_nested); |
118 | 118 | ||
119 | void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) | ||
120 | { | ||
121 | might_sleep(); | ||
122 | rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); | ||
123 | |||
124 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); | ||
125 | } | ||
126 | |||
127 | EXPORT_SYMBOL(_down_write_nest_lock); | ||
128 | |||
119 | void down_write_nested(struct rw_semaphore *sem, int subclass) | 129 | void down_write_nested(struct rw_semaphore *sem, int subclass) |
120 | { | 130 | { |
121 | might_sleep(); | 131 | might_sleep(); |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c6737f4fb63b..bfe8ae22f710 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -1523,7 +1523,8 @@ out: | |||
1523 | */ | 1523 | */ |
1524 | int wake_up_process(struct task_struct *p) | 1524 | int wake_up_process(struct task_struct *p) |
1525 | { | 1525 | { |
1526 | return try_to_wake_up(p, TASK_ALL, 0); | 1526 | WARN_ON(task_is_stopped_or_traced(p)); |
1527 | return try_to_wake_up(p, TASK_NORMAL, 0); | ||
1527 | } | 1528 | } |
1528 | EXPORT_SYMBOL(wake_up_process); | 1529 | EXPORT_SYMBOL(wake_up_process); |
1529 | 1530 | ||
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4603d6cb9e25..5eea8707234a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -793,8 +793,11 @@ unsigned int sysctl_numa_balancing_scan_delay = 1000; | |||
793 | 793 | ||
794 | static void task_numa_placement(struct task_struct *p) | 794 | static void task_numa_placement(struct task_struct *p) |
795 | { | 795 | { |
796 | int seq = ACCESS_ONCE(p->mm->numa_scan_seq); | 796 | int seq; |
797 | 797 | ||
798 | if (!p->mm) /* for example, ksmd faulting in a user's mm */ | ||
799 | return; | ||
800 | seq = ACCESS_ONCE(p->mm->numa_scan_seq); | ||
798 | if (p->numa_scan_seq == seq) | 801 | if (p->numa_scan_seq == seq) |
799 | return; | 802 | return; |
800 | p->numa_scan_seq = seq; | 803 | p->numa_scan_seq = seq; |
diff --git a/kernel/signal.c b/kernel/signal.c index 580a91e63471..3d09cf6cde75 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/nsproxy.h> | 31 | #include <linux/nsproxy.h> |
32 | #include <linux/user_namespace.h> | 32 | #include <linux/user_namespace.h> |
33 | #include <linux/uprobes.h> | 33 | #include <linux/uprobes.h> |
34 | #include <linux/compat.h> | ||
34 | #define CREATE_TRACE_POINTS | 35 | #define CREATE_TRACE_POINTS |
35 | #include <trace/events/signal.h> | 36 | #include <trace/events/signal.h> |
36 | 37 | ||
@@ -679,23 +680,17 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | |||
679 | * No need to set need_resched since signal event passing | 680 | * No need to set need_resched since signal event passing |
680 | * goes through ->blocked | 681 | * goes through ->blocked |
681 | */ | 682 | */ |
682 | void signal_wake_up(struct task_struct *t, int resume) | 683 | void signal_wake_up_state(struct task_struct *t, unsigned int state) |
683 | { | 684 | { |
684 | unsigned int mask; | ||
685 | |||
686 | set_tsk_thread_flag(t, TIF_SIGPENDING); | 685 | set_tsk_thread_flag(t, TIF_SIGPENDING); |
687 | |||
688 | /* | 686 | /* |
689 | * For SIGKILL, we want to wake it up in the stopped/traced/killable | 687 | * TASK_WAKEKILL also means wake it up in the stopped/traced/killable |
690 | * case. We don't check t->state here because there is a race with it | 688 | * case. We don't check t->state here because there is a race with it |
691 | * executing another processor and just now entering stopped state. | 689 | * executing another processor and just now entering stopped state. |
692 | * By using wake_up_state, we ensure the process will wake up and | 690 | * By using wake_up_state, we ensure the process will wake up and |
693 | * handle its death signal. | 691 | * handle its death signal. |
694 | */ | 692 | */ |
695 | mask = TASK_INTERRUPTIBLE; | 693 | if (!wake_up_state(t, state | TASK_INTERRUPTIBLE)) |
696 | if (resume) | ||
697 | mask |= TASK_WAKEKILL; | ||
698 | if (!wake_up_state(t, mask)) | ||
699 | kick_process(t); | 694 | kick_process(t); |
700 | } | 695 | } |
701 | 696 | ||
@@ -843,7 +838,7 @@ static void ptrace_trap_notify(struct task_struct *t) | |||
843 | assert_spin_locked(&t->sighand->siglock); | 838 | assert_spin_locked(&t->sighand->siglock); |
844 | 839 | ||
845 | task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY); | 840 | task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY); |
846 | signal_wake_up(t, t->jobctl & JOBCTL_LISTENING); | 841 | ptrace_signal_wake_up(t, t->jobctl & JOBCTL_LISTENING); |
847 | } | 842 | } |
848 | 843 | ||
849 | /* | 844 | /* |
@@ -1799,6 +1794,10 @@ static inline int may_ptrace_stop(void) | |||
1799 | * If SIGKILL was already sent before the caller unlocked | 1794 | * If SIGKILL was already sent before the caller unlocked |
1800 | * ->siglock we must see ->core_state != NULL. Otherwise it | 1795 | * ->siglock we must see ->core_state != NULL. Otherwise it |
1801 | * is safe to enter schedule(). | 1796 | * is safe to enter schedule(). |
1797 | * | ||
1798 | * This is almost outdated, a task with the pending SIGKILL can't | ||
1799 | * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported | ||
1800 | * after SIGKILL was already dequeued. | ||
1802 | */ | 1801 | */ |
1803 | if (unlikely(current->mm->core_state) && | 1802 | if (unlikely(current->mm->core_state) && |
1804 | unlikely(current->mm == current->parent->mm)) | 1803 | unlikely(current->mm == current->parent->mm)) |
@@ -1924,6 +1923,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) | |||
1924 | if (gstop_done) | 1923 | if (gstop_done) |
1925 | do_notify_parent_cldstop(current, false, why); | 1924 | do_notify_parent_cldstop(current, false, why); |
1926 | 1925 | ||
1926 | /* tasklist protects us from ptrace_freeze_traced() */ | ||
1927 | __set_current_state(TASK_RUNNING); | 1927 | __set_current_state(TASK_RUNNING); |
1928 | if (clear_code) | 1928 | if (clear_code) |
1929 | current->exit_code = 0; | 1929 | current->exit_code = 0; |
@@ -2527,11 +2527,8 @@ static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset) | |||
2527 | */ | 2527 | */ |
2528 | void set_current_blocked(sigset_t *newset) | 2528 | void set_current_blocked(sigset_t *newset) |
2529 | { | 2529 | { |
2530 | struct task_struct *tsk = current; | ||
2531 | sigdelsetmask(newset, sigmask(SIGKILL) | sigmask(SIGSTOP)); | 2530 | sigdelsetmask(newset, sigmask(SIGKILL) | sigmask(SIGSTOP)); |
2532 | spin_lock_irq(&tsk->sighand->siglock); | 2531 | __set_current_blocked(newset); |
2533 | __set_task_blocked(tsk, newset); | ||
2534 | spin_unlock_irq(&tsk->sighand->siglock); | ||
2535 | } | 2532 | } |
2536 | 2533 | ||
2537 | void __set_current_blocked(const sigset_t *newset) | 2534 | void __set_current_blocked(const sigset_t *newset) |
@@ -3094,6 +3091,80 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s | |||
3094 | out: | 3091 | out: |
3095 | return error; | 3092 | return error; |
3096 | } | 3093 | } |
3094 | #ifdef CONFIG_GENERIC_SIGALTSTACK | ||
3095 | SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss) | ||
3096 | { | ||
3097 | return do_sigaltstack(uss, uoss, current_user_stack_pointer()); | ||
3098 | } | ||
3099 | #endif | ||
3100 | |||
3101 | int restore_altstack(const stack_t __user *uss) | ||
3102 | { | ||
3103 | int err = do_sigaltstack(uss, NULL, current_user_stack_pointer()); | ||
3104 | /* squash all but EFAULT for now */ | ||
3105 | return err == -EFAULT ? err : 0; | ||
3106 | } | ||
3107 | |||
3108 | int __save_altstack(stack_t __user *uss, unsigned long sp) | ||
3109 | { | ||
3110 | struct task_struct *t = current; | ||
3111 | return __put_user((void __user *)t->sas_ss_sp, &uss->ss_sp) | | ||
3112 | __put_user(sas_ss_flags(sp), &uss->ss_flags) | | ||
3113 | __put_user(t->sas_ss_size, &uss->ss_size); | ||
3114 | } | ||
3115 | |||
3116 | #ifdef CONFIG_COMPAT | ||
3117 | #ifdef CONFIG_GENERIC_SIGALTSTACK | ||
3118 | COMPAT_SYSCALL_DEFINE2(sigaltstack, | ||
3119 | const compat_stack_t __user *, uss_ptr, | ||
3120 | compat_stack_t __user *, uoss_ptr) | ||
3121 | { | ||
3122 | stack_t uss, uoss; | ||
3123 | int ret; | ||
3124 | mm_segment_t seg; | ||
3125 | |||
3126 | if (uss_ptr) { | ||
3127 | compat_stack_t uss32; | ||
3128 | |||
3129 | memset(&uss, 0, sizeof(stack_t)); | ||
3130 | if (copy_from_user(&uss32, uss_ptr, sizeof(compat_stack_t))) | ||
3131 | return -EFAULT; | ||
3132 | uss.ss_sp = compat_ptr(uss32.ss_sp); | ||
3133 | uss.ss_flags = uss32.ss_flags; | ||
3134 | uss.ss_size = uss32.ss_size; | ||
3135 | } | ||
3136 | seg = get_fs(); | ||
3137 | set_fs(KERNEL_DS); | ||
3138 | ret = do_sigaltstack((stack_t __force __user *) (uss_ptr ? &uss : NULL), | ||
3139 | (stack_t __force __user *) &uoss, | ||
3140 | compat_user_stack_pointer()); | ||
3141 | set_fs(seg); | ||
3142 | if (ret >= 0 && uoss_ptr) { | ||
3143 | if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(compat_stack_t)) || | ||
3144 | __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) || | ||
3145 | __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) || | ||
3146 | __put_user(uoss.ss_size, &uoss_ptr->ss_size)) | ||
3147 | ret = -EFAULT; | ||
3148 | } | ||
3149 | return ret; | ||
3150 | } | ||
3151 | |||
3152 | int compat_restore_altstack(const compat_stack_t __user *uss) | ||
3153 | { | ||
3154 | int err = compat_sys_sigaltstack(uss, NULL); | ||
3155 | /* squash all but -EFAULT for now */ | ||
3156 | return err == -EFAULT ? err : 0; | ||
3157 | } | ||
3158 | |||
3159 | int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp) | ||
3160 | { | ||
3161 | struct task_struct *t = current; | ||
3162 | return __put_user(ptr_to_compat((void __user *)t->sas_ss_sp), &uss->ss_sp) | | ||
3163 | __put_user(sas_ss_flags(sp), &uss->ss_flags) | | ||
3164 | __put_user(t->sas_ss_size, &uss->ss_size); | ||
3165 | } | ||
3166 | #endif | ||
3167 | #endif | ||
3097 | 3168 | ||
3098 | #ifdef __ARCH_WANT_SYS_SIGPENDING | 3169 | #ifdef __ARCH_WANT_SYS_SIGPENDING |
3099 | 3170 | ||
@@ -3130,7 +3201,6 @@ SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset, | |||
3130 | if (nset) { | 3201 | if (nset) { |
3131 | if (copy_from_user(&new_set, nset, sizeof(*nset))) | 3202 | if (copy_from_user(&new_set, nset, sizeof(*nset))) |
3132 | return -EFAULT; | 3203 | return -EFAULT; |
3133 | new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP)); | ||
3134 | 3204 | ||
3135 | new_blocked = current->blocked; | 3205 | new_blocked = current->blocked; |
3136 | 3206 | ||
@@ -3148,7 +3218,7 @@ SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset, | |||
3148 | return -EINVAL; | 3218 | return -EINVAL; |
3149 | } | 3219 | } |
3150 | 3220 | ||
3151 | __set_current_blocked(&new_blocked); | 3221 | set_current_blocked(&new_blocked); |
3152 | } | 3222 | } |
3153 | 3223 | ||
3154 | if (oset) { | 3224 | if (oset) { |
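
Dropping the manual new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP)) is safe because the call now goes through set_current_blocked(), which strips those two signals itself (see the earlier hunk). The guarantee is visible from userspace:

#include <signal.h>
#include <stdio.h>

int main(void)
{
	sigset_t set, out;

	sigemptyset(&set);
	sigaddset(&set, SIGKILL);	/* the kernel silently drops this */
	sigaddset(&set, SIGUSR1);
	sigprocmask(SIG_BLOCK, &set, NULL);

	sigprocmask(SIG_BLOCK, NULL, &out);	/* read back the mask */
	printf("SIGKILL blocked: %d, SIGUSR1 blocked: %d\n",
	       sigismember(&out, SIGKILL), sigismember(&out, SIGUSR1));
	return 0;
}
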
@@ -3212,6 +3282,7 @@ SYSCALL_DEFINE1(ssetmask, int, newmask) | |||
3212 | int old = current->blocked.sig[0]; | 3282 | int old = current->blocked.sig[0]; |
3213 | sigset_t newset; | 3283 | sigset_t newset; |
3214 | 3284 | ||
3285 | siginitset(&newset, newmask); | ||
3215 | set_current_blocked(&newset); | 3286 | set_current_blocked(&newset); |
3216 | 3287 | ||
3217 | return old; | 3288 | return old; |
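
The ssetmask hunk above is also worth a second look: newset was previously handed to set_current_blocked() without ever being initialized from newmask, so the legacy syscall installed a garbage mask. The larger block of additions moves sigaltstack() into generic code behind CONFIG_GENERIC_SIGALTSTACK, plus a compat wrapper; the userspace contract it services is unchanged, e.g.:

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static void handler(int sig)
{
	char probe;	/* lives on the alternate stack */
	printf("signal %d handled near %p\n", sig, (void *)&probe);
	_exit(0);
}

int main(void)
{
	stack_t ss = { .ss_sp = malloc(SIGSTKSZ), .ss_size = SIGSTKSZ };
	struct sigaction sa = { .sa_handler = handler, .sa_flags = SA_ONSTACK };

	if (sigaltstack(&ss, NULL) < 0) {	/* the syscall defined above */
		perror("sigaltstack");
		return 1;
	}
	sigaction(SIGUSR1, &sa, NULL);
	raise(SIGUSR1);		/* handler runs on the malloc'd stack */
	return 0;
}
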
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index dbff751e4086..395084d4ce16 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c | |||
@@ -25,6 +25,7 @@ cond_syscall(sys_swapoff); | |||
25 | cond_syscall(sys_kexec_load); | 25 | cond_syscall(sys_kexec_load); |
26 | cond_syscall(compat_sys_kexec_load); | 26 | cond_syscall(compat_sys_kexec_load); |
27 | cond_syscall(sys_init_module); | 27 | cond_syscall(sys_init_module); |
28 | cond_syscall(sys_finit_module); | ||
28 | cond_syscall(sys_delete_module); | 29 | cond_syscall(sys_delete_module); |
29 | cond_syscall(sys_socketpair); | 30 | cond_syscall(sys_socketpair); |
30 | cond_syscall(sys_bind); | 31 | cond_syscall(sys_bind); |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3ffe4c5ad3f3..41473b4ad7a4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -3998,7 +3998,7 @@ static int ftrace_module_notify(struct notifier_block *self, | |||
3998 | 3998 | ||
3999 | struct notifier_block ftrace_module_nb = { | 3999 | struct notifier_block ftrace_module_nb = { |
4000 | .notifier_call = ftrace_module_notify, | 4000 | .notifier_call = ftrace_module_notify, |
4001 | .priority = 0, | 4001 | .priority = INT_MAX, /* Run before anything that can use kprobes */ |
4002 | }; | 4002 | }; |
4003 | 4003 | ||
4004 | extern unsigned long __start_mcount_loc[]; | 4004 | extern unsigned long __start_mcount_loc[]; |
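
Bumping ftrace_module_nb to INT_MAX makes the module notifier chain call ftrace first, so mcount records are converted before any notifier that can use kprobes gets to run. Notifier chains fire in descending priority; schematically:

#include <limits.h>
#include <stdio.h>

struct notifier {
	int priority;
	const char *name;
};

int main(void)
{
	struct notifier chain[] = {
		{ 0, "kprobes-capable notifier" },
		{ INT_MAX, "ftrace_module_nb" },
	};
	int n = sizeof(chain) / sizeof(chain[0]);

	/* Fire highest priority first, as a notifier chain would. */
	for (int fired = 0; fired < n; fired++) {
		int best = 0;
		for (int i = 1; i < n; i++)
			if (chain[i].priority > chain[best].priority)
				best = i;
		printf("notify: %s (prio %d)\n",
		       chain[best].name, chain[best].priority);
		chain[best].priority = INT_MIN;	/* mark as already fired */
	}
	return 0;
}
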
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 61e081b4ba11..3c13e46d7d24 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -2899,6 +2899,8 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, | |||
2899 | if (copy_from_user(&buf, ubuf, cnt)) | 2899 | if (copy_from_user(&buf, ubuf, cnt)) |
2900 | return -EFAULT; | 2900 | return -EFAULT; |
2901 | 2901 | ||
2902 | buf[cnt] = 0; | ||
2903 | |||
2902 | trace_set_options(buf); | 2904 | trace_set_options(buf); |
2903 | 2905 | ||
2904 | *ppos += cnt; | 2906 | *ppos += cnt; |
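
The buf[cnt] = 0 one-liner fixes a classic pattern: copy_from_user() fills exactly cnt bytes and guarantees no terminator, yet trace_set_options() parses buf as a C string. The failure mode in miniature:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char user_input[4] = { 'n', 'o', 'p', '!' };	/* no '\0' */
	char buf[64] = "XXXXXXXXXXXXXXXX";	/* stale earlier contents */
	size_t cnt = sizeof(user_input);

	memcpy(buf, user_input, cnt);	/* the copy_from_user() stand-in */
	printf("unterminated: %s\n", buf);	/* runs into the stale 'X's */

	buf[cnt] = '\0';			/* the fix */
	printf("terminated:   %s\n", buf);
	return 0;
}
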
@@ -3034,6 +3036,31 @@ static void set_buffer_entries(struct trace_array *tr, unsigned long val) | |||
3034 | tr->data[cpu]->entries = val; | 3036 | tr->data[cpu]->entries = val; |
3035 | } | 3037 | } |
3036 | 3038 | ||
3039 | /* resize @tr's buffer to the size of @size_tr's entries */ | ||
3040 | static int resize_buffer_duplicate_size(struct trace_array *tr, | ||
3041 | struct trace_array *size_tr, int cpu_id) | ||
3042 | { | ||
3043 | int cpu, ret = 0; | ||
3044 | |||
3045 | if (cpu_id == RING_BUFFER_ALL_CPUS) { | ||
3046 | for_each_tracing_cpu(cpu) { | ||
3047 | ret = ring_buffer_resize(tr->buffer, | ||
3048 | size_tr->data[cpu]->entries, cpu); | ||
3049 | if (ret < 0) | ||
3050 | break; | ||
3051 | tr->data[cpu]->entries = size_tr->data[cpu]->entries; | ||
3052 | } | ||
3053 | } else { | ||
3054 | ret = ring_buffer_resize(tr->buffer, | ||
3055 | size_tr->data[cpu_id]->entries, cpu_id); | ||
3056 | if (ret == 0) | ||
3057 | tr->data[cpu_id]->entries = | ||
3058 | size_tr->data[cpu_id]->entries; | ||
3059 | } | ||
3060 | |||
3061 | return ret; | ||
3062 | } | ||
3063 | |||
3037 | static int __tracing_resize_ring_buffer(unsigned long size, int cpu) | 3064 | static int __tracing_resize_ring_buffer(unsigned long size, int cpu) |
3038 | { | 3065 | { |
3039 | int ret; | 3066 | int ret; |
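resize_buffer_duplicate_size() factors out "resize @tr so its per-cpu entry counts match @size_tr", for one CPU or for RING_BUFFER_ALL_CPUS. The later hunks in this file call it like this (usage taken from the patch itself):

	/* make max_tr's per-cpu buffers match global_trace on every CPU */
	ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
					   RING_BUFFER_ALL_CPUS);

Note the ALL_CPUS loop stops at the first ring_buffer_resize() failure and returns that error; CPUs already visited keep their new size, and both callers treat any negative return as fatal.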
@@ -3058,23 +3085,8 @@ static int __tracing_resize_ring_buffer(unsigned long size, int cpu) | |||
3058 | 3085 | ||
3059 | ret = ring_buffer_resize(max_tr.buffer, size, cpu); | 3086 | ret = ring_buffer_resize(max_tr.buffer, size, cpu); |
3060 | if (ret < 0) { | 3087 | if (ret < 0) { |
3061 | int r = 0; | 3088 | int r = resize_buffer_duplicate_size(&global_trace, |
3062 | 3089 | &global_trace, cpu); | |
3063 | if (cpu == RING_BUFFER_ALL_CPUS) { | ||
3064 | int i; | ||
3065 | for_each_tracing_cpu(i) { | ||
3066 | r = ring_buffer_resize(global_trace.buffer, | ||
3067 | global_trace.data[i]->entries, | ||
3068 | i); | ||
3069 | if (r < 0) | ||
3070 | break; | ||
3071 | } | ||
3072 | } else { | ||
3073 | r = ring_buffer_resize(global_trace.buffer, | ||
3074 | global_trace.data[cpu]->entries, | ||
3075 | cpu); | ||
3076 | } | ||
3077 | |||
3078 | if (r < 0) { | 3090 | if (r < 0) { |
3079 | /* | 3091 | /* |
3080 | * AARGH! We are left with different | 3092 | * AARGH! We are left with different |
@@ -3212,17 +3224,11 @@ static int tracing_set_tracer(const char *buf) | |||
3212 | 3224 | ||
3213 | topts = create_trace_option_files(t); | 3225 | topts = create_trace_option_files(t); |
3214 | if (t->use_max_tr) { | 3226 | if (t->use_max_tr) { |
3215 | int cpu; | ||
3216 | /* we need to make per cpu buffer sizes equivalent */ | 3227 | /* we need to make per cpu buffer sizes equivalent */ |
3217 | for_each_tracing_cpu(cpu) { | 3228 | ret = resize_buffer_duplicate_size(&max_tr, &global_trace, |
3218 | ret = ring_buffer_resize(max_tr.buffer, | 3229 | RING_BUFFER_ALL_CPUS); |
3219 | global_trace.data[cpu]->entries, | 3230 | if (ret < 0) |
3220 | cpu); | 3231 | goto out; |
3221 | if (ret < 0) | ||
3222 | goto out; | ||
3223 | max_tr.data[cpu]->entries = | ||
3224 | global_trace.data[cpu]->entries; | ||
3225 | } | ||
3226 | } | 3232 | } |
3227 | 3233 | ||
3228 | if (t->init) { | 3234 | if (t->init) { |
@@ -3448,7 +3454,7 @@ static int tracing_wait_pipe(struct file *filp) | |||
3448 | return -EINTR; | 3454 | return -EINTR; |
3449 | 3455 | ||
3450 | /* | 3456 | /* |
3451 | * We block until we read something and tracing is enabled. | 3457 | * We block until we read something and tracing is disabled. |
3452 | * We still block if tracing is disabled, but we have never | 3458 | * We still block if tracing is disabled, but we have never |
3453 | * read anything. This allows a user to cat this file, and | 3459 | * read anything. This allows a user to cat this file, and |
3454 | * then enable tracing. But after we have read something, | 3460 | * then enable tracing. But after we have read something, |
@@ -3456,7 +3462,7 @@ static int tracing_wait_pipe(struct file *filp) | |||
3456 | * | 3462 | * |
3457 | * iter->pos will be 0 if we haven't read anything. | 3463 | * iter->pos will be 0 if we haven't read anything. |
3458 | */ | 3464 | */ |
3459 | if (tracing_is_enabled() && iter->pos) | 3465 | if (!tracing_is_enabled() && iter->pos) |
3460 | break; | 3466 | break; |
3461 | } | 3467 | } |
3462 | 3468 | ||
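The inverted condition is the real fix here: a trace_pipe reader should keep blocking while tracing is enabled, since more data may arrive, and get EOF only once tracing has been turned off after at least one successful read. With the old `tracing_is_enabled() && iter->pos` test, a reader who had already consumed data would block forever once tracing was disabled. A condensed sketch, assuming the usual tracing_wait_pipe() loop structure:

	while (trace_empty(iter)) {
		if ((filp->f_flags & O_NONBLOCK))
			return -EAGAIN;
		/* ... drop iter->mutex, sleep, reacquire ... */
		if (signal_pending(current))
			return -EINTR;
		/* already read something and tracing is now off: EOF */
		if (!tracing_is_enabled() && iter->pos)
			break;
	}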
@@ -4271,13 +4277,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
4271 | return -ENOMEM; | 4277 | return -ENOMEM; |
4272 | 4278 | ||
4273 | if (*ppos & (PAGE_SIZE - 1)) { | 4279 | if (*ppos & (PAGE_SIZE - 1)) { |
4274 | WARN_ONCE(1, "Ftrace: previous read must page-align\n"); | ||
4275 | ret = -EINVAL; | 4280 | ret = -EINVAL; |
4276 | goto out; | 4281 | goto out; |
4277 | } | 4282 | } |
4278 | 4283 | ||
4279 | if (len & (PAGE_SIZE - 1)) { | 4284 | if (len & (PAGE_SIZE - 1)) { |
4280 | WARN_ONCE(1, "Ftrace: splice_read should page-align\n"); | ||
4281 | if (len < PAGE_SIZE) { | 4285 | if (len < PAGE_SIZE) { |
4282 | ret = -EINVAL; | 4286 | ret = -EINVAL; |
4283 | goto out; | 4287 | goto out; |
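Removing the two WARN_ONCE() calls is log hygiene rather than a behavior change: both conditions are reachable straight from user space via an unaligned offset or length passed to splice(), and a warning that any unprivileged reader can trigger is just noise. The quiet form of the check (simplified from the code above, which uses a goto out path):

	if (*ppos & (PAGE_SIZE - 1))
		return -EINVAL;	/* misaligned offset is the caller's error */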
@@ -4813,10 +4817,17 @@ rb_simple_write(struct file *filp, const char __user *ubuf, | |||
4813 | return ret; | 4817 | return ret; |
4814 | 4818 | ||
4815 | if (buffer) { | 4819 | if (buffer) { |
4816 | if (val) | 4820 | mutex_lock(&trace_types_lock); |
4821 | if (val) { | ||
4817 | ring_buffer_record_on(buffer); | 4822 | ring_buffer_record_on(buffer); |
4818 | else | 4823 | if (current_trace->start) |
4824 | current_trace->start(tr); | ||
4825 | } else { | ||
4819 | ring_buffer_record_off(buffer); | 4826 | ring_buffer_record_off(buffer); |
4827 | if (current_trace->stop) | ||
4828 | current_trace->stop(tr); | ||
4829 | } | ||
4830 | mutex_unlock(&trace_types_lock); | ||
4820 | } | 4831 | } |
4821 | 4832 | ||
4822 | (*ppos)++; | 4833 | (*ppos)++; |
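rb_simple_write(), the handler behind the tracing_on control file, now also drives the current tracer's optional start/stop hooks, and takes trace_types_lock because current_trace can change concurrently. The relevant fields of struct tracer, abridged from kernel/trace/trace.h of this era (a sketch, not the full definition):

struct tracer {
	const char	*name;
	int		(*init)(struct trace_array *tr);
	void		(*start)(struct trace_array *tr);	/* optional */
	void		(*stop)(struct trace_array *tr);	/* optional */
	/* ... */
};

The NULL checks before current_trace->start()/stop() exist precisely because most tracers leave these hooks unset.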
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 0c1b165778e5..42ca822fc701 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c | |||
@@ -33,7 +33,6 @@ static unsigned long max_stack_size; | |||
33 | static arch_spinlock_t max_stack_lock = | 33 | static arch_spinlock_t max_stack_lock = |
34 | (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; | 34 | (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; |
35 | 35 | ||
36 | static int stack_trace_disabled __read_mostly; | ||
37 | static DEFINE_PER_CPU(int, trace_active); | 36 | static DEFINE_PER_CPU(int, trace_active); |
38 | static DEFINE_MUTEX(stack_sysctl_mutex); | 37 | static DEFINE_MUTEX(stack_sysctl_mutex); |
39 | 38 | ||
@@ -116,9 +115,6 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip, | |||
116 | { | 115 | { |
117 | int cpu; | 116 | int cpu; |
118 | 117 | ||
119 | if (unlikely(!ftrace_enabled || stack_trace_disabled)) | ||
120 | return; | ||
121 | |||
122 | preempt_disable_notrace(); | 118 | preempt_disable_notrace(); |
123 | 119 | ||
124 | cpu = raw_smp_processor_id(); | 120 | cpu = raw_smp_processor_id(); |
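Both removals in trace_stack.c are dead weight: stack_trace_disabled was never set anywhere, and once the stack tracer is driven through a registered ftrace_ops the callback simply is not invoked while ftrace is off, making the ftrace_enabled test redundant. The guard that actually matters, the per-cpu recursion check, stays; its shape in stack_trace_call() is roughly (recalled sketch, not from this patch):

	preempt_disable_notrace();
	cpu = raw_smp_processor_id();
	/* no recursion: bail if this CPU is already in the stack tracer */
	if (per_cpu(trace_active, cpu)++ != 0)
		goto out;
	check_stack();
 out:
	per_cpu(trace_active, cpu)--;
	preempt_enable_notrace();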
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index f5975ccf9348..2b042c42fbc4 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c | |||
@@ -799,7 +799,7 @@ static int userns_install(struct nsproxy *nsproxy, void *ns) | |||
799 | if (user_ns == current_user_ns()) | 799 | if (user_ns == current_user_ns()) |
800 | return -EINVAL; | 800 | return -EINVAL; |
801 | 801 | ||
802 | /* Threaded many not enter a different user namespace */ | 802 | /* Threaded processes may not enter a different user namespace */ |
803 | if (atomic_read(¤t->mm->mm_users) > 1) | 803 | if (atomic_read(¤t->mm->mm_users) > 1) |
804 | return -EINVAL; | 804 | return -EINVAL; |
805 | 805 | ||
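Beyond the comment fix, the check itself deserves a note: mm_users counts users of current->mm, so a value above one indicates sibling threads sharing the address space, and setns() into a different user namespace is refused for them; letting one thread switch would change credential scope out from under the others. As it stands:

	/* single-threaded callers only: other threads share this mm */
	if (atomic_read(&current->mm->mm_users) > 1)
		return -EINVAL;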
diff --git a/kernel/utsname.c b/kernel/utsname.c index f6336d51d64c..08b197e8c485 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c | |||
@@ -113,7 +113,8 @@ static int utsns_install(struct nsproxy *nsproxy, void *new) | |||
113 | { | 113 | { |
114 | struct uts_namespace *ns = new; | 114 | struct uts_namespace *ns = new; |
115 | 115 | ||
116 | if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) | 116 | if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) || |
117 | !nsown_capable(CAP_SYS_ADMIN)) | ||
117 | return -EPERM; | 118 | return -EPERM; |
118 | 119 | ||
119 | get_uts_ns(ns); | 120 | get_uts_ns(ns); |
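The tightened test demands CAP_SYS_ADMIN twice over: in the user namespace owning the target uts namespace (ns_capable()) and in the caller's own user namespace (nsown_capable()). Written as an OR of negations, either missing capability refuses the setns():

	if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
	    !nsown_capable(CAP_SYS_ADMIN))
		return -EPERM;

This closes the gap where a task was privileged over the target namespace's owner yet unprivileged in its own current namespace.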
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 997c6a16ec22..75a2ab3d0b02 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -344,6 +344,10 @@ static void watchdog_enable(unsigned int cpu) | |||
344 | { | 344 | { |
345 | struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); | 345 | struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); |
346 | 346 | ||
347 | /* kick off the timer for the hardlockup detector */ | ||
348 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
349 | hrtimer->function = watchdog_timer_fn; | ||
350 | |||
347 | if (!watchdog_enabled) { | 351 | if (!watchdog_enabled) { |
348 | kthread_park(current); | 352 | kthread_park(current); |
349 | return; | 353 | return; |
@@ -352,10 +356,6 @@ static void watchdog_enable(unsigned int cpu) | |||
352 | /* Enable the perf event */ | 356 | /* Enable the perf event */ |
353 | watchdog_nmi_enable(cpu); | 357 | watchdog_nmi_enable(cpu); |
354 | 358 | ||
355 | /* kick off the timer for the hardlockup detector */ | ||
356 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
357 | hrtimer->function = watchdog_timer_fn; | ||
358 | |||
359 | /* done here because hrtimer_start can only pin to smp_processor_id() */ | 359 | /* done here because hrtimer_start can only pin to smp_processor_id() */ |
360 | hrtimer_start(hrtimer, ns_to_ktime(sample_period), | 360 | hrtimer_start(hrtimer, ns_to_ktime(sample_period), |
361 | HRTIMER_MODE_REL_PINNED); | 361 | HRTIMER_MODE_REL_PINNED); |
@@ -369,9 +369,6 @@ static void watchdog_disable(unsigned int cpu) | |||
369 | { | 369 | { |
370 | struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); | 370 | struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); |
371 | 371 | ||
372 | if (!watchdog_enabled) | ||
373 | return; | ||
374 | |||
375 | watchdog_set_prio(SCHED_NORMAL, 0); | 372 | watchdog_set_prio(SCHED_NORMAL, 0); |
376 | hrtimer_cancel(hrtimer); | 373 | hrtimer_cancel(hrtimer); |
377 | /* disable the perf event */ | 374 | /* disable the perf event */ |
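Moving hrtimer_init() ahead of the !watchdog_enabled early return, and deleting the mirror-image early return in watchdog_disable(), keeps the smpboot park/unpark paths symmetric: disable unconditionally calls hrtimer_cancel(), which is only safe on a timer that has been initialized. The general lifecycle, for reference (hedged sketch; my_timer_fn and period_ns are hypothetical names):

	static struct hrtimer t;

	hrtimer_init(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	t.function = my_timer_fn;	/* enum hrtimer_restart (*)(struct hrtimer *) */
	hrtimer_start(&t, ns_to_ktime(period_ns), HRTIMER_MODE_REL_PINNED);
	/* ... */
	hrtimer_cancel(&t);	/* safe even if never started, once initialized */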