aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-08-08 18:57:47 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-08-08 18:57:47 -0400
commit8065be8d032f38da25b54bf077a05a30d9ce9f2a (patch)
tree32a7baf4b40e0240ab4b9dd6f2bbe6129929bb66 /kernel
parent27d438c56009e5ae632de36fe70985d1aab5e344 (diff)
parentecc265fe9e09e32a3573b2ba26e79b2099eb8bbb (diff)
Merge branch 'akpm' (second patchbomb from Andrew Morton)
Merge more incoming from Andrew Morton: "Two new syscalls: memfd_create in "shm: add memfd_create() syscall" kexec_file_load in "kexec: implementation of new syscall kexec_file_load" And: - Most (all?) of the rest of MM - Lots of the usual misc bits - fs/autofs4 - drivers/rtc - fs/nilfs - procfs - fork.c, exec.c - more in lib/ - rapidio - Janitorial work in filesystems: fs/ufs, fs/reiserfs, fs/adfs, fs/cramfs, fs/romfs, fs/qnx6. - initrd/initramfs work - "file sealing" and the memfd_create() syscall, in tmpfs - add pci_zalloc_consistent, use it in lots of places - MAINTAINERS maintenance - kexec feature work" * emailed patches from Andrew Morton <akpm@linux-foundation.org: (193 commits) MAINTAINERS: update nomadik patterns MAINTAINERS: update usb/gadget patterns MAINTAINERS: update DMA BUFFER SHARING patterns kexec: verify the signature of signed PE bzImage kexec: support kexec/kdump on EFI systems kexec: support for kexec on panic using new system call kexec-bzImage64: support for loading bzImage using 64bit entry kexec: load and relocate purgatory at kernel load time purgatory: core purgatory functionality purgatory/sha256: provide implementation of sha256 in purgaotory context kexec: implementation of new syscall kexec_file_load kexec: new syscall kexec_file_load() declaration kexec: make kexec_segment user buffer pointer a union resource: provide new functions to walk through resources kexec: use common function for kimage_normal_alloc() and kimage_crash_alloc() kexec: move segment verification code in a separate function kexec: rename unusebale_pages to unusable_pages kernel: build bin2c based on config option CONFIG_BUILD_BIN2C bin2c: move bin2c in scripts/basic shm: wait for pins to be released when sealing ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/acct.c30
-rw-r--r--kernel/bounds.c2
-rw-r--r--kernel/events/uprobes.c15
-rw-r--r--kernel/exit.c49
-rw-r--r--kernel/fork.c79
-rw-r--r--kernel/gcov/fs.c3
-rw-r--r--kernel/kallsyms.c2
-rw-r--r--kernel/kexec.c1291
-rw-r--r--kernel/panic.c1
-rw-r--r--kernel/resource.c101
-rw-r--r--kernel/sys_ni.c2
-rw-r--r--kernel/test_kprobes.c87
-rw-r--r--kernel/user_namespace.c6
-rw-r--r--kernel/watchdog.c1
15 files changed, 1395 insertions, 276 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 0026cf531769..dc5c77544fd6 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -105,7 +105,7 @@ targets += config_data.gz
105$(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE 105$(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
106 $(call if_changed,gzip) 106 $(call if_changed,gzip)
107 107
108 filechk_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;") 108 filechk_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/basic/bin2c; echo "MAGIC_END;")
109targets += config_data.h 109targets += config_data.h
110$(obj)/config_data.h: $(obj)/config_data.gz FORCE 110$(obj)/config_data.h: $(obj)/config_data.gz FORCE
111 $(call filechk,ikconfiggz) 111 $(call filechk,ikconfiggz)
diff --git a/kernel/acct.c b/kernel/acct.c
index a1844f14c6d6..51793520566f 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -141,12 +141,12 @@ static int check_free_space(struct bsd_acct_struct *acct, struct file *file)
141 if (acct->active) { 141 if (acct->active) {
142 if (act < 0) { 142 if (act < 0) {
143 acct->active = 0; 143 acct->active = 0;
144 printk(KERN_INFO "Process accounting paused\n"); 144 pr_info("Process accounting paused\n");
145 } 145 }
146 } else { 146 } else {
147 if (act > 0) { 147 if (act > 0) {
148 acct->active = 1; 148 acct->active = 1;
149 printk(KERN_INFO "Process accounting resumed\n"); 149 pr_info("Process accounting resumed\n");
150 } 150 }
151 } 151 }
152 152
@@ -261,6 +261,7 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
261 261
262 if (name) { 262 if (name) {
263 struct filename *tmp = getname(name); 263 struct filename *tmp = getname(name);
264
264 if (IS_ERR(tmp)) 265 if (IS_ERR(tmp))
265 return PTR_ERR(tmp); 266 return PTR_ERR(tmp);
266 error = acct_on(tmp); 267 error = acct_on(tmp);
@@ -376,7 +377,7 @@ static comp_t encode_comp_t(unsigned long value)
376 return exp; 377 return exp;
377} 378}
378 379
379#if ACCT_VERSION==1 || ACCT_VERSION==2 380#if ACCT_VERSION == 1 || ACCT_VERSION == 2
380/* 381/*
381 * encode an u64 into a comp2_t (24 bits) 382 * encode an u64 into a comp2_t (24 bits)
382 * 383 *
@@ -389,7 +390,7 @@ static comp_t encode_comp_t(unsigned long value)
389#define MANTSIZE2 20 /* 20 bit mantissa. */ 390#define MANTSIZE2 20 /* 20 bit mantissa. */
390#define EXPSIZE2 5 /* 5 bit base 2 exponent. */ 391#define EXPSIZE2 5 /* 5 bit base 2 exponent. */
391#define MAXFRACT2 ((1ul << MANTSIZE2) - 1) /* Maximum fractional value. */ 392#define MAXFRACT2 ((1ul << MANTSIZE2) - 1) /* Maximum fractional value. */
392#define MAXEXP2 ((1 <<EXPSIZE2) - 1) /* Maximum exponent. */ 393#define MAXEXP2 ((1 << EXPSIZE2) - 1) /* Maximum exponent. */
393 394
394static comp2_t encode_comp2_t(u64 value) 395static comp2_t encode_comp2_t(u64 value)
395{ 396{
@@ -420,7 +421,7 @@ static comp2_t encode_comp2_t(u64 value)
420} 421}
421#endif 422#endif
422 423
423#if ACCT_VERSION==3 424#if ACCT_VERSION == 3
424/* 425/*
425 * encode an u64 into a 32 bit IEEE float 426 * encode an u64 into a 32 bit IEEE float
426 */ 427 */
@@ -429,8 +430,9 @@ static u32 encode_float(u64 value)
429 unsigned exp = 190; 430 unsigned exp = 190;
430 unsigned u; 431 unsigned u;
431 432
432 if (value==0) return 0; 433 if (value == 0)
433 while ((s64)value > 0){ 434 return 0;
435 while ((s64)value > 0) {
434 value <<= 1; 436 value <<= 1;
435 exp--; 437 exp--;
436 } 438 }
@@ -486,16 +488,17 @@ static void do_acct_process(struct bsd_acct_struct *acct,
486 run_time -= current->group_leader->start_time; 488 run_time -= current->group_leader->start_time;
487 /* convert nsec -> AHZ */ 489 /* convert nsec -> AHZ */
488 elapsed = nsec_to_AHZ(run_time); 490 elapsed = nsec_to_AHZ(run_time);
489#if ACCT_VERSION==3 491#if ACCT_VERSION == 3
490 ac.ac_etime = encode_float(elapsed); 492 ac.ac_etime = encode_float(elapsed);
491#else 493#else
492 ac.ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ? 494 ac.ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ?
493 (unsigned long) elapsed : (unsigned long) -1l); 495 (unsigned long) elapsed : (unsigned long) -1l);
494#endif 496#endif
495#if ACCT_VERSION==1 || ACCT_VERSION==2 497#if ACCT_VERSION == 1 || ACCT_VERSION == 2
496 { 498 {
497 /* new enlarged etime field */ 499 /* new enlarged etime field */
498 comp2_t etime = encode_comp2_t(elapsed); 500 comp2_t etime = encode_comp2_t(elapsed);
501
499 ac.ac_etime_hi = etime >> 16; 502 ac.ac_etime_hi = etime >> 16;
500 ac.ac_etime_lo = (u16) etime; 503 ac.ac_etime_lo = (u16) etime;
501 } 504 }
@@ -505,15 +508,15 @@ static void do_acct_process(struct bsd_acct_struct *acct,
505 /* we really need to bite the bullet and change layout */ 508 /* we really need to bite the bullet and change layout */
506 ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid); 509 ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid);
507 ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid); 510 ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid);
508#if ACCT_VERSION==2 511#if ACCT_VERSION == 2
509 ac.ac_ahz = AHZ; 512 ac.ac_ahz = AHZ;
510#endif 513#endif
511#if ACCT_VERSION==1 || ACCT_VERSION==2 514#if ACCT_VERSION == 1 || ACCT_VERSION == 2
512 /* backward-compatible 16 bit fields */ 515 /* backward-compatible 16 bit fields */
513 ac.ac_uid16 = ac.ac_uid; 516 ac.ac_uid16 = ac.ac_uid;
514 ac.ac_gid16 = ac.ac_gid; 517 ac.ac_gid16 = ac.ac_gid;
515#endif 518#endif
516#if ACCT_VERSION==3 519#if ACCT_VERSION == 3
517 ac.ac_pid = task_tgid_nr_ns(current, ns); 520 ac.ac_pid = task_tgid_nr_ns(current, ns);
518 rcu_read_lock(); 521 rcu_read_lock();
519 ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns); 522 ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns);
@@ -574,6 +577,7 @@ void acct_collect(long exitcode, int group_dead)
574 577
575 if (group_dead && current->mm) { 578 if (group_dead && current->mm) {
576 struct vm_area_struct *vma; 579 struct vm_area_struct *vma;
580
577 down_read(&current->mm->mmap_sem); 581 down_read(&current->mm->mmap_sem);
578 vma = current->mm->mmap; 582 vma = current->mm->mmap;
579 while (vma) { 583 while (vma) {
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 9fd4246b04b8..e1d1d1952bfa 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -9,7 +9,6 @@
9#include <linux/page-flags.h> 9#include <linux/page-flags.h>
10#include <linux/mmzone.h> 10#include <linux/mmzone.h>
11#include <linux/kbuild.h> 11#include <linux/kbuild.h>
12#include <linux/page_cgroup.h>
13#include <linux/log2.h> 12#include <linux/log2.h>
14#include <linux/spinlock_types.h> 13#include <linux/spinlock_types.h>
15 14
@@ -18,7 +17,6 @@ void foo(void)
18 /* The enum constants to put into include/generated/bounds.h */ 17 /* The enum constants to put into include/generated/bounds.h */
19 DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); 18 DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
20 DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); 19 DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
21 DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
22#ifdef CONFIG_SMP 20#ifdef CONFIG_SMP
23 DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS)); 21 DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
24#endif 22#endif
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 6f3254e8c137..1d0af8a2c646 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -167,6 +167,11 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
167 /* For mmu_notifiers */ 167 /* For mmu_notifiers */
168 const unsigned long mmun_start = addr; 168 const unsigned long mmun_start = addr;
169 const unsigned long mmun_end = addr + PAGE_SIZE; 169 const unsigned long mmun_end = addr + PAGE_SIZE;
170 struct mem_cgroup *memcg;
171
172 err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg);
173 if (err)
174 return err;
170 175
171 /* For try_to_free_swap() and munlock_vma_page() below */ 176 /* For try_to_free_swap() and munlock_vma_page() below */
172 lock_page(page); 177 lock_page(page);
@@ -179,6 +184,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
179 184
180 get_page(kpage); 185 get_page(kpage);
181 page_add_new_anon_rmap(kpage, vma, addr); 186 page_add_new_anon_rmap(kpage, vma, addr);
187 mem_cgroup_commit_charge(kpage, memcg, false);
188 lru_cache_add_active_or_unevictable(kpage, vma);
182 189
183 if (!PageAnon(page)) { 190 if (!PageAnon(page)) {
184 dec_mm_counter(mm, MM_FILEPAGES); 191 dec_mm_counter(mm, MM_FILEPAGES);
@@ -200,6 +207,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
200 207
201 err = 0; 208 err = 0;
202 unlock: 209 unlock:
210 mem_cgroup_cancel_charge(kpage, memcg);
203 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); 211 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
204 unlock_page(page); 212 unlock_page(page);
205 return err; 213 return err;
@@ -315,18 +323,11 @@ retry:
315 if (!new_page) 323 if (!new_page)
316 goto put_old; 324 goto put_old;
317 325
318 if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
319 goto put_new;
320
321 __SetPageUptodate(new_page); 326 __SetPageUptodate(new_page);
322 copy_highpage(new_page, old_page); 327 copy_highpage(new_page, old_page);
323 copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); 328 copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
324 329
325 ret = __replace_page(vma, vaddr, old_page, new_page); 330 ret = __replace_page(vma, vaddr, old_page, new_page);
326 if (ret)
327 mem_cgroup_uncharge_page(new_page);
328
329put_new:
330 page_cache_release(new_page); 331 page_cache_release(new_page);
331put_old: 332put_old:
332 put_page(old_page); 333 put_page(old_page);
diff --git a/kernel/exit.c b/kernel/exit.c
index 88c6b3e42583..32c58f7433a3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -59,7 +59,7 @@
59#include <asm/pgtable.h> 59#include <asm/pgtable.h>
60#include <asm/mmu_context.h> 60#include <asm/mmu_context.h>
61 61
62static void exit_mm(struct task_struct * tsk); 62static void exit_mm(struct task_struct *tsk);
63 63
64static void __unhash_process(struct task_struct *p, bool group_dead) 64static void __unhash_process(struct task_struct *p, bool group_dead)
65{ 65{
@@ -151,7 +151,7 @@ static void __exit_signal(struct task_struct *tsk)
151 spin_unlock(&sighand->siglock); 151 spin_unlock(&sighand->siglock);
152 152
153 __cleanup_sighand(sighand); 153 __cleanup_sighand(sighand);
154 clear_tsk_thread_flag(tsk,TIF_SIGPENDING); 154 clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
155 if (group_dead) { 155 if (group_dead) {
156 flush_sigqueue(&sig->shared_pending); 156 flush_sigqueue(&sig->shared_pending);
157 tty_kref_put(tty); 157 tty_kref_put(tty);
@@ -168,7 +168,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
168} 168}
169 169
170 170
171void release_task(struct task_struct * p) 171void release_task(struct task_struct *p)
172{ 172{
173 struct task_struct *leader; 173 struct task_struct *leader;
174 int zap_leader; 174 int zap_leader;
@@ -192,7 +192,8 @@ repeat:
192 */ 192 */
193 zap_leader = 0; 193 zap_leader = 0;
194 leader = p->group_leader; 194 leader = p->group_leader;
195 if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) { 195 if (leader != p && thread_group_empty(leader)
196 && leader->exit_state == EXIT_ZOMBIE) {
196 /* 197 /*
197 * If we were the last child thread and the leader has 198 * If we were the last child thread and the leader has
198 * exited already, and the leader's parent ignores SIGCHLD, 199 * exited already, and the leader's parent ignores SIGCHLD,
@@ -241,7 +242,8 @@ struct pid *session_of_pgrp(struct pid *pgrp)
241 * 242 *
242 * "I ask you, have you ever known what it is to be an orphan?" 243 * "I ask you, have you ever known what it is to be an orphan?"
243 */ 244 */
244static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task) 245static int will_become_orphaned_pgrp(struct pid *pgrp,
246 struct task_struct *ignored_task)
245{ 247{
246 struct task_struct *p; 248 struct task_struct *p;
247 249
@@ -294,9 +296,9 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
294 struct task_struct *ignored_task = tsk; 296 struct task_struct *ignored_task = tsk;
295 297
296 if (!parent) 298 if (!parent)
297 /* exit: our father is in a different pgrp than 299 /* exit: our father is in a different pgrp than
298 * we are and we were the only connection outside. 300 * we are and we were the only connection outside.
299 */ 301 */
300 parent = tsk->real_parent; 302 parent = tsk->real_parent;
301 else 303 else
302 /* reparent: our child is in a different pgrp than 304 /* reparent: our child is in a different pgrp than
@@ -405,7 +407,7 @@ assign_new_owner:
405 * Turn us into a lazy TLB process if we 407 * Turn us into a lazy TLB process if we
406 * aren't already.. 408 * aren't already..
407 */ 409 */
408static void exit_mm(struct task_struct * tsk) 410static void exit_mm(struct task_struct *tsk)
409{ 411{
410 struct mm_struct *mm = tsk->mm; 412 struct mm_struct *mm = tsk->mm;
411 struct core_state *core_state; 413 struct core_state *core_state;
@@ -425,6 +427,7 @@ static void exit_mm(struct task_struct * tsk)
425 core_state = mm->core_state; 427 core_state = mm->core_state;
426 if (core_state) { 428 if (core_state) {
427 struct core_thread self; 429 struct core_thread self;
430
428 up_read(&mm->mmap_sem); 431 up_read(&mm->mmap_sem);
429 432
430 self.task = tsk; 433 self.task = tsk;
@@ -566,6 +569,7 @@ static void forget_original_parent(struct task_struct *father)
566 569
567 list_for_each_entry_safe(p, n, &father->children, sibling) { 570 list_for_each_entry_safe(p, n, &father->children, sibling) {
568 struct task_struct *t = p; 571 struct task_struct *t = p;
572
569 do { 573 do {
570 t->real_parent = reaper; 574 t->real_parent = reaper;
571 if (t->parent == father) { 575 if (t->parent == father) {
@@ -599,7 +603,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
599 /* 603 /*
600 * This does two things: 604 * This does two things:
601 * 605 *
602 * A. Make init inherit all the child processes 606 * A. Make init inherit all the child processes
603 * B. Check to see if any process groups have become orphaned 607 * B. Check to see if any process groups have become orphaned
604 * as a result of our exiting, and if they have any stopped 608 * as a result of our exiting, and if they have any stopped
605 * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) 609 * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
@@ -649,9 +653,8 @@ static void check_stack_usage(void)
649 653
650 spin_lock(&low_water_lock); 654 spin_lock(&low_water_lock);
651 if (free < lowest_to_date) { 655 if (free < lowest_to_date) {
652 printk(KERN_WARNING "%s (%d) used greatest stack depth: " 656 pr_warn("%s (%d) used greatest stack depth: %lu bytes left\n",
653 "%lu bytes left\n", 657 current->comm, task_pid_nr(current), free);
654 current->comm, task_pid_nr(current), free);
655 lowest_to_date = free; 658 lowest_to_date = free;
656 } 659 }
657 spin_unlock(&low_water_lock); 660 spin_unlock(&low_water_lock);
@@ -692,8 +695,7 @@ void do_exit(long code)
692 * leave this task alone and wait for reboot. 695 * leave this task alone and wait for reboot.
693 */ 696 */
694 if (unlikely(tsk->flags & PF_EXITING)) { 697 if (unlikely(tsk->flags & PF_EXITING)) {
695 printk(KERN_ALERT 698 pr_alert("Fixing recursive fault but reboot is needed!\n");
696 "Fixing recursive fault but reboot is needed!\n");
697 /* 699 /*
698 * We can do this unlocked here. The futex code uses 700 * We can do this unlocked here. The futex code uses
699 * this flag just to verify whether the pi state 701 * this flag just to verify whether the pi state
@@ -717,9 +719,9 @@ void do_exit(long code)
717 raw_spin_unlock_wait(&tsk->pi_lock); 719 raw_spin_unlock_wait(&tsk->pi_lock);
718 720
719 if (unlikely(in_atomic())) 721 if (unlikely(in_atomic()))
720 printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", 722 pr_info("note: %s[%d] exited with preempt_count %d\n",
721 current->comm, task_pid_nr(current), 723 current->comm, task_pid_nr(current),
722 preempt_count()); 724 preempt_count());
723 725
724 acct_update_integrals(tsk); 726 acct_update_integrals(tsk);
725 /* sync mm's RSS info before statistics gathering */ 727 /* sync mm's RSS info before statistics gathering */
@@ -837,7 +839,6 @@ void do_exit(long code)
837 for (;;) 839 for (;;)
838 cpu_relax(); /* For when BUG is null */ 840 cpu_relax(); /* For when BUG is null */
839} 841}
840
841EXPORT_SYMBOL_GPL(do_exit); 842EXPORT_SYMBOL_GPL(do_exit);
842 843
843void complete_and_exit(struct completion *comp, long code) 844void complete_and_exit(struct completion *comp, long code)
@@ -847,7 +848,6 @@ void complete_and_exit(struct completion *comp, long code)
847 848
848 do_exit(code); 849 do_exit(code);
849} 850}
850
851EXPORT_SYMBOL(complete_and_exit); 851EXPORT_SYMBOL(complete_and_exit);
852 852
853SYSCALL_DEFINE1(exit, int, error_code) 853SYSCALL_DEFINE1(exit, int, error_code)
@@ -870,6 +870,7 @@ do_group_exit(int exit_code)
870 exit_code = sig->group_exit_code; 870 exit_code = sig->group_exit_code;
871 else if (!thread_group_empty(current)) { 871 else if (!thread_group_empty(current)) {
872 struct sighand_struct *const sighand = current->sighand; 872 struct sighand_struct *const sighand = current->sighand;
873
873 spin_lock_irq(&sighand->siglock); 874 spin_lock_irq(&sighand->siglock);
874 if (signal_group_exit(sig)) 875 if (signal_group_exit(sig))
875 /* Another thread got here before we took the lock. */ 876 /* Another thread got here before we took the lock. */
@@ -1034,9 +1035,9 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
1034 * as other threads in the parent group can be right 1035 * as other threads in the parent group can be right
1035 * here reaping other children at the same time. 1036 * here reaping other children at the same time.
1036 * 1037 *
1037 * We use thread_group_cputime_adjusted() to get times for the thread 1038 * We use thread_group_cputime_adjusted() to get times for
1038 * group, which consolidates times for all threads in the 1039 * the thread group, which consolidates times for all threads
1039 * group including the group leader. 1040 * in the group including the group leader.
1040 */ 1041 */
1041 thread_group_cputime_adjusted(p, &tgutime, &tgstime); 1042 thread_group_cputime_adjusted(p, &tgutime, &tgstime);
1042 spin_lock_irq(&p->real_parent->sighand->siglock); 1043 spin_lock_irq(&p->real_parent->sighand->siglock);
@@ -1418,6 +1419,7 @@ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
1418 1419
1419 list_for_each_entry(p, &tsk->children, sibling) { 1420 list_for_each_entry(p, &tsk->children, sibling) {
1420 int ret = wait_consider_task(wo, 0, p); 1421 int ret = wait_consider_task(wo, 0, p);
1422
1421 if (ret) 1423 if (ret)
1422 return ret; 1424 return ret;
1423 } 1425 }
@@ -1431,6 +1433,7 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
1431 1433
1432 list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { 1434 list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
1433 int ret = wait_consider_task(wo, 1, p); 1435 int ret = wait_consider_task(wo, 1, p);
1436
1434 if (ret) 1437 if (ret)
1435 return ret; 1438 return ret;
1436 } 1439 }
diff --git a/kernel/fork.c b/kernel/fork.c
index fbd3497b221f..1380d8ace334 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -374,12 +374,11 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
374 */ 374 */
375 down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING); 375 down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
376 376
377 mm->locked_vm = 0; 377 mm->total_vm = oldmm->total_vm;
378 mm->mmap = NULL; 378 mm->shared_vm = oldmm->shared_vm;
379 mm->vmacache_seqnum = 0; 379 mm->exec_vm = oldmm->exec_vm;
380 mm->map_count = 0; 380 mm->stack_vm = oldmm->stack_vm;
381 cpumask_clear(mm_cpumask(mm)); 381
382 mm->mm_rb = RB_ROOT;
383 rb_link = &mm->mm_rb.rb_node; 382 rb_link = &mm->mm_rb.rb_node;
384 rb_parent = NULL; 383 rb_parent = NULL;
385 pprev = &mm->mmap; 384 pprev = &mm->mmap;
@@ -430,7 +429,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
430 atomic_dec(&inode->i_writecount); 429 atomic_dec(&inode->i_writecount);
431 mutex_lock(&mapping->i_mmap_mutex); 430 mutex_lock(&mapping->i_mmap_mutex);
432 if (tmp->vm_flags & VM_SHARED) 431 if (tmp->vm_flags & VM_SHARED)
433 mapping->i_mmap_writable++; 432 atomic_inc(&mapping->i_mmap_writable);
434 flush_dcache_mmap_lock(mapping); 433 flush_dcache_mmap_lock(mapping);
435 /* insert tmp into the share list, just after mpnt */ 434 /* insert tmp into the share list, just after mpnt */
436 if (unlikely(tmp->vm_flags & VM_NONLINEAR)) 435 if (unlikely(tmp->vm_flags & VM_NONLINEAR))
@@ -536,19 +535,37 @@ static void mm_init_aio(struct mm_struct *mm)
536#endif 535#endif
537} 536}
538 537
538static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
539{
540#ifdef CONFIG_MEMCG
541 mm->owner = p;
542#endif
543}
544
539static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) 545static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
540{ 546{
547 mm->mmap = NULL;
548 mm->mm_rb = RB_ROOT;
549 mm->vmacache_seqnum = 0;
541 atomic_set(&mm->mm_users, 1); 550 atomic_set(&mm->mm_users, 1);
542 atomic_set(&mm->mm_count, 1); 551 atomic_set(&mm->mm_count, 1);
543 init_rwsem(&mm->mmap_sem); 552 init_rwsem(&mm->mmap_sem);
544 INIT_LIST_HEAD(&mm->mmlist); 553 INIT_LIST_HEAD(&mm->mmlist);
545 mm->core_state = NULL; 554 mm->core_state = NULL;
546 atomic_long_set(&mm->nr_ptes, 0); 555 atomic_long_set(&mm->nr_ptes, 0);
556 mm->map_count = 0;
557 mm->locked_vm = 0;
558 mm->pinned_vm = 0;
547 memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); 559 memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
548 spin_lock_init(&mm->page_table_lock); 560 spin_lock_init(&mm->page_table_lock);
561 mm_init_cpumask(mm);
549 mm_init_aio(mm); 562 mm_init_aio(mm);
550 mm_init_owner(mm, p); 563 mm_init_owner(mm, p);
564 mmu_notifier_mm_init(mm);
551 clear_tlb_flush_pending(mm); 565 clear_tlb_flush_pending(mm);
566#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
567 mm->pmd_huge_pte = NULL;
568#endif
552 569
553 if (current->mm) { 570 if (current->mm) {
554 mm->flags = current->mm->flags & MMF_INIT_MASK; 571 mm->flags = current->mm->flags & MMF_INIT_MASK;
@@ -558,11 +575,17 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
558 mm->def_flags = 0; 575 mm->def_flags = 0;
559 } 576 }
560 577
561 if (likely(!mm_alloc_pgd(mm))) { 578 if (mm_alloc_pgd(mm))
562 mmu_notifier_mm_init(mm); 579 goto fail_nopgd;
563 return mm; 580
564 } 581 if (init_new_context(p, mm))
582 goto fail_nocontext;
583
584 return mm;
565 585
586fail_nocontext:
587 mm_free_pgd(mm);
588fail_nopgd:
566 free_mm(mm); 589 free_mm(mm);
567 return NULL; 590 return NULL;
568} 591}
@@ -596,7 +619,6 @@ struct mm_struct *mm_alloc(void)
596 return NULL; 619 return NULL;
597 620
598 memset(mm, 0, sizeof(*mm)); 621 memset(mm, 0, sizeof(*mm));
599 mm_init_cpumask(mm);
600 return mm_init(mm, current); 622 return mm_init(mm, current);
601} 623}
602 624
@@ -828,17 +850,10 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
828 goto fail_nomem; 850 goto fail_nomem;
829 851
830 memcpy(mm, oldmm, sizeof(*mm)); 852 memcpy(mm, oldmm, sizeof(*mm));
831 mm_init_cpumask(mm);
832 853
833#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
834 mm->pmd_huge_pte = NULL;
835#endif
836 if (!mm_init(mm, tsk)) 854 if (!mm_init(mm, tsk))
837 goto fail_nomem; 855 goto fail_nomem;
838 856
839 if (init_new_context(tsk, mm))
840 goto fail_nocontext;
841
842 dup_mm_exe_file(oldmm, mm); 857 dup_mm_exe_file(oldmm, mm);
843 858
844 err = dup_mmap(mm, oldmm); 859 err = dup_mmap(mm, oldmm);
@@ -860,15 +875,6 @@ free_pt:
860 875
861fail_nomem: 876fail_nomem:
862 return NULL; 877 return NULL;
863
864fail_nocontext:
865 /*
866 * If init_new_context() failed, we cannot use mmput() to free the mm
867 * because it calls destroy_context()
868 */
869 mm_free_pgd(mm);
870 free_mm(mm);
871 return NULL;
872} 878}
873 879
874static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) 880static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
@@ -1140,13 +1146,6 @@ static void rt_mutex_init_task(struct task_struct *p)
1140#endif 1146#endif
1141} 1147}
1142 1148
1143#ifdef CONFIG_MEMCG
1144void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
1145{
1146 mm->owner = p;
1147}
1148#endif /* CONFIG_MEMCG */
1149
1150/* 1149/*
1151 * Initialize POSIX timer handling for a single task. 1150 * Initialize POSIX timer handling for a single task.
1152 */ 1151 */
@@ -1346,10 +1345,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1346#ifdef CONFIG_DEBUG_MUTEXES 1345#ifdef CONFIG_DEBUG_MUTEXES
1347 p->blocked_on = NULL; /* not blocked yet */ 1346 p->blocked_on = NULL; /* not blocked yet */
1348#endif 1347#endif
1349#ifdef CONFIG_MEMCG
1350 p->memcg_batch.do_batch = 0;
1351 p->memcg_batch.memcg = NULL;
1352#endif
1353#ifdef CONFIG_BCACHE 1348#ifdef CONFIG_BCACHE
1354 p->sequential_io = 0; 1349 p->sequential_io = 0;
1355 p->sequential_io_avg = 0; 1350 p->sequential_io_avg = 0;
@@ -1367,6 +1362,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1367 if (retval) 1362 if (retval)
1368 goto bad_fork_cleanup_policy; 1363 goto bad_fork_cleanup_policy;
1369 /* copy all the process information */ 1364 /* copy all the process information */
1365 shm_init_task(p);
1370 retval = copy_semundo(clone_flags, p); 1366 retval = copy_semundo(clone_flags, p);
1371 if (retval) 1367 if (retval)
1372 goto bad_fork_cleanup_audit; 1368 goto bad_fork_cleanup_audit;
@@ -1918,6 +1914,11 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
1918 */ 1914 */
1919 exit_sem(current); 1915 exit_sem(current);
1920 } 1916 }
1917 if (unshare_flags & CLONE_NEWIPC) {
1918 /* Orphan segments in old ns (see sem above). */
1919 exit_shm(current);
1920 shm_init_task(current);
1921 }
1921 1922
1922 if (new_nsproxy) 1923 if (new_nsproxy)
1923 switch_task_namespaces(current, new_nsproxy); 1924 switch_task_namespaces(current, new_nsproxy);
diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c
index 15ff01a76379..edf67c493a8e 100644
--- a/kernel/gcov/fs.c
+++ b/kernel/gcov/fs.c
@@ -784,8 +784,7 @@ static __init int gcov_fs_init(void)
784 784
785err_remove: 785err_remove:
786 pr_err("init failed\n"); 786 pr_err("init failed\n");
787 if (root_node.dentry) 787 debugfs_remove(root_node.dentry);
788 debugfs_remove(root_node.dentry);
789 788
790 return rc; 789 return rc;
791} 790}
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index cb0cf37dac3a..ae5167087845 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -364,7 +364,7 @@ static int __sprint_symbol(char *buffer, unsigned long address,
364 address += symbol_offset; 364 address += symbol_offset;
365 name = kallsyms_lookup(address, &size, &offset, &modname, buffer); 365 name = kallsyms_lookup(address, &size, &offset, &modname, buffer);
366 if (!name) 366 if (!name)
367 return sprintf(buffer, "0x%lx", address); 367 return sprintf(buffer, "0x%lx", address - symbol_offset);
368 368
369 if (name != buffer) 369 if (name != buffer)
370 strcpy(buffer, name); 370 strcpy(buffer, name);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 4b8f0c925884..0b49a0a58102 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -6,6 +6,8 @@
6 * Version 2. See the file COPYING for more details. 6 * Version 2. See the file COPYING for more details.
7 */ 7 */
8 8
9#define pr_fmt(fmt) "kexec: " fmt
10
9#include <linux/capability.h> 11#include <linux/capability.h>
10#include <linux/mm.h> 12#include <linux/mm.h>
11#include <linux/file.h> 13#include <linux/file.h>
@@ -40,6 +42,9 @@
40#include <asm/io.h> 42#include <asm/io.h>
41#include <asm/sections.h> 43#include <asm/sections.h>
42 44
45#include <crypto/hash.h>
46#include <crypto/sha.h>
47
43/* Per cpu memory for storing cpu states in case of system crash. */ 48/* Per cpu memory for storing cpu states in case of system crash. */
44note_buf_t __percpu *crash_notes; 49note_buf_t __percpu *crash_notes;
45 50
@@ -52,6 +57,15 @@ size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
52/* Flag to indicate we are going to kexec a new kernel */ 57/* Flag to indicate we are going to kexec a new kernel */
53bool kexec_in_progress = false; 58bool kexec_in_progress = false;
54 59
60/*
61 * Declare these symbols weak so that if architecture provides a purgatory,
62 * these will be overridden.
63 */
64char __weak kexec_purgatory[0];
65size_t __weak kexec_purgatory_size = 0;
66
67static int kexec_calculate_store_digests(struct kimage *image);
68
55/* Location of the reserved area for the crash kernel */ 69/* Location of the reserved area for the crash kernel */
56struct resource crashk_res = { 70struct resource crashk_res = {
57 .name = "Crash kernel", 71 .name = "Crash kernel",
@@ -125,45 +139,27 @@ static struct page *kimage_alloc_page(struct kimage *image,
125 gfp_t gfp_mask, 139 gfp_t gfp_mask,
126 unsigned long dest); 140 unsigned long dest);
127 141
128static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, 142static int copy_user_segment_list(struct kimage *image,
129 unsigned long nr_segments, 143 unsigned long nr_segments,
130 struct kexec_segment __user *segments) 144 struct kexec_segment __user *segments)
131{ 145{
146 int ret;
132 size_t segment_bytes; 147 size_t segment_bytes;
133 struct kimage *image;
134 unsigned long i;
135 int result;
136
137 /* Allocate a controlling structure */
138 result = -ENOMEM;
139 image = kzalloc(sizeof(*image), GFP_KERNEL);
140 if (!image)
141 goto out;
142
143 image->head = 0;
144 image->entry = &image->head;
145 image->last_entry = &image->head;
146 image->control_page = ~0; /* By default this does not apply */
147 image->start = entry;
148 image->type = KEXEC_TYPE_DEFAULT;
149
150 /* Initialize the list of control pages */
151 INIT_LIST_HEAD(&image->control_pages);
152
153 /* Initialize the list of destination pages */
154 INIT_LIST_HEAD(&image->dest_pages);
155
156 /* Initialize the list of unusable pages */
157 INIT_LIST_HEAD(&image->unuseable_pages);
158 148
159 /* Read in the segments */ 149 /* Read in the segments */
160 image->nr_segments = nr_segments; 150 image->nr_segments = nr_segments;
161 segment_bytes = nr_segments * sizeof(*segments); 151 segment_bytes = nr_segments * sizeof(*segments);
162 result = copy_from_user(image->segment, segments, segment_bytes); 152 ret = copy_from_user(image->segment, segments, segment_bytes);
163 if (result) { 153 if (ret)
164 result = -EFAULT; 154 ret = -EFAULT;
165 goto out; 155
166 } 156 return ret;
157}
158
159static int sanity_check_segment_list(struct kimage *image)
160{
161 int result, i;
162 unsigned long nr_segments = image->nr_segments;
167 163
168 /* 164 /*
169 * Verify we have good destination addresses. The caller is 165 * Verify we have good destination addresses. The caller is
@@ -185,9 +181,9 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
185 mstart = image->segment[i].mem; 181 mstart = image->segment[i].mem;
186 mend = mstart + image->segment[i].memsz; 182 mend = mstart + image->segment[i].memsz;
187 if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK)) 183 if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
188 goto out; 184 return result;
189 if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT) 185 if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
190 goto out; 186 return result;
191 } 187 }
192 188
193 /* Verify our destination addresses do not overlap. 189 /* Verify our destination addresses do not overlap.
@@ -208,7 +204,7 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
208 pend = pstart + image->segment[j].memsz; 204 pend = pstart + image->segment[j].memsz;
209 /* Do the segments overlap ? */ 205 /* Do the segments overlap ? */
210 if ((mend > pstart) && (mstart < pend)) 206 if ((mend > pstart) && (mstart < pend))
211 goto out; 207 return result;
212 } 208 }
213 } 209 }
214 210
@@ -220,130 +216,401 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
220 result = -EINVAL; 216 result = -EINVAL;
221 for (i = 0; i < nr_segments; i++) { 217 for (i = 0; i < nr_segments; i++) {
222 if (image->segment[i].bufsz > image->segment[i].memsz) 218 if (image->segment[i].bufsz > image->segment[i].memsz)
223 goto out; 219 return result;
224 } 220 }
225 221
226 result = 0; 222 /*
227out: 223 * Verify we have good destination addresses. Normally
228 if (result == 0) 224 * the caller is responsible for making certain we don't
229 *rimage = image; 225 * attempt to load the new image into invalid or reserved
230 else 226 * areas of RAM. But crash kernels are preloaded into a
231 kfree(image); 227 * reserved area of ram. We must ensure the addresses
228 * are in the reserved area otherwise preloading the
229 * kernel could corrupt things.
230 */
232 231
233 return result; 232 if (image->type == KEXEC_TYPE_CRASH) {
233 result = -EADDRNOTAVAIL;
234 for (i = 0; i < nr_segments; i++) {
235 unsigned long mstart, mend;
236
237 mstart = image->segment[i].mem;
238 mend = mstart + image->segment[i].memsz - 1;
239 /* Ensure we are within the crash kernel limits */
240 if ((mstart < crashk_res.start) ||
241 (mend > crashk_res.end))
242 return result;
243 }
244 }
234 245
246 return 0;
247}
248
249static struct kimage *do_kimage_alloc_init(void)
250{
251 struct kimage *image;
252
253 /* Allocate a controlling structure */
254 image = kzalloc(sizeof(*image), GFP_KERNEL);
255 if (!image)
256 return NULL;
257
258 image->head = 0;
259 image->entry = &image->head;
260 image->last_entry = &image->head;
261 image->control_page = ~0; /* By default this does not apply */
262 image->type = KEXEC_TYPE_DEFAULT;
263
264 /* Initialize the list of control pages */
265 INIT_LIST_HEAD(&image->control_pages);
266
267 /* Initialize the list of destination pages */
268 INIT_LIST_HEAD(&image->dest_pages);
269
270 /* Initialize the list of unusable pages */
271 INIT_LIST_HEAD(&image->unusable_pages);
272
273 return image;
235} 274}
236 275
237static void kimage_free_page_list(struct list_head *list); 276static void kimage_free_page_list(struct list_head *list);
238 277
239static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, 278static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
240 unsigned long nr_segments, 279 unsigned long nr_segments,
241 struct kexec_segment __user *segments) 280 struct kexec_segment __user *segments,
281 unsigned long flags)
242{ 282{
243 int result; 283 int ret;
244 struct kimage *image; 284 struct kimage *image;
285 bool kexec_on_panic = flags & KEXEC_ON_CRASH;
286
287 if (kexec_on_panic) {
288 /* Verify we have a valid entry point */
289 if ((entry < crashk_res.start) || (entry > crashk_res.end))
290 return -EADDRNOTAVAIL;
291 }
245 292
246 /* Allocate and initialize a controlling structure */ 293 /* Allocate and initialize a controlling structure */
247 image = NULL; 294 image = do_kimage_alloc_init();
248 result = do_kimage_alloc(&image, entry, nr_segments, segments); 295 if (!image)
249 if (result) 296 return -ENOMEM;
250 goto out; 297
298 image->start = entry;
299
300 ret = copy_user_segment_list(image, nr_segments, segments);
301 if (ret)
302 goto out_free_image;
303
304 ret = sanity_check_segment_list(image);
305 if (ret)
306 goto out_free_image;
307
308 /* Enable the special crash kernel control page allocation policy. */
309 if (kexec_on_panic) {
310 image->control_page = crashk_res.start;
311 image->type = KEXEC_TYPE_CRASH;
312 }
251 313
252 /* 314 /*
253 * Find a location for the control code buffer, and add it 315 * Find a location for the control code buffer, and add it
254 * the vector of segments so that it's pages will also be 316 * the vector of segments so that it's pages will also be
255 * counted as destination pages. 317 * counted as destination pages.
256 */ 318 */
257 result = -ENOMEM; 319 ret = -ENOMEM;
258 image->control_code_page = kimage_alloc_control_pages(image, 320 image->control_code_page = kimage_alloc_control_pages(image,
259 get_order(KEXEC_CONTROL_PAGE_SIZE)); 321 get_order(KEXEC_CONTROL_PAGE_SIZE));
260 if (!image->control_code_page) { 322 if (!image->control_code_page) {
261 pr_err("Could not allocate control_code_buffer\n"); 323 pr_err("Could not allocate control_code_buffer\n");
262 goto out_free; 324 goto out_free_image;
263 } 325 }
264 326
265 image->swap_page = kimage_alloc_control_pages(image, 0); 327 if (!kexec_on_panic) {
266 if (!image->swap_page) { 328 image->swap_page = kimage_alloc_control_pages(image, 0);
267 pr_err("Could not allocate swap buffer\n"); 329 if (!image->swap_page) {
268 goto out_free; 330 pr_err("Could not allocate swap buffer\n");
331 goto out_free_control_pages;
332 }
269 } 333 }
270 334
271 *rimage = image; 335 *rimage = image;
272 return 0; 336 return 0;
273 337out_free_control_pages:
274out_free:
275 kimage_free_page_list(&image->control_pages); 338 kimage_free_page_list(&image->control_pages);
339out_free_image:
276 kfree(image); 340 kfree(image);
277out: 341 return ret;
278 return result;
279} 342}
280 343
281static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry, 344static int copy_file_from_fd(int fd, void **buf, unsigned long *buf_len)
282 unsigned long nr_segments,
283 struct kexec_segment __user *segments)
284{ 345{
285 int result; 346 struct fd f = fdget(fd);
286 struct kimage *image; 347 int ret;
287 unsigned long i; 348 struct kstat stat;
349 loff_t pos;
350 ssize_t bytes = 0;
288 351
289 image = NULL; 352 if (!f.file)
290 /* Verify we have a valid entry point */ 353 return -EBADF;
291 if ((entry < crashk_res.start) || (entry > crashk_res.end)) { 354
292 result = -EADDRNOTAVAIL; 355 ret = vfs_getattr(&f.file->f_path, &stat);
356 if (ret)
357 goto out;
358
359 if (stat.size > INT_MAX) {
360 ret = -EFBIG;
293 goto out; 361 goto out;
294 } 362 }
295 363
296 /* Allocate and initialize a controlling structure */ 364 /* Don't hand 0 to vmalloc, it whines. */
297 result = do_kimage_alloc(&image, entry, nr_segments, segments); 365 if (stat.size == 0) {
298 if (result) 366 ret = -EINVAL;
299 goto out; 367 goto out;
368 }
300 369
301 /* Enable the special crash kernel control page 370 *buf = vmalloc(stat.size);
302 * allocation policy. 371 if (!*buf) {
303 */ 372 ret = -ENOMEM;
304 image->control_page = crashk_res.start; 373 goto out;
305 image->type = KEXEC_TYPE_CRASH; 374 }
306 375
307 /* 376 pos = 0;
308 * Verify we have good destination addresses. Normally 377 while (pos < stat.size) {
309 * the caller is responsible for making certain we don't 378 bytes = kernel_read(f.file, pos, (char *)(*buf) + pos,
310 * attempt to load the new image into invalid or reserved 379 stat.size - pos);
311 * areas of RAM. But crash kernels are preloaded into a 380 if (bytes < 0) {
312 * reserved area of ram. We must ensure the addresses 381 vfree(*buf);
313 * are in the reserved area otherwise preloading the 382 ret = bytes;
314 * kernel could corrupt things. 383 goto out;
315 */ 384 }
316 result = -EADDRNOTAVAIL;
317 for (i = 0; i < nr_segments; i++) {
318 unsigned long mstart, mend;
319 385
320 mstart = image->segment[i].mem; 386 if (bytes == 0)
321 mend = mstart + image->segment[i].memsz - 1; 387 break;
322 /* Ensure we are within the crash kernel limits */ 388 pos += bytes;
323 if ((mstart < crashk_res.start) || (mend > crashk_res.end))
324 goto out_free;
325 } 389 }
326 390
391 if (pos != stat.size) {
392 ret = -EBADF;
393 vfree(*buf);
394 goto out;
395 }
396
397 *buf_len = pos;
398out:
399 fdput(f);
400 return ret;
401}
402
403/* Architectures can provide this probe function */
404int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
405 unsigned long buf_len)
406{
407 return -ENOEXEC;
408}
409
410void * __weak arch_kexec_kernel_image_load(struct kimage *image)
411{
412 return ERR_PTR(-ENOEXEC);
413}
414
415void __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
416{
417}
418
419int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
420 unsigned long buf_len)
421{
422 return -EKEYREJECTED;
423}
424
425/* Apply relocations of type RELA */
426int __weak
427arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
428 unsigned int relsec)
429{
430 pr_err("RELA relocation unsupported.\n");
431 return -ENOEXEC;
432}
433
434/* Apply relocations of type REL */
435int __weak
436arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
437 unsigned int relsec)
438{
439 pr_err("REL relocation unsupported.\n");
440 return -ENOEXEC;
441}
442
443/*
444 * Free up memory used by kernel, initrd, and comand line. This is temporary
445 * memory allocation which is not needed any more after these buffers have
446 * been loaded into separate segments and have been copied elsewhere.
447 */
448static void kimage_file_post_load_cleanup(struct kimage *image)
449{
450 struct purgatory_info *pi = &image->purgatory_info;
451
452 vfree(image->kernel_buf);
453 image->kernel_buf = NULL;
454
455 vfree(image->initrd_buf);
456 image->initrd_buf = NULL;
457
458 kfree(image->cmdline_buf);
459 image->cmdline_buf = NULL;
460
461 vfree(pi->purgatory_buf);
462 pi->purgatory_buf = NULL;
463
464 vfree(pi->sechdrs);
465 pi->sechdrs = NULL;
466
467 /* See if architecture has anything to cleanup post load */
468 arch_kimage_file_post_load_cleanup(image);
469
327 /* 470 /*
328 * Find a location for the control code buffer, and add 471 * Above call should have called into bootloader to free up
329 * the vector of segments so that it's pages will also be 472 * any data stored in kimage->image_loader_data. It should
330 * counted as destination pages. 473 * be ok now to free it up.
331 */ 474 */
332 result = -ENOMEM; 475 kfree(image->image_loader_data);
476 image->image_loader_data = NULL;
477}
478
479/*
480 * In file mode list of segments is prepared by kernel. Copy relevant
481 * data from user space, do error checking, prepare segment list
482 */
483static int
484kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
485 const char __user *cmdline_ptr,
486 unsigned long cmdline_len, unsigned flags)
487{
488 int ret = 0;
489 void *ldata;
490
491 ret = copy_file_from_fd(kernel_fd, &image->kernel_buf,
492 &image->kernel_buf_len);
493 if (ret)
494 return ret;
495
496 /* Call arch image probe handlers */
497 ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
498 image->kernel_buf_len);
499
500 if (ret)
501 goto out;
502
503#ifdef CONFIG_KEXEC_VERIFY_SIG
504 ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
505 image->kernel_buf_len);
506 if (ret) {
507 pr_debug("kernel signature verification failed.\n");
508 goto out;
509 }
510 pr_debug("kernel signature verification successful.\n");
511#endif
512 /* It is possible that there no initramfs is being loaded */
513 if (!(flags & KEXEC_FILE_NO_INITRAMFS)) {
514 ret = copy_file_from_fd(initrd_fd, &image->initrd_buf,
515 &image->initrd_buf_len);
516 if (ret)
517 goto out;
518 }
519
520 if (cmdline_len) {
521 image->cmdline_buf = kzalloc(cmdline_len, GFP_KERNEL);
522 if (!image->cmdline_buf) {
523 ret = -ENOMEM;
524 goto out;
525 }
526
527 ret = copy_from_user(image->cmdline_buf, cmdline_ptr,
528 cmdline_len);
529 if (ret) {
530 ret = -EFAULT;
531 goto out;
532 }
533
534 image->cmdline_buf_len = cmdline_len;
535
536 /* command line should be a string with last byte null */
537 if (image->cmdline_buf[cmdline_len - 1] != '\0') {
538 ret = -EINVAL;
539 goto out;
540 }
541 }
542
543 /* Call arch image load handlers */
544 ldata = arch_kexec_kernel_image_load(image);
545
546 if (IS_ERR(ldata)) {
547 ret = PTR_ERR(ldata);
548 goto out;
549 }
550
551 image->image_loader_data = ldata;
552out:
553 /* In case of error, free up all allocated memory in this function */
554 if (ret)
555 kimage_file_post_load_cleanup(image);
556 return ret;
557}
558
559static int
560kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
561 int initrd_fd, const char __user *cmdline_ptr,
562 unsigned long cmdline_len, unsigned long flags)
563{
564 int ret;
565 struct kimage *image;
566 bool kexec_on_panic = flags & KEXEC_FILE_ON_CRASH;
567
568 image = do_kimage_alloc_init();
569 if (!image)
570 return -ENOMEM;
571
572 image->file_mode = 1;
573
574 if (kexec_on_panic) {
575 /* Enable special crash kernel control page alloc policy. */
576 image->control_page = crashk_res.start;
577 image->type = KEXEC_TYPE_CRASH;
578 }
579
580 ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
581 cmdline_ptr, cmdline_len, flags);
582 if (ret)
583 goto out_free_image;
584
585 ret = sanity_check_segment_list(image);
586 if (ret)
587 goto out_free_post_load_bufs;
588
589 ret = -ENOMEM;
333 image->control_code_page = kimage_alloc_control_pages(image, 590 image->control_code_page = kimage_alloc_control_pages(image,
334 get_order(KEXEC_CONTROL_PAGE_SIZE)); 591 get_order(KEXEC_CONTROL_PAGE_SIZE));
335 if (!image->control_code_page) { 592 if (!image->control_code_page) {
336 pr_err("Could not allocate control_code_buffer\n"); 593 pr_err("Could not allocate control_code_buffer\n");
337 goto out_free; 594 goto out_free_post_load_bufs;
595 }
596
597 if (!kexec_on_panic) {
598 image->swap_page = kimage_alloc_control_pages(image, 0);
599 if (!image->swap_page) {
600 pr_err(KERN_ERR "Could not allocate swap buffer\n");
601 goto out_free_control_pages;
602 }
338 } 603 }
339 604
340 *rimage = image; 605 *rimage = image;
341 return 0; 606 return 0;
342 607out_free_control_pages:
343out_free: 608 kimage_free_page_list(&image->control_pages);
609out_free_post_load_bufs:
610 kimage_file_post_load_cleanup(image);
611out_free_image:
344 kfree(image); 612 kfree(image);
345out: 613 return ret;
346 return result;
347} 614}
348 615
349static int kimage_is_destination_range(struct kimage *image, 616static int kimage_is_destination_range(struct kimage *image,
@@ -609,7 +876,7 @@ static void kimage_free_extra_pages(struct kimage *image)
609 kimage_free_page_list(&image->dest_pages); 876 kimage_free_page_list(&image->dest_pages);
610 877
611 /* Walk through and free any unusable pages I have cached */ 878 /* Walk through and free any unusable pages I have cached */
612 kimage_free_page_list(&image->unuseable_pages); 879 kimage_free_page_list(&image->unusable_pages);
613 880
614} 881}
615static void kimage_terminate(struct kimage *image) 882static void kimage_terminate(struct kimage *image)
@@ -663,6 +930,14 @@ static void kimage_free(struct kimage *image)
663 930
664 /* Free the kexec control pages... */ 931 /* Free the kexec control pages... */
665 kimage_free_page_list(&image->control_pages); 932 kimage_free_page_list(&image->control_pages);
933
934 /*
935 * Free up any temporary buffers allocated. This might hit if
936 * error occurred much later after buffer allocation.
937 */
938 if (image->file_mode)
939 kimage_file_post_load_cleanup(image);
940
666 kfree(image); 941 kfree(image);
667} 942}
668 943
@@ -732,7 +1007,7 @@ static struct page *kimage_alloc_page(struct kimage *image,
732 /* If the page cannot be used file it away */ 1007 /* If the page cannot be used file it away */
733 if (page_to_pfn(page) > 1008 if (page_to_pfn(page) >
734 (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { 1009 (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
735 list_add(&page->lru, &image->unuseable_pages); 1010 list_add(&page->lru, &image->unusable_pages);
736 continue; 1011 continue;
737 } 1012 }
738 addr = page_to_pfn(page) << PAGE_SHIFT; 1013 addr = page_to_pfn(page) << PAGE_SHIFT;
@@ -791,10 +1066,14 @@ static int kimage_load_normal_segment(struct kimage *image,
791 unsigned long maddr; 1066 unsigned long maddr;
792 size_t ubytes, mbytes; 1067 size_t ubytes, mbytes;
793 int result; 1068 int result;
794 unsigned char __user *buf; 1069 unsigned char __user *buf = NULL;
1070 unsigned char *kbuf = NULL;
795 1071
796 result = 0; 1072 result = 0;
797 buf = segment->buf; 1073 if (image->file_mode)
1074 kbuf = segment->kbuf;
1075 else
1076 buf = segment->buf;
798 ubytes = segment->bufsz; 1077 ubytes = segment->bufsz;
799 mbytes = segment->memsz; 1078 mbytes = segment->memsz;
800 maddr = segment->mem; 1079 maddr = segment->mem;
@@ -826,7 +1105,11 @@ static int kimage_load_normal_segment(struct kimage *image,
826 PAGE_SIZE - (maddr & ~PAGE_MASK)); 1105 PAGE_SIZE - (maddr & ~PAGE_MASK));
827 uchunk = min(ubytes, mchunk); 1106 uchunk = min(ubytes, mchunk);
828 1107
829 result = copy_from_user(ptr, buf, uchunk); 1108 /* For file based kexec, source pages are in kernel memory */
1109 if (image->file_mode)
1110 memcpy(ptr, kbuf, uchunk);
1111 else
1112 result = copy_from_user(ptr, buf, uchunk);
830 kunmap(page); 1113 kunmap(page);
831 if (result) { 1114 if (result) {
832 result = -EFAULT; 1115 result = -EFAULT;
@@ -834,7 +1117,10 @@ static int kimage_load_normal_segment(struct kimage *image,
834 } 1117 }
835 ubytes -= uchunk; 1118 ubytes -= uchunk;
836 maddr += mchunk; 1119 maddr += mchunk;
837 buf += mchunk; 1120 if (image->file_mode)
1121 kbuf += mchunk;
1122 else
1123 buf += mchunk;
838 mbytes -= mchunk; 1124 mbytes -= mchunk;
839 } 1125 }
840out: 1126out:
@@ -851,10 +1137,14 @@ static int kimage_load_crash_segment(struct kimage *image,
851 unsigned long maddr; 1137 unsigned long maddr;
852 size_t ubytes, mbytes; 1138 size_t ubytes, mbytes;
853 int result; 1139 int result;
854 unsigned char __user *buf; 1140 unsigned char __user *buf = NULL;
1141 unsigned char *kbuf = NULL;
855 1142
856 result = 0; 1143 result = 0;
857 buf = segment->buf; 1144 if (image->file_mode)
1145 kbuf = segment->kbuf;
1146 else
1147 buf = segment->buf;
858 ubytes = segment->bufsz; 1148 ubytes = segment->bufsz;
859 mbytes = segment->memsz; 1149 mbytes = segment->memsz;
860 maddr = segment->mem; 1150 maddr = segment->mem;
@@ -877,7 +1167,12 @@ static int kimage_load_crash_segment(struct kimage *image,
877 /* Zero the trailing part of the page */ 1167 /* Zero the trailing part of the page */
878 memset(ptr + uchunk, 0, mchunk - uchunk); 1168 memset(ptr + uchunk, 0, mchunk - uchunk);
879 } 1169 }
880 result = copy_from_user(ptr, buf, uchunk); 1170
1171 /* For file based kexec, source pages are in kernel memory */
1172 if (image->file_mode)
1173 memcpy(ptr, kbuf, uchunk);
1174 else
1175 result = copy_from_user(ptr, buf, uchunk);
881 kexec_flush_icache_page(page); 1176 kexec_flush_icache_page(page);
882 kunmap(page); 1177 kunmap(page);
883 if (result) { 1178 if (result) {
@@ -886,7 +1181,10 @@ static int kimage_load_crash_segment(struct kimage *image,
886 } 1181 }
887 ubytes -= uchunk; 1182 ubytes -= uchunk;
888 maddr += mchunk; 1183 maddr += mchunk;
889 buf += mchunk; 1184 if (image->file_mode)
1185 kbuf += mchunk;
1186 else
1187 buf += mchunk;
890 mbytes -= mchunk; 1188 mbytes -= mchunk;
891 } 1189 }
892out: 1190out:
@@ -986,16 +1284,16 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
986 1284
987 /* Loading another kernel to reboot into */ 1285 /* Loading another kernel to reboot into */
988 if ((flags & KEXEC_ON_CRASH) == 0) 1286 if ((flags & KEXEC_ON_CRASH) == 0)
989 result = kimage_normal_alloc(&image, entry, 1287 result = kimage_alloc_init(&image, entry, nr_segments,
990 nr_segments, segments); 1288 segments, flags);
991 /* Loading another kernel to switch to if this one crashes */ 1289 /* Loading another kernel to switch to if this one crashes */
992 else if (flags & KEXEC_ON_CRASH) { 1290 else if (flags & KEXEC_ON_CRASH) {
993 /* Free any current crash dump kernel before 1291 /* Free any current crash dump kernel before
994 * we corrupt it. 1292 * we corrupt it.
995 */ 1293 */
996 kimage_free(xchg(&kexec_crash_image, NULL)); 1294 kimage_free(xchg(&kexec_crash_image, NULL));
997 result = kimage_crash_alloc(&image, entry, 1295 result = kimage_alloc_init(&image, entry, nr_segments,
998 nr_segments, segments); 1296 segments, flags);
999 crash_map_reserved_pages(); 1297 crash_map_reserved_pages();
1000 } 1298 }
1001 if (result) 1299 if (result)
@@ -1077,6 +1375,82 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
1077} 1375}
1078#endif 1376#endif
1079 1377
1378SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
1379 unsigned long, cmdline_len, const char __user *, cmdline_ptr,
1380 unsigned long, flags)
1381{
1382 int ret = 0, i;
1383 struct kimage **dest_image, *image;
1384
1385 /* We only trust the superuser with rebooting the system. */
1386 if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
1387 return -EPERM;
1388
1389 /* Make sure we have a legal set of flags */
1390 if (flags != (flags & KEXEC_FILE_FLAGS))
1391 return -EINVAL;
1392
1393 image = NULL;
1394
1395 if (!mutex_trylock(&kexec_mutex))
1396 return -EBUSY;
1397
1398 dest_image = &kexec_image;
1399 if (flags & KEXEC_FILE_ON_CRASH)
1400 dest_image = &kexec_crash_image;
1401
1402 if (flags & KEXEC_FILE_UNLOAD)
1403 goto exchange;
1404
1405 /*
1406 * In case of crash, new kernel gets loaded in reserved region. It is
1407 * same memory where old crash kernel might be loaded. Free any
1408 * current crash dump kernel before we corrupt it.
1409 */
1410 if (flags & KEXEC_FILE_ON_CRASH)
1411 kimage_free(xchg(&kexec_crash_image, NULL));
1412
1413 ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr,
1414 cmdline_len, flags);
1415 if (ret)
1416 goto out;
1417
1418 ret = machine_kexec_prepare(image);
1419 if (ret)
1420 goto out;
1421
1422 ret = kexec_calculate_store_digests(image);
1423 if (ret)
1424 goto out;
1425
1426 for (i = 0; i < image->nr_segments; i++) {
1427 struct kexec_segment *ksegment;
1428
1429 ksegment = &image->segment[i];
1430 pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n",
1431 i, ksegment->buf, ksegment->bufsz, ksegment->mem,
1432 ksegment->memsz);
1433
1434 ret = kimage_load_segment(image, &image->segment[i]);
1435 if (ret)
1436 goto out;
1437 }
1438
1439 kimage_terminate(image);
1440
1441 /*
1442 * Free up any temporary buffers allocated which are not needed
1443 * after image has been loaded
1444 */
1445 kimage_file_post_load_cleanup(image);
1446exchange:
1447 image = xchg(dest_image, image);
1448out:
1449 mutex_unlock(&kexec_mutex);
1450 kimage_free(image);
1451 return ret;
1452}
1453
1080void crash_kexec(struct pt_regs *regs) 1454void crash_kexec(struct pt_regs *regs)
1081{ 1455{
1082 /* Take the kexec_mutex here to prevent sys_kexec_load 1456 /* Take the kexec_mutex here to prevent sys_kexec_load
@@ -1632,6 +2006,683 @@ static int __init crash_save_vmcoreinfo_init(void)
1632 2006
1633subsys_initcall(crash_save_vmcoreinfo_init); 2007subsys_initcall(crash_save_vmcoreinfo_init);
1634 2008
2009static int __kexec_add_segment(struct kimage *image, char *buf,
2010 unsigned long bufsz, unsigned long mem,
2011 unsigned long memsz)
2012{
2013 struct kexec_segment *ksegment;
2014
2015 ksegment = &image->segment[image->nr_segments];
2016 ksegment->kbuf = buf;
2017 ksegment->bufsz = bufsz;
2018 ksegment->mem = mem;
2019 ksegment->memsz = memsz;
2020 image->nr_segments++;
2021
2022 return 0;
2023}
2024
2025static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
2026 struct kexec_buf *kbuf)
2027{
2028 struct kimage *image = kbuf->image;
2029 unsigned long temp_start, temp_end;
2030
2031 temp_end = min(end, kbuf->buf_max);
2032 temp_start = temp_end - kbuf->memsz;
2033
2034 do {
2035 /* align down start */
2036 temp_start = temp_start & (~(kbuf->buf_align - 1));
2037
2038 if (temp_start < start || temp_start < kbuf->buf_min)
2039 return 0;
2040
2041 temp_end = temp_start + kbuf->memsz - 1;
2042
2043 /*
2044 * Make sure this does not conflict with any of existing
2045 * segments
2046 */
2047 if (kimage_is_destination_range(image, temp_start, temp_end)) {
2048 temp_start = temp_start - PAGE_SIZE;
2049 continue;
2050 }
2051
2052 /* We found a suitable memory range */
2053 break;
2054 } while (1);
2055
2056 /* If we are here, we found a suitable memory range */
2057 __kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
2058 kbuf->memsz);
2059
2060 /* Success, stop navigating through remaining System RAM ranges */
2061 return 1;
2062}
2063
2064static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
2065 struct kexec_buf *kbuf)
2066{
2067 struct kimage *image = kbuf->image;
2068 unsigned long temp_start, temp_end;
2069
2070 temp_start = max(start, kbuf->buf_min);
2071
2072 do {
2073 temp_start = ALIGN(temp_start, kbuf->buf_align);
2074 temp_end = temp_start + kbuf->memsz - 1;
2075
2076 if (temp_end > end || temp_end > kbuf->buf_max)
2077 return 0;
2078 /*
2079 * Make sure this does not conflict with any of existing
2080 * segments
2081 */
2082 if (kimage_is_destination_range(image, temp_start, temp_end)) {
2083 temp_start = temp_start + PAGE_SIZE;
2084 continue;
2085 }
2086
2087 /* We found a suitable memory range */
2088 break;
2089 } while (1);
2090
2091 /* If we are here, we found a suitable memory range */
2092 __kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
2093 kbuf->memsz);
2094
2095 /* Success, stop navigating through remaining System RAM ranges */
2096 return 1;
2097}
2098
2099static int locate_mem_hole_callback(u64 start, u64 end, void *arg)
2100{
2101 struct kexec_buf *kbuf = (struct kexec_buf *)arg;
2102 unsigned long sz = end - start + 1;
2103
2104 /* Returning 0 will take to next memory range */
2105 if (sz < kbuf->memsz)
2106 return 0;
2107
2108 if (end < kbuf->buf_min || start > kbuf->buf_max)
2109 return 0;
2110
2111 /*
2112 * Allocate memory top down with-in ram range. Otherwise bottom up
2113 * allocation.
2114 */
2115 if (kbuf->top_down)
2116 return locate_mem_hole_top_down(start, end, kbuf);
2117 return locate_mem_hole_bottom_up(start, end, kbuf);
2118}
2119
2120/*
2121 * Helper function for placing a buffer in a kexec segment. This assumes
2122 * that kexec_mutex is held.
2123 */
2124int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
2125 unsigned long memsz, unsigned long buf_align,
2126 unsigned long buf_min, unsigned long buf_max,
2127 bool top_down, unsigned long *load_addr)
2128{
2129
2130 struct kexec_segment *ksegment;
2131 struct kexec_buf buf, *kbuf;
2132 int ret;
2133
2134 /* Currently adding segment this way is allowed only in file mode */
2135 if (!image->file_mode)
2136 return -EINVAL;
2137
2138 if (image->nr_segments >= KEXEC_SEGMENT_MAX)
2139 return -EINVAL;
2140
2141 /*
2142 * Make sure we are not trying to add buffer after allocating
2143 * control pages. All segments need to be placed first before
2144 * any control pages are allocated. As control page allocation
2145 * logic goes through list of segments to make sure there are
2146 * no destination overlaps.
2147 */
2148 if (!list_empty(&image->control_pages)) {
2149 WARN_ON(1);
2150 return -EINVAL;
2151 }
2152
2153 memset(&buf, 0, sizeof(struct kexec_buf));
2154 kbuf = &buf;
2155 kbuf->image = image;
2156 kbuf->buffer = buffer;
2157 kbuf->bufsz = bufsz;
2158
2159 kbuf->memsz = ALIGN(memsz, PAGE_SIZE);
2160 kbuf->buf_align = max(buf_align, PAGE_SIZE);
2161 kbuf->buf_min = buf_min;
2162 kbuf->buf_max = buf_max;
2163 kbuf->top_down = top_down;
2164
2165 /* Walk the RAM ranges and allocate a suitable range for the buffer */
2166 if (image->type == KEXEC_TYPE_CRASH)
2167 ret = walk_iomem_res("Crash kernel",
2168 IORESOURCE_MEM | IORESOURCE_BUSY,
2169 crashk_res.start, crashk_res.end, kbuf,
2170 locate_mem_hole_callback);
2171 else
2172 ret = walk_system_ram_res(0, -1, kbuf,
2173 locate_mem_hole_callback);
2174 if (ret != 1) {
2175 /* A suitable memory range could not be found for buffer */
2176 return -EADDRNOTAVAIL;
2177 }
2178
2179 /* Found a suitable memory range */
2180 ksegment = &image->segment[image->nr_segments - 1];
2181 *load_addr = ksegment->mem;
2182 return 0;
2183}
2184
2185/* Calculate and store the digest of segments */
2186static int kexec_calculate_store_digests(struct kimage *image)
2187{
2188 struct crypto_shash *tfm;
2189 struct shash_desc *desc;
2190 int ret = 0, i, j, zero_buf_sz, sha_region_sz;
2191 size_t desc_size, nullsz;
2192 char *digest;
2193 void *zero_buf;
2194 struct kexec_sha_region *sha_regions;
2195 struct purgatory_info *pi = &image->purgatory_info;
2196
2197 zero_buf = __va(page_to_pfn(ZERO_PAGE(0)) << PAGE_SHIFT);
2198 zero_buf_sz = PAGE_SIZE;
2199
2200 tfm = crypto_alloc_shash("sha256", 0, 0);
2201 if (IS_ERR(tfm)) {
2202 ret = PTR_ERR(tfm);
2203 goto out;
2204 }
2205
2206 desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
2207 desc = kzalloc(desc_size, GFP_KERNEL);
2208 if (!desc) {
2209 ret = -ENOMEM;
2210 goto out_free_tfm;
2211 }
2212
2213 sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region);
2214 sha_regions = vzalloc(sha_region_sz);
2215 if (!sha_regions)
2216 goto out_free_desc;
2217
2218 desc->tfm = tfm;
2219 desc->flags = 0;
2220
2221 ret = crypto_shash_init(desc);
2222 if (ret < 0)
2223 goto out_free_sha_regions;
2224
2225 digest = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL);
2226 if (!digest) {
2227 ret = -ENOMEM;
2228 goto out_free_sha_regions;
2229 }
2230
2231 for (j = i = 0; i < image->nr_segments; i++) {
2232 struct kexec_segment *ksegment;
2233
2234 ksegment = &image->segment[i];
2235 /*
2236 * Skip purgatory as it will be modified once we put digest
2237 * info in purgatory.
2238 */
2239 if (ksegment->kbuf == pi->purgatory_buf)
2240 continue;
2241
2242 ret = crypto_shash_update(desc, ksegment->kbuf,
2243 ksegment->bufsz);
2244 if (ret)
2245 break;
2246
2247 /*
2248 * Assume rest of the buffer is filled with zero and
2249 * update digest accordingly.
2250 */
2251 nullsz = ksegment->memsz - ksegment->bufsz;
2252 while (nullsz) {
2253 unsigned long bytes = nullsz;
2254
2255 if (bytes > zero_buf_sz)
2256 bytes = zero_buf_sz;
2257 ret = crypto_shash_update(desc, zero_buf, bytes);
2258 if (ret)
2259 break;
2260 nullsz -= bytes;
2261 }
2262
2263 if (ret)
2264 break;
2265
2266 sha_regions[j].start = ksegment->mem;
2267 sha_regions[j].len = ksegment->memsz;
2268 j++;
2269 }
2270
2271 if (!ret) {
2272 ret = crypto_shash_final(desc, digest);
2273 if (ret)
2274 goto out_free_digest;
2275 ret = kexec_purgatory_get_set_symbol(image, "sha_regions",
2276 sha_regions, sha_region_sz, 0);
2277 if (ret)
2278 goto out_free_digest;
2279
2280 ret = kexec_purgatory_get_set_symbol(image, "sha256_digest",
2281 digest, SHA256_DIGEST_SIZE, 0);
2282 if (ret)
2283 goto out_free_digest;
2284 }
2285
2286out_free_digest:
2287 kfree(digest);
2288out_free_sha_regions:
2289 vfree(sha_regions);
2290out_free_desc:
2291 kfree(desc);
2292out_free_tfm:
2293 kfree(tfm);
2294out:
2295 return ret;
2296}
2297
/*
 * Actually load purgatory. Lot of code taken from kexec-tools.
 *
 * Parses the built-in relocatable purgatory ELF object (pi->ehdr),
 * computes its memory footprint, reserves a segment for it within
 * [min, max] via kexec_add_buffer(), and copies the SHF_ALLOC sections
 * into a temporary buffer for later relocation.  On success, ownership
 * of the working section-header copy and the temporary buffer passes to
 * pi->sechdrs / pi->purgatory_buf; image->start is pointed at purgatory's
 * entry.  Returns 0 on success, negative errno on failure.
 */
static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
				  unsigned long max, int top_down)
{
	struct purgatory_info *pi = &image->purgatory_info;
	unsigned long align, buf_align, bss_align, buf_sz, bss_sz, bss_pad;
	unsigned long memsz, entry, load_addr, curr_load_addr, bss_addr, offset;
	unsigned char *buf_addr, *src;
	int i, ret = 0, entry_sidx = -1;
	const Elf_Shdr *sechdrs_c;
	Elf_Shdr *sechdrs = NULL;
	void *purgatory_buf = NULL;

	/*
	 * sechdrs_c points to section headers in purgatory and are read
	 * only. No modifications allowed.
	 */
	sechdrs_c = (void *)pi->ehdr + pi->ehdr->e_shoff;

	/*
	 * We can not modify sechdrs_c[] and its fields. It is read only.
	 * Copy it over to a local copy where one can store some temporary
	 * data and free it at the end. We need to modify ->sh_addr and
	 * ->sh_offset fields to keep track of permanent and temporary
	 * locations of sections.
	 */
	sechdrs = vzalloc(pi->ehdr->e_shnum * sizeof(Elf_Shdr));
	if (!sechdrs)
		return -ENOMEM;

	memcpy(sechdrs, sechdrs_c, pi->ehdr->e_shnum * sizeof(Elf_Shdr));

	/*
	 * We seem to have multiple copies of sections. First copy is which
	 * is embedded in kernel in read only section. Some of these sections
	 * will be copied to a temporary buffer and relocated. And these
	 * sections will finally be copied to their final destination at
	 * segment load time.
	 *
	 * Use ->sh_offset to reflect section address in memory. It will
	 * point to original read only copy if section is not allocatable.
	 * Otherwise it will point to temporary copy which will be relocated.
	 *
	 * Use ->sh_addr to contain final address of the section where it
	 * will go during execution time.
	 */
	for (i = 0; i < pi->ehdr->e_shnum; i++) {
		if (sechdrs[i].sh_type == SHT_NOBITS)
			continue;

		/* Convert file offset into an absolute in-kernel address */
		sechdrs[i].sh_offset = (unsigned long)pi->ehdr +
						sechdrs[i].sh_offset;
	}

	/*
	 * Identify entry point section and make entry relative to section
	 * start.
	 */
	entry = pi->ehdr->e_entry;
	for (i = 0; i < pi->ehdr->e_shnum; i++) {
		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
			continue;

		if (!(sechdrs[i].sh_flags & SHF_EXECINSTR))
			continue;

		/* Make entry section relative */
		if (sechdrs[i].sh_addr <= pi->ehdr->e_entry &&
		    ((sechdrs[i].sh_addr + sechdrs[i].sh_size) >
		     pi->ehdr->e_entry)) {
			entry_sidx = i;
			entry -= sechdrs[i].sh_addr;
			break;
		}
	}

	/* Determine how much memory is needed to load relocatable object. */
	buf_align = 1;
	bss_align = 1;
	buf_sz = 0;
	bss_sz = 0;

	for (i = 0; i < pi->ehdr->e_shnum; i++) {
		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
			continue;

		align = sechdrs[i].sh_addralign;
		if (sechdrs[i].sh_type != SHT_NOBITS) {
			/* Progbits section: counts toward the copied buffer */
			if (buf_align < align)
				buf_align = align;
			buf_sz = ALIGN(buf_sz, align);
			buf_sz += sechdrs[i].sh_size;
		} else {
			/* bss section */
			if (bss_align < align)
				bss_align = align;
			bss_sz = ALIGN(bss_sz, align);
			bss_sz += sechdrs[i].sh_size;
		}
	}

	/* Determine the bss padding required to align bss properly */
	bss_pad = 0;
	if (buf_sz & (bss_align - 1))
		bss_pad = bss_align - (buf_sz & (bss_align - 1));

	memsz = buf_sz + bss_pad + bss_sz;

	/*
	 * Allocate buffer for purgatory.  Only the progbits portion is
	 * staged here; bss is zero-filled at segment load time (memsz >
	 * bufsz is zero-padded by the loader).
	 */
	purgatory_buf = vzalloc(buf_sz);
	if (!purgatory_buf) {
		ret = -ENOMEM;
		goto out;
	}

	if (buf_align < bss_align)
		buf_align = bss_align;

	/* Add buffer to segment list */
	ret = kexec_add_buffer(image, purgatory_buf, buf_sz, memsz,
				buf_align, min, max, top_down,
				&pi->purgatory_load_addr);
	if (ret)
		goto out;

	/* Load SHF_ALLOC sections */
	buf_addr = purgatory_buf;
	load_addr = curr_load_addr = pi->purgatory_load_addr;
	bss_addr = load_addr + buf_sz + bss_pad;

	for (i = 0; i < pi->ehdr->e_shnum; i++) {
		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
			continue;

		align = sechdrs[i].sh_addralign;
		if (sechdrs[i].sh_type != SHT_NOBITS) {
			curr_load_addr = ALIGN(curr_load_addr, align);
			offset = curr_load_addr - load_addr;
			/* We already modifed ->sh_offset to keep src addr */
			src = (char *) sechdrs[i].sh_offset;
			memcpy(buf_addr + offset, src, sechdrs[i].sh_size);

			/* Store load address and source address of section */
			sechdrs[i].sh_addr = curr_load_addr;

			/*
			 * This section got copied to temporary buffer. Update
			 * ->sh_offset accordingly.
			 */
			sechdrs[i].sh_offset = (unsigned long)(buf_addr + offset);

			/* Advance to the next address */
			curr_load_addr += sechdrs[i].sh_size;
		} else {
			/* bss lives entirely past the copied buffer */
			bss_addr = ALIGN(bss_addr, align);
			sechdrs[i].sh_addr = bss_addr;
			bss_addr += sechdrs[i].sh_size;
		}
	}

	/* Update entry point based on load address of text section */
	if (entry_sidx >= 0)
		entry += sechdrs[entry_sidx].sh_addr;

	/* Make kernel jump to purgatory after shutdown */
	image->start = entry;

	/* Used later to get/set symbol values */
	pi->sechdrs = sechdrs;

	/*
	 * Used later to identify which section is purgatory and skip it
	 * from checksumming.
	 */
	pi->purgatory_buf = purgatory_buf;
	return ret;
out:
	vfree(sechdrs);
	vfree(purgatory_buf);
	return ret;
}
2479
2480static int kexec_apply_relocations(struct kimage *image)
2481{
2482 int i, ret;
2483 struct purgatory_info *pi = &image->purgatory_info;
2484 Elf_Shdr *sechdrs = pi->sechdrs;
2485
2486 /* Apply relocations */
2487 for (i = 0; i < pi->ehdr->e_shnum; i++) {
2488 Elf_Shdr *section, *symtab;
2489
2490 if (sechdrs[i].sh_type != SHT_RELA &&
2491 sechdrs[i].sh_type != SHT_REL)
2492 continue;
2493
2494 /*
2495 * For section of type SHT_RELA/SHT_REL,
2496 * ->sh_link contains section header index of associated
2497 * symbol table. And ->sh_info contains section header
2498 * index of section to which relocations apply.
2499 */
2500 if (sechdrs[i].sh_info >= pi->ehdr->e_shnum ||
2501 sechdrs[i].sh_link >= pi->ehdr->e_shnum)
2502 return -ENOEXEC;
2503
2504 section = &sechdrs[sechdrs[i].sh_info];
2505 symtab = &sechdrs[sechdrs[i].sh_link];
2506
2507 if (!(section->sh_flags & SHF_ALLOC))
2508 continue;
2509
2510 /*
2511 * symtab->sh_link contain section header index of associated
2512 * string table.
2513 */
2514 if (symtab->sh_link >= pi->ehdr->e_shnum)
2515 /* Invalid section number? */
2516 continue;
2517
2518 /*
2519 * Respective archicture needs to provide support for applying
2520 * relocations of type SHT_RELA/SHT_REL.
2521 */
2522 if (sechdrs[i].sh_type == SHT_RELA)
2523 ret = arch_kexec_apply_relocations_add(pi->ehdr,
2524 sechdrs, i);
2525 else if (sechdrs[i].sh_type == SHT_REL)
2526 ret = arch_kexec_apply_relocations(pi->ehdr,
2527 sechdrs, i);
2528 if (ret)
2529 return ret;
2530 }
2531
2532 return 0;
2533}
2534
2535/* Load relocatable purgatory object and relocate it appropriately */
2536int kexec_load_purgatory(struct kimage *image, unsigned long min,
2537 unsigned long max, int top_down,
2538 unsigned long *load_addr)
2539{
2540 struct purgatory_info *pi = &image->purgatory_info;
2541 int ret;
2542
2543 if (kexec_purgatory_size <= 0)
2544 return -EINVAL;
2545
2546 if (kexec_purgatory_size < sizeof(Elf_Ehdr))
2547 return -ENOEXEC;
2548
2549 pi->ehdr = (Elf_Ehdr *)kexec_purgatory;
2550
2551 if (memcmp(pi->ehdr->e_ident, ELFMAG, SELFMAG) != 0
2552 || pi->ehdr->e_type != ET_REL
2553 || !elf_check_arch(pi->ehdr)
2554 || pi->ehdr->e_shentsize != sizeof(Elf_Shdr))
2555 return -ENOEXEC;
2556
2557 if (pi->ehdr->e_shoff >= kexec_purgatory_size
2558 || (pi->ehdr->e_shnum * sizeof(Elf_Shdr) >
2559 kexec_purgatory_size - pi->ehdr->e_shoff))
2560 return -ENOEXEC;
2561
2562 ret = __kexec_load_purgatory(image, min, max, top_down);
2563 if (ret)
2564 return ret;
2565
2566 ret = kexec_apply_relocations(image);
2567 if (ret)
2568 goto out;
2569
2570 *load_addr = pi->purgatory_load_addr;
2571 return 0;
2572out:
2573 vfree(pi->sechdrs);
2574 vfree(pi->purgatory_buf);
2575 return ret;
2576}
2577
2578static Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi,
2579 const char *name)
2580{
2581 Elf_Sym *syms;
2582 Elf_Shdr *sechdrs;
2583 Elf_Ehdr *ehdr;
2584 int i, k;
2585 const char *strtab;
2586
2587 if (!pi->sechdrs || !pi->ehdr)
2588 return NULL;
2589
2590 sechdrs = pi->sechdrs;
2591 ehdr = pi->ehdr;
2592
2593 for (i = 0; i < ehdr->e_shnum; i++) {
2594 if (sechdrs[i].sh_type != SHT_SYMTAB)
2595 continue;
2596
2597 if (sechdrs[i].sh_link >= ehdr->e_shnum)
2598 /* Invalid strtab section number */
2599 continue;
2600 strtab = (char *)sechdrs[sechdrs[i].sh_link].sh_offset;
2601 syms = (Elf_Sym *)sechdrs[i].sh_offset;
2602
2603 /* Go through symbols for a match */
2604 for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) {
2605 if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL)
2606 continue;
2607
2608 if (strcmp(strtab + syms[k].st_name, name) != 0)
2609 continue;
2610
2611 if (syms[k].st_shndx == SHN_UNDEF ||
2612 syms[k].st_shndx >= ehdr->e_shnum) {
2613 pr_debug("Symbol: %s has bad section index %d.\n",
2614 name, syms[k].st_shndx);
2615 return NULL;
2616 }
2617
2618 /* Found the symbol we are looking for */
2619 return &syms[k];
2620 }
2621 }
2622
2623 return NULL;
2624}
2625
2626void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name)
2627{
2628 struct purgatory_info *pi = &image->purgatory_info;
2629 Elf_Sym *sym;
2630 Elf_Shdr *sechdr;
2631
2632 sym = kexec_purgatory_find_symbol(pi, name);
2633 if (!sym)
2634 return ERR_PTR(-EINVAL);
2635
2636 sechdr = &pi->sechdrs[sym->st_shndx];
2637
2638 /*
2639 * Returns the address where symbol will finally be loaded after
2640 * kexec_load_segment()
2641 */
2642 return (void *)(sechdr->sh_addr + sym->st_value);
2643}
2644
2645/*
2646 * Get or set value of a symbol. If "get_value" is true, symbol value is
2647 * returned in buf otherwise symbol value is set based on value in buf.
2648 */
2649int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
2650 void *buf, unsigned int size, bool get_value)
2651{
2652 Elf_Sym *sym;
2653 Elf_Shdr *sechdrs;
2654 struct purgatory_info *pi = &image->purgatory_info;
2655 char *sym_buf;
2656
2657 sym = kexec_purgatory_find_symbol(pi, name);
2658 if (!sym)
2659 return -EINVAL;
2660
2661 if (sym->st_size != size) {
2662 pr_err("symbol %s size mismatch: expected %lu actual %u\n",
2663 name, (unsigned long)sym->st_size, size);
2664 return -EINVAL;
2665 }
2666
2667 sechdrs = pi->sechdrs;
2668
2669 if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) {
2670 pr_err("symbol %s is in a bss section. Cannot %s\n", name,
2671 get_value ? "get" : "set");
2672 return -EINVAL;
2673 }
2674
2675 sym_buf = (unsigned char *)sechdrs[sym->st_shndx].sh_offset +
2676 sym->st_value;
2677
2678 if (get_value)
2679 memcpy((void *)buf, sym_buf, size);
2680 else
2681 memcpy((void *)sym_buf, buf, size);
2682
2683 return 0;
2684}
2685
1635/* 2686/*
1636 * Move into place and start executing a preloaded standalone 2687 * Move into place and start executing a preloaded standalone
1637 * executable. If nothing was preloaded return an error. 2688 * executable. If nothing was preloaded return an error.
diff --git a/kernel/panic.c b/kernel/panic.c
index 62e16cef9cc2..d09dc5c32c67 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -224,6 +224,7 @@ static const struct tnt tnts[] = {
224 { TAINT_FIRMWARE_WORKAROUND, 'I', ' ' }, 224 { TAINT_FIRMWARE_WORKAROUND, 'I', ' ' },
225 { TAINT_OOT_MODULE, 'O', ' ' }, 225 { TAINT_OOT_MODULE, 'O', ' ' },
226 { TAINT_UNSIGNED_MODULE, 'E', ' ' }, 226 { TAINT_UNSIGNED_MODULE, 'E', ' ' },
227 { TAINT_SOFTLOCKUP, 'L', ' ' },
227}; 228};
228 229
229/** 230/**
diff --git a/kernel/resource.c b/kernel/resource.c
index 3c2237ac32db..da14b8d09296 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -59,10 +59,12 @@ static DEFINE_RWLOCK(resource_lock);
59static struct resource *bootmem_resource_free; 59static struct resource *bootmem_resource_free;
60static DEFINE_SPINLOCK(bootmem_resource_lock); 60static DEFINE_SPINLOCK(bootmem_resource_lock);
61 61
62static void *r_next(struct seq_file *m, void *v, loff_t *pos) 62static struct resource *next_resource(struct resource *p, bool sibling_only)
63{ 63{
64 struct resource *p = v; 64 /* Caller wants to traverse through siblings only */
65 (*pos)++; 65 if (sibling_only)
66 return p->sibling;
67
66 if (p->child) 68 if (p->child)
67 return p->child; 69 return p->child;
68 while (!p->sibling && p->parent) 70 while (!p->sibling && p->parent)
@@ -70,6 +72,13 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos)
70 return p->sibling; 72 return p->sibling;
71} 73}
72 74
75static void *r_next(struct seq_file *m, void *v, loff_t *pos)
76{
77 struct resource *p = v;
78 (*pos)++;
79 return (void *)next_resource(p, false);
80}
81
73#ifdef CONFIG_PROC_FS 82#ifdef CONFIG_PROC_FS
74 83
75enum { MAX_IORES_LEVEL = 5 }; 84enum { MAX_IORES_LEVEL = 5 };
@@ -322,16 +331,19 @@ int release_resource(struct resource *old)
322 331
323EXPORT_SYMBOL(release_resource); 332EXPORT_SYMBOL(release_resource);
324 333
325#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
326/* 334/*
327 * Finds the lowest memory reosurce exists within [res->start.res->end) 335 * Finds the lowest iomem reosurce exists with-in [res->start.res->end)
328 * the caller must specify res->start, res->end, res->flags and "name". 336 * the caller must specify res->start, res->end, res->flags and "name".
329 * If found, returns 0, res is overwritten, if not found, returns -1. 337 * If found, returns 0, res is overwritten, if not found, returns -1.
338 * This walks through whole tree and not just first level children
339 * until and unless first_level_children_only is true.
330 */ 340 */
331static int find_next_system_ram(struct resource *res, char *name) 341static int find_next_iomem_res(struct resource *res, char *name,
342 bool first_level_children_only)
332{ 343{
333 resource_size_t start, end; 344 resource_size_t start, end;
334 struct resource *p; 345 struct resource *p;
346 bool sibling_only = false;
335 347
336 BUG_ON(!res); 348 BUG_ON(!res);
337 349
@@ -340,8 +352,14 @@ static int find_next_system_ram(struct resource *res, char *name)
340 BUG_ON(start >= end); 352 BUG_ON(start >= end);
341 353
342 read_lock(&resource_lock); 354 read_lock(&resource_lock);
343 for (p = iomem_resource.child; p ; p = p->sibling) { 355
344 /* system ram is just marked as IORESOURCE_MEM */ 356 if (first_level_children_only) {
357 p = iomem_resource.child;
358 sibling_only = true;
359 } else
360 p = &iomem_resource;
361
362 while ((p = next_resource(p, sibling_only))) {
345 if (p->flags != res->flags) 363 if (p->flags != res->flags)
346 continue; 364 continue;
347 if (name && strcmp(p->name, name)) 365 if (name && strcmp(p->name, name))
@@ -353,6 +371,7 @@ static int find_next_system_ram(struct resource *res, char *name)
353 if ((p->end >= start) && (p->start < end)) 371 if ((p->end >= start) && (p->start < end))
354 break; 372 break;
355 } 373 }
374
356 read_unlock(&resource_lock); 375 read_unlock(&resource_lock);
357 if (!p) 376 if (!p)
358 return -1; 377 return -1;
@@ -365,6 +384,70 @@ static int find_next_system_ram(struct resource *res, char *name)
365} 384}
366 385
367/* 386/*
387 * Walks through iomem resources and calls func() with matching resource
388 * ranges. This walks through whole tree and not just first level children.
389 * All the memory ranges which overlap start,end and also match flags and
390 * name are valid candidates.
391 *
392 * @name: name of resource
393 * @flags: resource flags
394 * @start: start addr
395 * @end: end addr
396 */
397int walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end,
398 void *arg, int (*func)(u64, u64, void *))
399{
400 struct resource res;
401 u64 orig_end;
402 int ret = -1;
403
404 res.start = start;
405 res.end = end;
406 res.flags = flags;
407 orig_end = res.end;
408 while ((res.start < res.end) &&
409 (!find_next_iomem_res(&res, name, false))) {
410 ret = (*func)(res.start, res.end, arg);
411 if (ret)
412 break;
413 res.start = res.end + 1;
414 res.end = orig_end;
415 }
416 return ret;
417}
418
419/*
420 * This function calls callback against all memory range of "System RAM"
421 * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY.
422 * Now, this function is only for "System RAM". This function deals with
423 * full ranges and not pfn. If resources are not pfn aligned, dealing
424 * with pfn can truncate ranges.
425 */
426int walk_system_ram_res(u64 start, u64 end, void *arg,
427 int (*func)(u64, u64, void *))
428{
429 struct resource res;
430 u64 orig_end;
431 int ret = -1;
432
433 res.start = start;
434 res.end = end;
435 res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
436 orig_end = res.end;
437 while ((res.start < res.end) &&
438 (!find_next_iomem_res(&res, "System RAM", true))) {
439 ret = (*func)(res.start, res.end, arg);
440 if (ret)
441 break;
442 res.start = res.end + 1;
443 res.end = orig_end;
444 }
445 return ret;
446}
447
448#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
449
450/*
368 * This function calls callback against all memory range of "System RAM" 451 * This function calls callback against all memory range of "System RAM"
369 * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY. 452 * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY.
370 * Now, this function is only for "System RAM". 453 * Now, this function is only for "System RAM".
@@ -382,7 +465,7 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
382 res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; 465 res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
383 orig_end = res.end; 466 orig_end = res.end;
384 while ((res.start < res.end) && 467 while ((res.start < res.end) &&
385 (find_next_system_ram(&res, "System RAM") >= 0)) { 468 (find_next_iomem_res(&res, "System RAM", true) >= 0)) {
386 pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT; 469 pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT;
387 end_pfn = (res.end + 1) >> PAGE_SHIFT; 470 end_pfn = (res.end + 1) >> PAGE_SHIFT;
388 if (end_pfn > pfn) 471 if (end_pfn > pfn)
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 2904a2105914..391d4ddb6f4b 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -25,6 +25,7 @@ cond_syscall(sys_swapon);
25cond_syscall(sys_swapoff); 25cond_syscall(sys_swapoff);
26cond_syscall(sys_kexec_load); 26cond_syscall(sys_kexec_load);
27cond_syscall(compat_sys_kexec_load); 27cond_syscall(compat_sys_kexec_load);
28cond_syscall(sys_kexec_file_load);
28cond_syscall(sys_init_module); 29cond_syscall(sys_init_module);
29cond_syscall(sys_finit_module); 30cond_syscall(sys_finit_module);
30cond_syscall(sys_delete_module); 31cond_syscall(sys_delete_module);
@@ -197,6 +198,7 @@ cond_syscall(compat_sys_timerfd_settime);
197cond_syscall(compat_sys_timerfd_gettime); 198cond_syscall(compat_sys_timerfd_gettime);
198cond_syscall(sys_eventfd); 199cond_syscall(sys_eventfd);
199cond_syscall(sys_eventfd2); 200cond_syscall(sys_eventfd2);
201cond_syscall(sys_memfd_create);
200 202
201/* performance counters: */ 203/* performance counters: */
202cond_syscall(sys_perf_event_open); 204cond_syscall(sys_perf_event_open);
diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c
index 12d6ebbfdd83..0dbab6d1acb4 100644
--- a/kernel/test_kprobes.c
+++ b/kernel/test_kprobes.c
@@ -14,6 +14,8 @@
14 * the GNU General Public License for more details. 14 * the GNU General Public License for more details.
15 */ 15 */
16 16
17#define pr_fmt(fmt) "Kprobe smoke test: " fmt
18
17#include <linux/kernel.h> 19#include <linux/kernel.h>
18#include <linux/kprobes.h> 20#include <linux/kprobes.h>
19#include <linux/random.h> 21#include <linux/random.h>
@@ -41,8 +43,7 @@ static void kp_post_handler(struct kprobe *p, struct pt_regs *regs,
41{ 43{
42 if (preh_val != (rand1 / div_factor)) { 44 if (preh_val != (rand1 / div_factor)) {
43 handler_errors++; 45 handler_errors++;
44 printk(KERN_ERR "Kprobe smoke test failed: " 46 pr_err("incorrect value in post_handler\n");
45 "incorrect value in post_handler\n");
46 } 47 }
47 posth_val = preh_val + div_factor; 48 posth_val = preh_val + div_factor;
48} 49}
@@ -59,8 +60,7 @@ static int test_kprobe(void)
59 60
60 ret = register_kprobe(&kp); 61 ret = register_kprobe(&kp);
61 if (ret < 0) { 62 if (ret < 0) {
62 printk(KERN_ERR "Kprobe smoke test failed: " 63 pr_err("register_kprobe returned %d\n", ret);
63 "register_kprobe returned %d\n", ret);
64 return ret; 64 return ret;
65 } 65 }
66 66
@@ -68,14 +68,12 @@ static int test_kprobe(void)
68 unregister_kprobe(&kp); 68 unregister_kprobe(&kp);
69 69
70 if (preh_val == 0) { 70 if (preh_val == 0) {
71 printk(KERN_ERR "Kprobe smoke test failed: " 71 pr_err("kprobe pre_handler not called\n");
72 "kprobe pre_handler not called\n");
73 handler_errors++; 72 handler_errors++;
74 } 73 }
75 74
76 if (posth_val == 0) { 75 if (posth_val == 0) {
77 printk(KERN_ERR "Kprobe smoke test failed: " 76 pr_err("kprobe post_handler not called\n");
78 "kprobe post_handler not called\n");
79 handler_errors++; 77 handler_errors++;
80 } 78 }
81 79
@@ -98,8 +96,7 @@ static void kp_post_handler2(struct kprobe *p, struct pt_regs *regs,
98{ 96{
99 if (preh_val != (rand1 / div_factor) + 1) { 97 if (preh_val != (rand1 / div_factor) + 1) {
100 handler_errors++; 98 handler_errors++;
101 printk(KERN_ERR "Kprobe smoke test failed: " 99 pr_err("incorrect value in post_handler2\n");
102 "incorrect value in post_handler2\n");
103 } 100 }
104 posth_val = preh_val + div_factor; 101 posth_val = preh_val + div_factor;
105} 102}
@@ -120,8 +117,7 @@ static int test_kprobes(void)
120 kp.flags = 0; 117 kp.flags = 0;
121 ret = register_kprobes(kps, 2); 118 ret = register_kprobes(kps, 2);
122 if (ret < 0) { 119 if (ret < 0) {
123 printk(KERN_ERR "Kprobe smoke test failed: " 120 pr_err("register_kprobes returned %d\n", ret);
124 "register_kprobes returned %d\n", ret);
125 return ret; 121 return ret;
126 } 122 }
127 123
@@ -130,14 +126,12 @@ static int test_kprobes(void)
130 ret = target(rand1); 126 ret = target(rand1);
131 127
132 if (preh_val == 0) { 128 if (preh_val == 0) {
133 printk(KERN_ERR "Kprobe smoke test failed: " 129 pr_err("kprobe pre_handler not called\n");
134 "kprobe pre_handler not called\n");
135 handler_errors++; 130 handler_errors++;
136 } 131 }
137 132
138 if (posth_val == 0) { 133 if (posth_val == 0) {
139 printk(KERN_ERR "Kprobe smoke test failed: " 134 pr_err("kprobe post_handler not called\n");
140 "kprobe post_handler not called\n");
141 handler_errors++; 135 handler_errors++;
142 } 136 }
143 137
@@ -146,14 +140,12 @@ static int test_kprobes(void)
146 ret = target2(rand1); 140 ret = target2(rand1);
147 141
148 if (preh_val == 0) { 142 if (preh_val == 0) {
149 printk(KERN_ERR "Kprobe smoke test failed: " 143 pr_err("kprobe pre_handler2 not called\n");
150 "kprobe pre_handler2 not called\n");
151 handler_errors++; 144 handler_errors++;
152 } 145 }
153 146
154 if (posth_val == 0) { 147 if (posth_val == 0) {
155 printk(KERN_ERR "Kprobe smoke test failed: " 148 pr_err("kprobe post_handler2 not called\n");
156 "kprobe post_handler2 not called\n");
157 handler_errors++; 149 handler_errors++;
158 } 150 }
159 151
@@ -166,8 +158,7 @@ static u32 j_kprobe_target(u32 value)
166{ 158{
167 if (value != rand1) { 159 if (value != rand1) {
168 handler_errors++; 160 handler_errors++;
169 printk(KERN_ERR "Kprobe smoke test failed: " 161 pr_err("incorrect value in jprobe handler\n");
170 "incorrect value in jprobe handler\n");
171 } 162 }
172 163
173 jph_val = rand1; 164 jph_val = rand1;
@@ -186,16 +177,14 @@ static int test_jprobe(void)
186 177
187 ret = register_jprobe(&jp); 178 ret = register_jprobe(&jp);
188 if (ret < 0) { 179 if (ret < 0) {
189 printk(KERN_ERR "Kprobe smoke test failed: " 180 pr_err("register_jprobe returned %d\n", ret);
190 "register_jprobe returned %d\n", ret);
191 return ret; 181 return ret;
192 } 182 }
193 183
194 ret = target(rand1); 184 ret = target(rand1);
195 unregister_jprobe(&jp); 185 unregister_jprobe(&jp);
196 if (jph_val == 0) { 186 if (jph_val == 0) {
197 printk(KERN_ERR "Kprobe smoke test failed: " 187 pr_err("jprobe handler not called\n");
198 "jprobe handler not called\n");
199 handler_errors++; 188 handler_errors++;
200 } 189 }
201 190
@@ -217,24 +206,21 @@ static int test_jprobes(void)
217 jp.kp.flags = 0; 206 jp.kp.flags = 0;
218 ret = register_jprobes(jps, 2); 207 ret = register_jprobes(jps, 2);
219 if (ret < 0) { 208 if (ret < 0) {
220 printk(KERN_ERR "Kprobe smoke test failed: " 209 pr_err("register_jprobes returned %d\n", ret);
221 "register_jprobes returned %d\n", ret);
222 return ret; 210 return ret;
223 } 211 }
224 212
225 jph_val = 0; 213 jph_val = 0;
226 ret = target(rand1); 214 ret = target(rand1);
227 if (jph_val == 0) { 215 if (jph_val == 0) {
228 printk(KERN_ERR "Kprobe smoke test failed: " 216 pr_err("jprobe handler not called\n");
229 "jprobe handler not called\n");
230 handler_errors++; 217 handler_errors++;
231 } 218 }
232 219
233 jph_val = 0; 220 jph_val = 0;
234 ret = target2(rand1); 221 ret = target2(rand1);
235 if (jph_val == 0) { 222 if (jph_val == 0) {
236 printk(KERN_ERR "Kprobe smoke test failed: " 223 pr_err("jprobe handler2 not called\n");
237 "jprobe handler2 not called\n");
238 handler_errors++; 224 handler_errors++;
239 } 225 }
240 unregister_jprobes(jps, 2); 226 unregister_jprobes(jps, 2);
@@ -256,13 +242,11 @@ static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
256 242
257 if (ret != (rand1 / div_factor)) { 243 if (ret != (rand1 / div_factor)) {
258 handler_errors++; 244 handler_errors++;
259 printk(KERN_ERR "Kprobe smoke test failed: " 245 pr_err("incorrect value in kretprobe handler\n");
260 "incorrect value in kretprobe handler\n");
261 } 246 }
262 if (krph_val == 0) { 247 if (krph_val == 0) {
263 handler_errors++; 248 handler_errors++;
264 printk(KERN_ERR "Kprobe smoke test failed: " 249 pr_err("call to kretprobe entry handler failed\n");
265 "call to kretprobe entry handler failed\n");
266 } 250 }
267 251
268 krph_val = rand1; 252 krph_val = rand1;
@@ -281,16 +265,14 @@ static int test_kretprobe(void)
281 265
282 ret = register_kretprobe(&rp); 266 ret = register_kretprobe(&rp);
283 if (ret < 0) { 267 if (ret < 0) {
284 printk(KERN_ERR "Kprobe smoke test failed: " 268 pr_err("register_kretprobe returned %d\n", ret);
285 "register_kretprobe returned %d\n", ret);
286 return ret; 269 return ret;
287 } 270 }
288 271
289 ret = target(rand1); 272 ret = target(rand1);
290 unregister_kretprobe(&rp); 273 unregister_kretprobe(&rp);
291 if (krph_val != rand1) { 274 if (krph_val != rand1) {
292 printk(KERN_ERR "Kprobe smoke test failed: " 275 pr_err("kretprobe handler not called\n");
293 "kretprobe handler not called\n");
294 handler_errors++; 276 handler_errors++;
295 } 277 }
296 278
@@ -303,13 +285,11 @@ static int return_handler2(struct kretprobe_instance *ri, struct pt_regs *regs)
303 285
304 if (ret != (rand1 / div_factor) + 1) { 286 if (ret != (rand1 / div_factor) + 1) {
305 handler_errors++; 287 handler_errors++;
306 printk(KERN_ERR "Kprobe smoke test failed: " 288 pr_err("incorrect value in kretprobe handler2\n");
307 "incorrect value in kretprobe handler2\n");
308 } 289 }
309 if (krph_val == 0) { 290 if (krph_val == 0) {
310 handler_errors++; 291 handler_errors++;
311 printk(KERN_ERR "Kprobe smoke test failed: " 292 pr_err("call to kretprobe entry handler failed\n");
312 "call to kretprobe entry handler failed\n");
313 } 293 }
314 294
315 krph_val = rand1; 295 krph_val = rand1;
@@ -332,24 +312,21 @@ static int test_kretprobes(void)
332 rp.kp.flags = 0; 312 rp.kp.flags = 0;
333 ret = register_kretprobes(rps, 2); 313 ret = register_kretprobes(rps, 2);
334 if (ret < 0) { 314 if (ret < 0) {
335 printk(KERN_ERR "Kprobe smoke test failed: " 315 pr_err("register_kretprobe returned %d\n", ret);
336 "register_kretprobe returned %d\n", ret);
337 return ret; 316 return ret;
338 } 317 }
339 318
340 krph_val = 0; 319 krph_val = 0;
341 ret = target(rand1); 320 ret = target(rand1);
342 if (krph_val != rand1) { 321 if (krph_val != rand1) {
343 printk(KERN_ERR "Kprobe smoke test failed: " 322 pr_err("kretprobe handler not called\n");
344 "kretprobe handler not called\n");
345 handler_errors++; 323 handler_errors++;
346 } 324 }
347 325
348 krph_val = 0; 326 krph_val = 0;
349 ret = target2(rand1); 327 ret = target2(rand1);
350 if (krph_val != rand1) { 328 if (krph_val != rand1) {
351 printk(KERN_ERR "Kprobe smoke test failed: " 329 pr_err("kretprobe handler2 not called\n");
352 "kretprobe handler2 not called\n");
353 handler_errors++; 330 handler_errors++;
354 } 331 }
355 unregister_kretprobes(rps, 2); 332 unregister_kretprobes(rps, 2);
@@ -368,7 +345,7 @@ int init_test_probes(void)
368 rand1 = prandom_u32(); 345 rand1 = prandom_u32();
369 } while (rand1 <= div_factor); 346 } while (rand1 <= div_factor);
370 347
371 printk(KERN_INFO "Kprobe smoke test started\n"); 348 pr_info("started\n");
372 num_tests++; 349 num_tests++;
373 ret = test_kprobe(); 350 ret = test_kprobe();
374 if (ret < 0) 351 if (ret < 0)
@@ -402,13 +379,11 @@ int init_test_probes(void)
402#endif /* CONFIG_KRETPROBES */ 379#endif /* CONFIG_KRETPROBES */
403 380
404 if (errors) 381 if (errors)
405 printk(KERN_ERR "BUG: Kprobe smoke test: %d out of " 382 pr_err("BUG: %d out of %d tests failed\n", errors, num_tests);
406 "%d tests failed\n", errors, num_tests);
407 else if (handler_errors) 383 else if (handler_errors)
408 printk(KERN_ERR "BUG: Kprobe smoke test: %d error(s) " 384 pr_err("BUG: %d error(s) running handlers\n", handler_errors);
409 "running handlers\n", handler_errors);
410 else 385 else
411 printk(KERN_INFO "Kprobe smoke test passed successfully\n"); 386 pr_info("passed successfully\n");
412 387
413 return 0; 388 return 0;
414} 389}
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index fcc02560fd6b..aa312b0dc3ec 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -526,21 +526,21 @@ static void m_stop(struct seq_file *seq, void *v)
526 return; 526 return;
527} 527}
528 528
529struct seq_operations proc_uid_seq_operations = { 529const struct seq_operations proc_uid_seq_operations = {
530 .start = uid_m_start, 530 .start = uid_m_start,
531 .stop = m_stop, 531 .stop = m_stop,
532 .next = m_next, 532 .next = m_next,
533 .show = uid_m_show, 533 .show = uid_m_show,
534}; 534};
535 535
536struct seq_operations proc_gid_seq_operations = { 536const struct seq_operations proc_gid_seq_operations = {
537 .start = gid_m_start, 537 .start = gid_m_start,
538 .stop = m_stop, 538 .stop = m_stop,
539 .next = m_next, 539 .next = m_next,
540 .show = gid_m_show, 540 .show = gid_m_show,
541}; 541};
542 542
543struct seq_operations proc_projid_seq_operations = { 543const struct seq_operations proc_projid_seq_operations = {
544 .start = projid_m_start, 544 .start = projid_m_start,
545 .stop = m_stop, 545 .stop = m_stop,
546 .next = m_next, 546 .next = m_next,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 51b29e9d2ba6..a8d6914030fe 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -368,6 +368,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
368 smp_mb__after_atomic(); 368 smp_mb__after_atomic();
369 } 369 }
370 370
371 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
371 if (softlockup_panic) 372 if (softlockup_panic)
372 panic("softlockup: hung tasks"); 373 panic("softlockup: hung tasks");
373 __this_cpu_write(soft_watchdog_warn, true); 374 __this_cpu_write(soft_watchdog_warn, true);