aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile6
-rw-r--r--kernel/cgroup.c41
-rw-r--r--kernel/events/uprobes.c345
-rw-r--r--kernel/module-internal.h3
-rw-r--r--kernel/module.c53
-rw-r--r--kernel/module_signing.c24
-rw-r--r--kernel/pid_namespace.c33
-rw-r--r--kernel/printk.c1
-rw-r--r--kernel/sys.c12
-rw-r--r--kernel/trace/ring_buffer.c4
-rw-r--r--kernel/workqueue.c2
11 files changed, 252 insertions, 272 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 0dfeca4324e..86e3285ae7e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -174,10 +174,8 @@ signing_key.priv signing_key.x509: x509.genkey
174 @echo "###" 174 @echo "###"
175 @echo "### If this takes a long time, you might wish to run rngd in the" 175 @echo "### If this takes a long time, you might wish to run rngd in the"
176 @echo "### background to keep the supply of entropy topped up. It" 176 @echo "### background to keep the supply of entropy topped up. It"
177 @echo "### needs to be run as root, and should use a hardware random" 177 @echo "### needs to be run as root, and uses a hardware random"
178 @echo "### number generator if one is available, eg:" 178 @echo "### number generator if one is available."
179 @echo "###"
180 @echo "### rngd -r /dev/hwrandom"
181 @echo "###" 179 @echo "###"
182 openssl req -new -nodes -utf8 $(sign_key_with_hash) -days 36500 -batch \ 180 openssl req -new -nodes -utf8 $(sign_key_with_hash) -days 36500 -batch \
183 -x509 -config x509.genkey \ 181 -x509 -config x509.genkey \
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 13774b3b39a..f24f724620d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1962,9 +1962,8 @@ static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
1962 * trading it for newcg is protected by cgroup_mutex, we're safe to drop 1962 * trading it for newcg is protected by cgroup_mutex, we're safe to drop
1963 * it here; it will be freed under RCU. 1963 * it here; it will be freed under RCU.
1964 */ 1964 */
1965 put_css_set(oldcg);
1966
1967 set_bit(CGRP_RELEASABLE, &oldcgrp->flags); 1965 set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
1966 put_css_set(oldcg);
1968} 1967}
1969 1968
1970/** 1969/**
@@ -4815,31 +4814,20 @@ static const struct file_operations proc_cgroupstats_operations = {
4815 * 4814 *
4816 * A pointer to the shared css_set was automatically copied in 4815 * A pointer to the shared css_set was automatically copied in
4817 * fork.c by dup_task_struct(). However, we ignore that copy, since 4816 * fork.c by dup_task_struct(). However, we ignore that copy, since
4818 * it was not made under the protection of RCU, cgroup_mutex or 4817 * it was not made under the protection of RCU or cgroup_mutex, so
4819 * threadgroup_change_begin(), so it might no longer be a valid 4818 * might no longer be a valid cgroup pointer. cgroup_attach_task() might
4820 * cgroup pointer. cgroup_attach_task() might have already changed 4819 * have already changed current->cgroups, allowing the previously
4821 * current->cgroups, allowing the previously referenced cgroup 4820 * referenced cgroup group to be removed and freed.
4822 * group to be removed and freed.
4823 *
4824 * Outside the pointer validity we also need to process the css_set
4825 * inheritance between threadgoup_change_begin() and
4826 * threadgoup_change_end(), this way there is no leak in any process
4827 * wide migration performed by cgroup_attach_proc() that could otherwise
4828 * miss a thread because it is too early or too late in the fork stage.
4829 * 4821 *
4830 * At the point that cgroup_fork() is called, 'current' is the parent 4822 * At the point that cgroup_fork() is called, 'current' is the parent
4831 * task, and the passed argument 'child' points to the child task. 4823 * task, and the passed argument 'child' points to the child task.
4832 */ 4824 */
4833void cgroup_fork(struct task_struct *child) 4825void cgroup_fork(struct task_struct *child)
4834{ 4826{
4835 /* 4827 task_lock(current);
4836 * We don't need to task_lock() current because current->cgroups
4837 * can't be changed concurrently here. The parent obviously hasn't
4838 * exited and called cgroup_exit(), and we are synchronized against
4839 * cgroup migration through threadgroup_change_begin().
4840 */
4841 child->cgroups = current->cgroups; 4828 child->cgroups = current->cgroups;
4842 get_css_set(child->cgroups); 4829 get_css_set(child->cgroups);
4830 task_unlock(current);
4843 INIT_LIST_HEAD(&child->cg_list); 4831 INIT_LIST_HEAD(&child->cg_list);
4844} 4832}
4845 4833
@@ -4895,19 +4883,10 @@ void cgroup_post_fork(struct task_struct *child)
4895 */ 4883 */
4896 if (use_task_css_set_links) { 4884 if (use_task_css_set_links) {
4897 write_lock(&css_set_lock); 4885 write_lock(&css_set_lock);
4898 if (list_empty(&child->cg_list)) { 4886 task_lock(child);
4899 /* 4887 if (list_empty(&child->cg_list))
4900 * It's safe to use child->cgroups without task_lock()
4901 * here because we are protected through
4902 * threadgroup_change_begin() against concurrent
4903 * css_set change in cgroup_task_migrate(). Also
4904 * the task can't exit at that point until
4905 * wake_up_new_task() is called, so we are protected
4906 * against cgroup_exit() setting child->cgroup to
4907 * init_css_set.
4908 */
4909 list_add(&child->cg_list, &child->cgroups->tasks); 4888 list_add(&child->cg_list, &child->cgroups->tasks);
4910 } 4889 task_unlock(child);
4911 write_unlock(&css_set_lock); 4890 write_unlock(&css_set_lock);
4912 } 4891 }
4913} 4892}
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 98256bc71ee..5cc4e7e42e6 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -78,15 +78,23 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
78 */ 78 */
79static atomic_t uprobe_events = ATOMIC_INIT(0); 79static atomic_t uprobe_events = ATOMIC_INIT(0);
80 80
81/* Have a copy of original instruction */
82#define UPROBE_COPY_INSN 0
83/* Dont run handlers when first register/ last unregister in progress*/
84#define UPROBE_RUN_HANDLER 1
85/* Can skip singlestep */
86#define UPROBE_SKIP_SSTEP 2
87
81struct uprobe { 88struct uprobe {
82 struct rb_node rb_node; /* node in the rb tree */ 89 struct rb_node rb_node; /* node in the rb tree */
83 atomic_t ref; 90 atomic_t ref;
84 struct rw_semaphore consumer_rwsem; 91 struct rw_semaphore consumer_rwsem;
92 struct mutex copy_mutex; /* TODO: kill me and UPROBE_COPY_INSN */
85 struct list_head pending_list; 93 struct list_head pending_list;
86 struct uprobe_consumer *consumers; 94 struct uprobe_consumer *consumers;
87 struct inode *inode; /* Also hold a ref to inode */ 95 struct inode *inode; /* Also hold a ref to inode */
88 loff_t offset; 96 loff_t offset;
89 int flags; 97 unsigned long flags;
90 struct arch_uprobe arch; 98 struct arch_uprobe arch;
91}; 99};
92 100
@@ -100,17 +108,12 @@ struct uprobe {
100 */ 108 */
101static bool valid_vma(struct vm_area_struct *vma, bool is_register) 109static bool valid_vma(struct vm_area_struct *vma, bool is_register)
102{ 110{
103 if (!vma->vm_file) 111 vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED;
104 return false;
105
106 if (!is_register)
107 return true;
108 112
109 if ((vma->vm_flags & (VM_HUGETLB|VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) 113 if (is_register)
110 == (VM_READ|VM_EXEC)) 114 flags |= VM_WRITE;
111 return true;
112 115
113 return false; 116 return vma->vm_file && (vma->vm_flags & flags) == VM_MAYEXEC;
114} 117}
115 118
116static unsigned long offset_to_vaddr(struct vm_area_struct *vma, loff_t offset) 119static unsigned long offset_to_vaddr(struct vm_area_struct *vma, loff_t offset)
@@ -193,19 +196,44 @@ bool __weak is_swbp_insn(uprobe_opcode_t *insn)
193 return *insn == UPROBE_SWBP_INSN; 196 return *insn == UPROBE_SWBP_INSN;
194} 197}
195 198
199static void copy_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *opcode)
200{
201 void *kaddr = kmap_atomic(page);
202 memcpy(opcode, kaddr + (vaddr & ~PAGE_MASK), UPROBE_SWBP_INSN_SIZE);
203 kunmap_atomic(kaddr);
204}
205
206static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *new_opcode)
207{
208 uprobe_opcode_t old_opcode;
209 bool is_swbp;
210
211 copy_opcode(page, vaddr, &old_opcode);
212 is_swbp = is_swbp_insn(&old_opcode);
213
214 if (is_swbp_insn(new_opcode)) {
215 if (is_swbp) /* register: already installed? */
216 return 0;
217 } else {
218 if (!is_swbp) /* unregister: was it changed by us? */
219 return 0;
220 }
221
222 return 1;
223}
224
196/* 225/*
197 * NOTE: 226 * NOTE:
198 * Expect the breakpoint instruction to be the smallest size instruction for 227 * Expect the breakpoint instruction to be the smallest size instruction for
199 * the architecture. If an arch has variable length instruction and the 228 * the architecture. If an arch has variable length instruction and the
200 * breakpoint instruction is not of the smallest length instruction 229 * breakpoint instruction is not of the smallest length instruction
201 * supported by that architecture then we need to modify read_opcode / 230 * supported by that architecture then we need to modify is_swbp_at_addr and
202 * write_opcode accordingly. This would never be a problem for archs that 231 * write_opcode accordingly. This would never be a problem for archs that
203 * have fixed length instructions. 232 * have fixed length instructions.
204 */ 233 */
205 234
206/* 235/*
207 * write_opcode - write the opcode at a given virtual address. 236 * write_opcode - write the opcode at a given virtual address.
208 * @auprobe: arch breakpointing information.
209 * @mm: the probed process address space. 237 * @mm: the probed process address space.
210 * @vaddr: the virtual address to store the opcode. 238 * @vaddr: the virtual address to store the opcode.
211 * @opcode: opcode to be written at @vaddr. 239 * @opcode: opcode to be written at @vaddr.
@@ -216,8 +244,8 @@ bool __weak is_swbp_insn(uprobe_opcode_t *insn)
216 * For mm @mm, write the opcode at @vaddr. 244 * For mm @mm, write the opcode at @vaddr.
217 * Return 0 (success) or a negative errno. 245 * Return 0 (success) or a negative errno.
218 */ 246 */
219static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, 247static int write_opcode(struct mm_struct *mm, unsigned long vaddr,
220 unsigned long vaddr, uprobe_opcode_t opcode) 248 uprobe_opcode_t opcode)
221{ 249{
222 struct page *old_page, *new_page; 250 struct page *old_page, *new_page;
223 void *vaddr_old, *vaddr_new; 251 void *vaddr_old, *vaddr_new;
@@ -226,10 +254,14 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
226 254
227retry: 255retry:
228 /* Read the page with vaddr into memory */ 256 /* Read the page with vaddr into memory */
229 ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma); 257 ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma);
230 if (ret <= 0) 258 if (ret <= 0)
231 return ret; 259 return ret;
232 260
261 ret = verify_opcode(old_page, vaddr, &opcode);
262 if (ret <= 0)
263 goto put_old;
264
233 ret = -ENOMEM; 265 ret = -ENOMEM;
234 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); 266 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
235 if (!new_page) 267 if (!new_page)
@@ -264,63 +296,6 @@ put_old:
264} 296}
265 297
266/** 298/**
267 * read_opcode - read the opcode at a given virtual address.
268 * @mm: the probed process address space.
269 * @vaddr: the virtual address to read the opcode.
270 * @opcode: location to store the read opcode.
271 *
272 * Called with mm->mmap_sem held (for read and with a reference to
273 * mm.
274 *
275 * For mm @mm, read the opcode at @vaddr and store it in @opcode.
276 * Return 0 (success) or a negative errno.
277 */
278static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t *opcode)
279{
280 struct page *page;
281 void *vaddr_new;
282 int ret;
283
284 ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
285 if (ret <= 0)
286 return ret;
287
288 vaddr_new = kmap_atomic(page);
289 vaddr &= ~PAGE_MASK;
290 memcpy(opcode, vaddr_new + vaddr, UPROBE_SWBP_INSN_SIZE);
291 kunmap_atomic(vaddr_new);
292
293 put_page(page);
294
295 return 0;
296}
297
298static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
299{
300 uprobe_opcode_t opcode;
301 int result;
302
303 if (current->mm == mm) {
304 pagefault_disable();
305 result = __copy_from_user_inatomic(&opcode, (void __user*)vaddr,
306 sizeof(opcode));
307 pagefault_enable();
308
309 if (likely(result == 0))
310 goto out;
311 }
312
313 result = read_opcode(mm, vaddr, &opcode);
314 if (result)
315 return result;
316out:
317 if (is_swbp_insn(&opcode))
318 return 1;
319
320 return 0;
321}
322
323/**
324 * set_swbp - store breakpoint at a given address. 299 * set_swbp - store breakpoint at a given address.
325 * @auprobe: arch specific probepoint information. 300 * @auprobe: arch specific probepoint information.
326 * @mm: the probed process address space. 301 * @mm: the probed process address space.
@@ -331,18 +306,7 @@ out:
331 */ 306 */
332int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) 307int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
333{ 308{
334 int result; 309 return write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
335 /*
336 * See the comment near uprobes_hash().
337 */
338 result = is_swbp_at_addr(mm, vaddr);
339 if (result == 1)
340 return 0;
341
342 if (result)
343 return result;
344
345 return write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN);
346} 310}
347 311
348/** 312/**
@@ -357,16 +321,7 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned
357int __weak 321int __weak
358set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) 322set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
359{ 323{
360 int result; 324 return write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn);
361
362 result = is_swbp_at_addr(mm, vaddr);
363 if (!result)
364 return -EINVAL;
365
366 if (result != 1)
367 return result;
368
369 return write_opcode(auprobe, mm, vaddr, *(uprobe_opcode_t *)auprobe->insn);
370} 325}
371 326
372static int match_uprobe(struct uprobe *l, struct uprobe *r) 327static int match_uprobe(struct uprobe *l, struct uprobe *r)
@@ -473,7 +428,7 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)
473 spin_unlock(&uprobes_treelock); 428 spin_unlock(&uprobes_treelock);
474 429
475 /* For now assume that the instruction need not be single-stepped */ 430 /* For now assume that the instruction need not be single-stepped */
476 uprobe->flags |= UPROBE_SKIP_SSTEP; 431 __set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
477 432
478 return u; 433 return u;
479} 434}
@@ -495,6 +450,7 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
495 uprobe->inode = igrab(inode); 450 uprobe->inode = igrab(inode);
496 uprobe->offset = offset; 451 uprobe->offset = offset;
497 init_rwsem(&uprobe->consumer_rwsem); 452 init_rwsem(&uprobe->consumer_rwsem);
453 mutex_init(&uprobe->copy_mutex);
498 454
499 /* add to uprobes_tree, sorted on inode:offset */ 455 /* add to uprobes_tree, sorted on inode:offset */
500 cur_uprobe = insert_uprobe(uprobe); 456 cur_uprobe = insert_uprobe(uprobe);
@@ -515,7 +471,7 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
515{ 471{
516 struct uprobe_consumer *uc; 472 struct uprobe_consumer *uc;
517 473
518 if (!(uprobe->flags & UPROBE_RUN_HANDLER)) 474 if (!test_bit(UPROBE_RUN_HANDLER, &uprobe->flags))
519 return; 475 return;
520 476
521 down_read(&uprobe->consumer_rwsem); 477 down_read(&uprobe->consumer_rwsem);
@@ -621,29 +577,43 @@ static int copy_insn(struct uprobe *uprobe, struct file *filp)
621 return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset); 577 return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset);
622} 578}
623 579
624/* 580static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
625 * How mm->uprobes_state.count gets updated 581 struct mm_struct *mm, unsigned long vaddr)
626 * uprobe_mmap() increments the count if 582{
627 * - it successfully adds a breakpoint. 583 int ret = 0;
628 * - it cannot add a breakpoint, but sees that there is a underlying 584
629 * breakpoint (via a is_swbp_at_addr()). 585 if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
630 * 586 return ret;
631 * uprobe_munmap() decrements the count if 587
632 * - it sees a underlying breakpoint, (via is_swbp_at_addr) 588 mutex_lock(&uprobe->copy_mutex);
633 * (Subsequent uprobe_unregister wouldnt find the breakpoint 589 if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
634 * unless a uprobe_mmap kicks in, since the old vma would be 590 goto out;
635 * dropped just after uprobe_munmap.) 591
636 * 592 ret = copy_insn(uprobe, file);
637 * uprobe_register increments the count if: 593 if (ret)
638 * - it successfully adds a breakpoint. 594 goto out;
639 * 595
640 * uprobe_unregister decrements the count if: 596 ret = -ENOTSUPP;
641 * - it sees a underlying breakpoint and removes successfully. 597 if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
642 * (via is_swbp_at_addr) 598 goto out;
643 * (Subsequent uprobe_munmap wouldnt find the breakpoint 599
644 * since there is no underlying breakpoint after the 600 ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);
645 * breakpoint removal.) 601 if (ret)
646 */ 602 goto out;
603
604 /* write_opcode() assumes we don't cross page boundary */
605 BUG_ON((uprobe->offset & ~PAGE_MASK) +
606 UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
607
608 smp_wmb(); /* pairs with rmb() in find_active_uprobe() */
609 set_bit(UPROBE_COPY_INSN, &uprobe->flags);
610
611 out:
612 mutex_unlock(&uprobe->copy_mutex);
613
614 return ret;
615}
616
647static int 617static int
648install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, 618install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
649 struct vm_area_struct *vma, unsigned long vaddr) 619 struct vm_area_struct *vma, unsigned long vaddr)
@@ -661,24 +631,9 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
661 if (!uprobe->consumers) 631 if (!uprobe->consumers)
662 return 0; 632 return 0;
663 633
664 if (!(uprobe->flags & UPROBE_COPY_INSN)) { 634 ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr);
665 ret = copy_insn(uprobe, vma->vm_file); 635 if (ret)
666 if (ret) 636 return ret;
667 return ret;
668
669 if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
670 return -ENOTSUPP;
671
672 ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);
673 if (ret)
674 return ret;
675
676 /* write_opcode() assumes we don't cross page boundary */
677 BUG_ON((uprobe->offset & ~PAGE_MASK) +
678 UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
679
680 uprobe->flags |= UPROBE_COPY_INSN;
681 }
682 637
683 /* 638 /*
684 * set MMF_HAS_UPROBES in advance for uprobe_pre_sstep_notifier(), 639 * set MMF_HAS_UPROBES in advance for uprobe_pre_sstep_notifier(),
@@ -697,15 +652,15 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
697 return ret; 652 return ret;
698} 653}
699 654
700static void 655static int
701remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) 656remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
702{ 657{
703 /* can happen if uprobe_register() fails */ 658 /* can happen if uprobe_register() fails */
704 if (!test_bit(MMF_HAS_UPROBES, &mm->flags)) 659 if (!test_bit(MMF_HAS_UPROBES, &mm->flags))
705 return; 660 return 0;
706 661
707 set_bit(MMF_RECALC_UPROBES, &mm->flags); 662 set_bit(MMF_RECALC_UPROBES, &mm->flags);
708 set_orig_insn(&uprobe->arch, mm, vaddr); 663 return set_orig_insn(&uprobe->arch, mm, vaddr);
709} 664}
710 665
711/* 666/*
@@ -820,7 +775,7 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
820 struct mm_struct *mm = info->mm; 775 struct mm_struct *mm = info->mm;
821 struct vm_area_struct *vma; 776 struct vm_area_struct *vma;
822 777
823 if (err) 778 if (err && is_register)
824 goto free; 779 goto free;
825 780
826 down_write(&mm->mmap_sem); 781 down_write(&mm->mmap_sem);
@@ -836,7 +791,7 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
836 if (is_register) 791 if (is_register)
837 err = install_breakpoint(uprobe, mm, vma, info->vaddr); 792 err = install_breakpoint(uprobe, mm, vma, info->vaddr);
838 else 793 else
839 remove_breakpoint(uprobe, mm, info->vaddr); 794 err |= remove_breakpoint(uprobe, mm, info->vaddr);
840 795
841 unlock: 796 unlock:
842 up_write(&mm->mmap_sem); 797 up_write(&mm->mmap_sem);
@@ -893,13 +848,15 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
893 mutex_lock(uprobes_hash(inode)); 848 mutex_lock(uprobes_hash(inode));
894 uprobe = alloc_uprobe(inode, offset); 849 uprobe = alloc_uprobe(inode, offset);
895 850
896 if (uprobe && !consumer_add(uprobe, uc)) { 851 if (!uprobe) {
852 ret = -ENOMEM;
853 } else if (!consumer_add(uprobe, uc)) {
897 ret = __uprobe_register(uprobe); 854 ret = __uprobe_register(uprobe);
898 if (ret) { 855 if (ret) {
899 uprobe->consumers = NULL; 856 uprobe->consumers = NULL;
900 __uprobe_unregister(uprobe); 857 __uprobe_unregister(uprobe);
901 } else { 858 } else {
902 uprobe->flags |= UPROBE_RUN_HANDLER; 859 set_bit(UPROBE_RUN_HANDLER, &uprobe->flags);
903 } 860 }
904 } 861 }
905 862
@@ -932,7 +889,7 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
932 if (consumer_del(uprobe, uc)) { 889 if (consumer_del(uprobe, uc)) {
933 if (!uprobe->consumers) { 890 if (!uprobe->consumers) {
934 __uprobe_unregister(uprobe); 891 __uprobe_unregister(uprobe);
935 uprobe->flags &= ~UPROBE_RUN_HANDLER; 892 clear_bit(UPROBE_RUN_HANDLER, &uprobe->flags);
936 } 893 }
937 } 894 }
938 895
@@ -1393,10 +1350,11 @@ bool uprobe_deny_signal(void)
1393 */ 1350 */
1394static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs) 1351static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
1395{ 1352{
1396 if (arch_uprobe_skip_sstep(&uprobe->arch, regs)) 1353 if (test_bit(UPROBE_SKIP_SSTEP, &uprobe->flags)) {
1397 return true; 1354 if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
1398 1355 return true;
1399 uprobe->flags &= ~UPROBE_SKIP_SSTEP; 1356 clear_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
1357 }
1400 return false; 1358 return false;
1401} 1359}
1402 1360
@@ -1419,6 +1377,30 @@ static void mmf_recalc_uprobes(struct mm_struct *mm)
1419 clear_bit(MMF_HAS_UPROBES, &mm->flags); 1377 clear_bit(MMF_HAS_UPROBES, &mm->flags);
1420} 1378}
1421 1379
1380static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
1381{
1382 struct page *page;
1383 uprobe_opcode_t opcode;
1384 int result;
1385
1386 pagefault_disable();
1387 result = __copy_from_user_inatomic(&opcode, (void __user*)vaddr,
1388 sizeof(opcode));
1389 pagefault_enable();
1390
1391 if (likely(result == 0))
1392 goto out;
1393
1394 result = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
1395 if (result < 0)
1396 return result;
1397
1398 copy_opcode(page, vaddr, &opcode);
1399 put_page(page);
1400 out:
1401 return is_swbp_insn(&opcode);
1402}
1403
1422static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) 1404static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
1423{ 1405{
1424 struct mm_struct *mm = current->mm; 1406 struct mm_struct *mm = current->mm;
@@ -1489,38 +1471,41 @@ static void handle_swbp(struct pt_regs *regs)
1489 } 1471 }
1490 return; 1472 return;
1491 } 1473 }
1474 /*
1475 * TODO: move copy_insn/etc into _register and remove this hack.
1476 * After we hit the bp, _unregister + _register can install the
1477 * new and not-yet-analyzed uprobe at the same address, restart.
1478 */
1479 smp_rmb(); /* pairs with wmb() in install_breakpoint() */
1480 if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags)))
1481 goto restart;
1492 1482
1493 utask = current->utask; 1483 utask = current->utask;
1494 if (!utask) { 1484 if (!utask) {
1495 utask = add_utask(); 1485 utask = add_utask();
1496 /* Cannot allocate; re-execute the instruction. */ 1486 /* Cannot allocate; re-execute the instruction. */
1497 if (!utask) 1487 if (!utask)
1498 goto cleanup_ret; 1488 goto restart;
1499 } 1489 }
1500 utask->active_uprobe = uprobe; 1490
1501 handler_chain(uprobe, regs); 1491 handler_chain(uprobe, regs);
1502 if (uprobe->flags & UPROBE_SKIP_SSTEP && can_skip_sstep(uprobe, regs)) 1492 if (can_skip_sstep(uprobe, regs))
1503 goto cleanup_ret; 1493 goto out;
1504 1494
1505 utask->state = UTASK_SSTEP;
1506 if (!pre_ssout(uprobe, regs, bp_vaddr)) { 1495 if (!pre_ssout(uprobe, regs, bp_vaddr)) {
1507 arch_uprobe_enable_step(&uprobe->arch); 1496 arch_uprobe_enable_step(&uprobe->arch);
1497 utask->active_uprobe = uprobe;
1498 utask->state = UTASK_SSTEP;
1508 return; 1499 return;
1509 } 1500 }
1510 1501
1511cleanup_ret: 1502restart:
1512 if (utask) { 1503 /*
1513 utask->active_uprobe = NULL; 1504 * cannot singlestep; cannot skip instruction;
1514 utask->state = UTASK_RUNNING; 1505 * re-execute the instruction.
1515 } 1506 */
1516 if (!(uprobe->flags & UPROBE_SKIP_SSTEP)) 1507 instruction_pointer_set(regs, bp_vaddr);
1517 1508out:
1518 /*
1519 * cannot singlestep; cannot skip instruction;
1520 * re-execute the instruction.
1521 */
1522 instruction_pointer_set(regs, bp_vaddr);
1523
1524 put_uprobe(uprobe); 1509 put_uprobe(uprobe);
1525} 1510}
1526 1511
@@ -1552,13 +1537,12 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
1552} 1537}
1553 1538
1554/* 1539/*
1555 * On breakpoint hit, breakpoint notifier sets the TIF_UPROBE flag. (and on 1540 * On breakpoint hit, breakpoint notifier sets the TIF_UPROBE flag and
1556 * subsequent probe hits on the thread sets the state to UTASK_BP_HIT) and 1541 * allows the thread to return from interrupt. After that handle_swbp()
1557 * allows the thread to return from interrupt. 1542 * sets utask->active_uprobe.
1558 * 1543 *
1559 * On singlestep exception, singlestep notifier sets the TIF_UPROBE flag and 1544 * On singlestep exception, singlestep notifier sets the TIF_UPROBE flag
1560 * also sets the state to UTASK_SSTEP_ACK and allows the thread to return from 1545 * and allows the thread to return from interrupt.
1561 * interrupt.
1562 * 1546 *
1563 * While returning to userspace, thread notices the TIF_UPROBE flag and calls 1547 * While returning to userspace, thread notices the TIF_UPROBE flag and calls
1564 * uprobe_notify_resume(). 1548 * uprobe_notify_resume().
@@ -1567,11 +1551,13 @@ void uprobe_notify_resume(struct pt_regs *regs)
1567{ 1551{
1568 struct uprobe_task *utask; 1552 struct uprobe_task *utask;
1569 1553
1554 clear_thread_flag(TIF_UPROBE);
1555
1570 utask = current->utask; 1556 utask = current->utask;
1571 if (!utask || utask->state == UTASK_BP_HIT) 1557 if (utask && utask->active_uprobe)
1572 handle_swbp(regs);
1573 else
1574 handle_singlestep(utask, regs); 1558 handle_singlestep(utask, regs);
1559 else
1560 handle_swbp(regs);
1575} 1561}
1576 1562
1577/* 1563/*
@@ -1580,17 +1566,10 @@ void uprobe_notify_resume(struct pt_regs *regs)
1580 */ 1566 */
1581int uprobe_pre_sstep_notifier(struct pt_regs *regs) 1567int uprobe_pre_sstep_notifier(struct pt_regs *regs)
1582{ 1568{
1583 struct uprobe_task *utask;
1584
1585 if (!current->mm || !test_bit(MMF_HAS_UPROBES, &current->mm->flags)) 1569 if (!current->mm || !test_bit(MMF_HAS_UPROBES, &current->mm->flags))
1586 return 0; 1570 return 0;
1587 1571
1588 utask = current->utask;
1589 if (utask)
1590 utask->state = UTASK_BP_HIT;
1591
1592 set_thread_flag(TIF_UPROBE); 1572 set_thread_flag(TIF_UPROBE);
1593
1594 return 1; 1573 return 1;
1595} 1574}
1596 1575
diff --git a/kernel/module-internal.h b/kernel/module-internal.h
index 6114a13419b..24f9247b7d0 100644
--- a/kernel/module-internal.h
+++ b/kernel/module-internal.h
@@ -11,5 +11,4 @@
11 11
12extern struct key *modsign_keyring; 12extern struct key *modsign_keyring;
13 13
14extern int mod_verify_sig(const void *mod, unsigned long modlen, 14extern int mod_verify_sig(const void *mod, unsigned long *_modlen);
15 const void *sig, unsigned long siglen);
diff --git a/kernel/module.c b/kernel/module.c
index 0e2da8695f8..6e48c3a4359 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2293,12 +2293,17 @@ static void layout_symtab(struct module *mod, struct load_info *info)
2293 src = (void *)info->hdr + symsect->sh_offset; 2293 src = (void *)info->hdr + symsect->sh_offset;
2294 nsrc = symsect->sh_size / sizeof(*src); 2294 nsrc = symsect->sh_size / sizeof(*src);
2295 2295
2296 /* strtab always starts with a nul, so offset 0 is the empty string. */
2297 strtab_size = 1;
2298
2296 /* Compute total space required for the core symbols' strtab. */ 2299 /* Compute total space required for the core symbols' strtab. */
2297 for (ndst = i = strtab_size = 1; i < nsrc; ++i, ++src) 2300 for (ndst = i = 0; i < nsrc; i++) {
2298 if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) { 2301 if (i == 0 ||
2299 strtab_size += strlen(&info->strtab[src->st_name]) + 1; 2302 is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
2303 strtab_size += strlen(&info->strtab[src[i].st_name])+1;
2300 ndst++; 2304 ndst++;
2301 } 2305 }
2306 }
2302 2307
2303 /* Append room for core symbols at end of core part. */ 2308 /* Append room for core symbols at end of core part. */
2304 info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); 2309 info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
@@ -2332,15 +2337,15 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
2332 mod->core_symtab = dst = mod->module_core + info->symoffs; 2337 mod->core_symtab = dst = mod->module_core + info->symoffs;
2333 mod->core_strtab = s = mod->module_core + info->stroffs; 2338 mod->core_strtab = s = mod->module_core + info->stroffs;
2334 src = mod->symtab; 2339 src = mod->symtab;
2335 *dst = *src;
2336 *s++ = 0; 2340 *s++ = 0;
2337 for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) { 2341 for (ndst = i = 0; i < mod->num_symtab; i++) {
2338 if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) 2342 if (i == 0 ||
2339 continue; 2343 is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
2340 2344 dst[ndst] = src[i];
2341 dst[ndst] = *src; 2345 dst[ndst++].st_name = s - mod->core_strtab;
2342 dst[ndst++].st_name = s - mod->core_strtab; 2346 s += strlcpy(s, &mod->strtab[src[i].st_name],
2343 s += strlcpy(s, &mod->strtab[src->st_name], KSYM_NAME_LEN) + 1; 2347 KSYM_NAME_LEN) + 1;
2348 }
2344 } 2349 }
2345 mod->core_num_syms = ndst; 2350 mod->core_num_syms = ndst;
2346} 2351}
@@ -2421,25 +2426,17 @@ static inline void kmemleak_load_module(const struct module *mod,
2421 2426
2422#ifdef CONFIG_MODULE_SIG 2427#ifdef CONFIG_MODULE_SIG
2423static int module_sig_check(struct load_info *info, 2428static int module_sig_check(struct load_info *info,
2424 const void *mod, unsigned long *len) 2429 const void *mod, unsigned long *_len)
2425{ 2430{
2426 int err = -ENOKEY; 2431 int err = -ENOKEY;
2427 const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; 2432 unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
2428 const void *p = mod, *end = mod + *len; 2433 unsigned long len = *_len;
2429 2434
2430 /* Poor man's memmem. */ 2435 if (len > markerlen &&
2431 while ((p = memchr(p, MODULE_SIG_STRING[0], end - p))) { 2436 memcmp(mod + len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
2432 if (p + markerlen > end) 2437 /* We truncate the module to discard the signature */
2433 break; 2438 *_len -= markerlen;
2434 2439 err = mod_verify_sig(mod, _len);
2435 if (memcmp(p, MODULE_SIG_STRING, markerlen) == 0) {
2436 const void *sig = p + markerlen;
2437 /* Truncate module up to signature. */
2438 *len = p - mod;
2439 err = mod_verify_sig(mod, *len, sig, end - sig);
2440 break;
2441 }
2442 p++;
2443 } 2440 }
2444 2441
2445 if (!err) { 2442 if (!err) {
diff --git a/kernel/module_signing.c b/kernel/module_signing.c
index 6b09f6983ac..ea1b1df5dbb 100644
--- a/kernel/module_signing.c
+++ b/kernel/module_signing.c
@@ -183,27 +183,33 @@ static struct key *request_asymmetric_key(const char *signer, size_t signer_len,
183/* 183/*
184 * Verify the signature on a module. 184 * Verify the signature on a module.
185 */ 185 */
186int mod_verify_sig(const void *mod, unsigned long modlen, 186int mod_verify_sig(const void *mod, unsigned long *_modlen)
187 const void *sig, unsigned long siglen)
188{ 187{
189 struct public_key_signature *pks; 188 struct public_key_signature *pks;
190 struct module_signature ms; 189 struct module_signature ms;
191 struct key *key; 190 struct key *key;
192 size_t sig_len; 191 const void *sig;
192 size_t modlen = *_modlen, sig_len;
193 int ret; 193 int ret;
194 194
195 pr_devel("==>%s(,%lu,,%lu,)\n", __func__, modlen, siglen); 195 pr_devel("==>%s(,%zu)\n", __func__, modlen);
196 196
197 if (siglen <= sizeof(ms)) 197 if (modlen <= sizeof(ms))
198 return -EBADMSG; 198 return -EBADMSG;
199 199
200 memcpy(&ms, sig + (siglen - sizeof(ms)), sizeof(ms)); 200 memcpy(&ms, mod + (modlen - sizeof(ms)), sizeof(ms));
201 siglen -= sizeof(ms); 201 modlen -= sizeof(ms);
202 202
203 sig_len = be32_to_cpu(ms.sig_len); 203 sig_len = be32_to_cpu(ms.sig_len);
204 if (sig_len >= siglen || 204 if (sig_len >= modlen)
205 siglen - sig_len != (size_t)ms.signer_len + ms.key_id_len)
206 return -EBADMSG; 205 return -EBADMSG;
206 modlen -= sig_len;
207 if ((size_t)ms.signer_len + ms.key_id_len >= modlen)
208 return -EBADMSG;
209 modlen -= (size_t)ms.signer_len + ms.key_id_len;
210
211 *_modlen = modlen;
212 sig = mod + modlen;
207 213
208 /* For the moment, only support RSA and X.509 identifiers */ 214 /* For the moment, only support RSA and X.509 identifiers */
209 if (ms.algo != PKEY_ALGO_RSA || 215 if (ms.algo != PKEY_ALGO_RSA ||
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 478bad2745e..7b07cc0dfb7 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -71,12 +71,22 @@ err_alloc:
71 return NULL; 71 return NULL;
72} 72}
73 73
74/* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
75#define MAX_PID_NS_LEVEL 32
76
74static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns) 77static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns)
75{ 78{
76 struct pid_namespace *ns; 79 struct pid_namespace *ns;
77 unsigned int level = parent_pid_ns->level + 1; 80 unsigned int level = parent_pid_ns->level + 1;
78 int i, err = -ENOMEM; 81 int i;
82 int err;
83
84 if (level > MAX_PID_NS_LEVEL) {
85 err = -EINVAL;
86 goto out;
87 }
79 88
89 err = -ENOMEM;
80 ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); 90 ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
81 if (ns == NULL) 91 if (ns == NULL)
82 goto out; 92 goto out;
@@ -133,19 +143,26 @@ struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old
133 return create_pid_namespace(old_ns); 143 return create_pid_namespace(old_ns);
134} 144}
135 145
136void free_pid_ns(struct kref *kref) 146static void free_pid_ns(struct kref *kref)
137{ 147{
138 struct pid_namespace *ns, *parent; 148 struct pid_namespace *ns;
139 149
140 ns = container_of(kref, struct pid_namespace, kref); 150 ns = container_of(kref, struct pid_namespace, kref);
141
142 parent = ns->parent;
143 destroy_pid_namespace(ns); 151 destroy_pid_namespace(ns);
152}
153
154void put_pid_ns(struct pid_namespace *ns)
155{
156 struct pid_namespace *parent;
144 157
145 if (parent != NULL) 158 while (ns != &init_pid_ns) {
146 put_pid_ns(parent); 159 parent = ns->parent;
160 if (!kref_put(&ns->kref, free_pid_ns))
161 break;
162 ns = parent;
163 }
147} 164}
148EXPORT_SYMBOL_GPL(free_pid_ns); 165EXPORT_SYMBOL_GPL(put_pid_ns);
149 166
150void zap_pid_ns_processes(struct pid_namespace *pid_ns) 167void zap_pid_ns_processes(struct pid_namespace *pid_ns)
151{ 168{
diff --git a/kernel/printk.c b/kernel/printk.c
index 66a2ea37b57..2d607f4d179 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1890,7 +1890,6 @@ static int __cpuinit console_cpu_notify(struct notifier_block *self,
1890 switch (action) { 1890 switch (action) {
1891 case CPU_ONLINE: 1891 case CPU_ONLINE:
1892 case CPU_DEAD: 1892 case CPU_DEAD:
1893 case CPU_DYING:
1894 case CPU_DOWN_FAILED: 1893 case CPU_DOWN_FAILED:
1895 case CPU_UP_CANCELED: 1894 case CPU_UP_CANCELED:
1896 console_lock(); 1895 console_lock();
diff --git a/kernel/sys.c b/kernel/sys.c
index c5cb5b99cb8..e6e0ece5f6a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1265,15 +1265,16 @@ DECLARE_RWSEM(uts_sem);
1265 * Work around broken programs that cannot handle "Linux 3.0". 1265 * Work around broken programs that cannot handle "Linux 3.0".
1266 * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40 1266 * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
1267 */ 1267 */
1268static int override_release(char __user *release, int len) 1268static int override_release(char __user *release, size_t len)
1269{ 1269{
1270 int ret = 0; 1270 int ret = 0;
1271 char buf[65];
1272 1271
1273 if (current->personality & UNAME26) { 1272 if (current->personality & UNAME26) {
1274 char *rest = UTS_RELEASE; 1273 const char *rest = UTS_RELEASE;
1274 char buf[65] = { 0 };
1275 int ndots = 0; 1275 int ndots = 0;
1276 unsigned v; 1276 unsigned v;
1277 size_t copy;
1277 1278
1278 while (*rest) { 1279 while (*rest) {
1279 if (*rest == '.' && ++ndots >= 3) 1280 if (*rest == '.' && ++ndots >= 3)
@@ -1283,8 +1284,9 @@ static int override_release(char __user *release, int len)
1283 rest++; 1284 rest++;
1284 } 1285 }
1285 v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40; 1286 v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
1286 snprintf(buf, len, "2.6.%u%s", v, rest); 1287 copy = clamp_t(size_t, len, 1, sizeof(buf));
1287 ret = copy_to_user(release, buf, len); 1288 copy = scnprintf(buf, copy, "2.6.%u%s", v, rest);
1289 ret = copy_to_user(release, buf, copy + 1);
1288 } 1290 }
1289 return ret; 1291 return ret;
1290} 1292}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b32ed0e385a..b979426d16c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1567,6 +1567,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
1567 1567
1568 put_online_cpus(); 1568 put_online_cpus();
1569 } else { 1569 } else {
1570 /* Make sure this CPU has been intitialized */
1571 if (!cpumask_test_cpu(cpu_id, buffer->cpumask))
1572 goto out;
1573
1570 cpu_buffer = buffer->buffers[cpu_id]; 1574 cpu_buffer = buffer->buffers[cpu_id];
1571 1575
1572 if (nr_pages == cpu_buffer->nr_pages) 1576 if (nr_pages == cpu_buffer->nr_pages)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index d951daa0ca9..042d221d33c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2982,7 +2982,7 @@ bool cancel_delayed_work(struct delayed_work *dwork)
2982 2982
2983 set_work_cpu_and_clear_pending(&dwork->work, work_cpu(&dwork->work)); 2983 set_work_cpu_and_clear_pending(&dwork->work, work_cpu(&dwork->work));
2984 local_irq_restore(flags); 2984 local_irq_restore(flags);
2985 return true; 2985 return ret;
2986} 2986}
2987EXPORT_SYMBOL(cancel_delayed_work); 2987EXPORT_SYMBOL(cancel_delayed_work);
2988 2988