about | summary | refs | log | tree | commit | diff | stats
path: root/kernel/seccomp.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2014-10-13 20:27:06 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-10-13 20:27:06 -0400
commit ba1a96fc7ddcaf0c8d4a6752f6a70f080bc307ac (patch)
tree c07af88f62df1ab8ed98aab9951dd05dff09d0d2 /kernel/seccomp.c
parent f1bfbd984b4e2177886507b6a0ec5faeb6d7c217 (diff)
parent 1dcf74f6edfc3a9acd84d83d8865dd9e2a3b1d1e (diff)
Merge branch 'x86-seccomp-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 seccomp changes from Ingo Molnar:

 "This tree includes x86 seccomp filter speedups and related preparatory
  work, which touches core seccomp facilities as well.

  The main idea is to split seccomp into two phases, to be able to enter
  a simple fast path for syscalls with ptrace side effects.

  There's no substantial user-visible (and ABI) effects expected from
  this, except a change in how we emit a better audit record for
  SECCOMP_RET_TRACE events"

* 'x86-seccomp-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86_64, entry: Use split-phase syscall_trace_enter for 64-bit syscalls
  x86_64, entry: Treat regs->ax the same in fastpath and slowpath syscalls
  x86: Split syscall_trace_enter into two phases
  x86, entry: Only call user_exit if TIF_NOHZ
  x86, x32, audit: Fix x32's AUDIT_ARCH wrt audit
  seccomp: Document two-phase seccomp and arch-provided seccomp_data
  seccomp: Allow arch code to provide seccomp_data
  seccomp: Refactor the filter callback and the API
  seccomp,x86,arm,mips,s390: Remove nr parameter from secure_computing
Diffstat (limited to 'kernel/seccomp.c')
-rw-r--r-- kernel/seccomp.c 252
1 file changed, 171 insertions, 81 deletions
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 84922befea84..4ef9687ac115 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -21,10 +21,11 @@
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/syscalls.h> 22#include <linux/syscalls.h>
23 23
24/* #define SECCOMP_DEBUG 1 */ 24#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
25#include <asm/syscall.h>
26#endif
25 27
26#ifdef CONFIG_SECCOMP_FILTER 28#ifdef CONFIG_SECCOMP_FILTER
27#include <asm/syscall.h>
28#include <linux/filter.h> 29#include <linux/filter.h>
29#include <linux/pid.h> 30#include <linux/pid.h>
30#include <linux/ptrace.h> 31#include <linux/ptrace.h>
@@ -172,10 +173,10 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
172 * 173 *
173 * Returns valid seccomp BPF response codes. 174 * Returns valid seccomp BPF response codes.
174 */ 175 */
175static u32 seccomp_run_filters(int syscall) 176static u32 seccomp_run_filters(struct seccomp_data *sd)
176{ 177{
177 struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter); 178 struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
178 struct seccomp_data sd; 179 struct seccomp_data sd_local;
179 u32 ret = SECCOMP_RET_ALLOW; 180 u32 ret = SECCOMP_RET_ALLOW;
180 181
181 /* Ensure unexpected behavior doesn't result in failing open. */ 182 /* Ensure unexpected behavior doesn't result in failing open. */
@@ -185,14 +186,17 @@ static u32 seccomp_run_filters(int syscall)
185 /* Make sure cross-thread synced filter points somewhere sane. */ 186 /* Make sure cross-thread synced filter points somewhere sane. */
186 smp_read_barrier_depends(); 187 smp_read_barrier_depends();
187 188
188 populate_seccomp_data(&sd); 189 if (!sd) {
190 populate_seccomp_data(&sd_local);
191 sd = &sd_local;
192 }
189 193
190 /* 194 /*
191 * All filters in the list are evaluated and the lowest BPF return 195 * All filters in the list are evaluated and the lowest BPF return
192 * value always takes priority (ignoring the DATA). 196 * value always takes priority (ignoring the DATA).
193 */ 197 */
194 for (; f; f = f->prev) { 198 for (; f; f = f->prev) {
195 u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)&sd); 199 u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)sd);
196 200
197 if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) 201 if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
198 ret = cur_ret; 202 ret = cur_ret;
@@ -563,11 +567,55 @@ static int mode1_syscalls_32[] = {
563}; 567};
564#endif 568#endif
565 569
566int __secure_computing(int this_syscall) 570static void __secure_computing_strict(int this_syscall)
571{
572 int *syscall_whitelist = mode1_syscalls;
573#ifdef CONFIG_COMPAT
574 if (is_compat_task())
575 syscall_whitelist = mode1_syscalls_32;
576#endif
577 do {
578 if (*syscall_whitelist == this_syscall)
579 return;
580 } while (*++syscall_whitelist);
581
582#ifdef SECCOMP_DEBUG
583 dump_stack();
584#endif
585 audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL);
586 do_exit(SIGKILL);
587}
588
589#ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER
590void secure_computing_strict(int this_syscall)
591{
592 int mode = current->seccomp.mode;
593
594 if (mode == 0)
595 return;
596 else if (mode == SECCOMP_MODE_STRICT)
597 __secure_computing_strict(this_syscall);
598 else
599 BUG();
600}
601#else
602int __secure_computing(void)
567{ 603{
568 int exit_sig = 0; 604 u32 phase1_result = seccomp_phase1(NULL);
569 int *syscall; 605
570 u32 ret; 606 if (likely(phase1_result == SECCOMP_PHASE1_OK))
607 return 0;
608 else if (likely(phase1_result == SECCOMP_PHASE1_SKIP))
609 return -1;
610 else
611 return seccomp_phase2(phase1_result);
612}
613
614#ifdef CONFIG_SECCOMP_FILTER
615static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd)
616{
617 u32 filter_ret, action;
618 int data;
571 619
572 /* 620 /*
573 * Make sure that any changes to mode from another thread have 621 * Make sure that any changes to mode from another thread have
@@ -575,85 +623,127 @@ int __secure_computing(int this_syscall)
575 */ 623 */
576 rmb(); 624 rmb();
577 625
578 switch (current->seccomp.mode) { 626 filter_ret = seccomp_run_filters(sd);
579 case SECCOMP_MODE_STRICT: 627 data = filter_ret & SECCOMP_RET_DATA;
580 syscall = mode1_syscalls; 628 action = filter_ret & SECCOMP_RET_ACTION;
581#ifdef CONFIG_COMPAT 629
582 if (is_compat_task()) 630 switch (action) {
583 syscall = mode1_syscalls_32; 631 case SECCOMP_RET_ERRNO:
632 /* Set the low-order 16-bits as a errno. */
633 syscall_set_return_value(current, task_pt_regs(current),
634 -data, 0);
635 goto skip;
636
637 case SECCOMP_RET_TRAP:
638 /* Show the handler the original registers. */
639 syscall_rollback(current, task_pt_regs(current));
640 /* Let the filter pass back 16 bits of data. */
641 seccomp_send_sigsys(this_syscall, data);
642 goto skip;
643
644 case SECCOMP_RET_TRACE:
645 return filter_ret; /* Save the rest for phase 2. */
646
647 case SECCOMP_RET_ALLOW:
648 return SECCOMP_PHASE1_OK;
649
650 case SECCOMP_RET_KILL:
651 default:
652 audit_seccomp(this_syscall, SIGSYS, action);
653 do_exit(SIGSYS);
654 }
655
656 unreachable();
657
658skip:
659 audit_seccomp(this_syscall, 0, action);
660 return SECCOMP_PHASE1_SKIP;
661}
584#endif 662#endif
585 do { 663
586 if (*syscall == this_syscall) 664/**
587 return 0; 665 * seccomp_phase1() - run fast path seccomp checks on the current syscall
588 } while (*++syscall); 666 * @arg sd: The seccomp_data or NULL
589 exit_sig = SIGKILL; 667 *
590 ret = SECCOMP_RET_KILL; 668 * This only reads pt_regs via the syscall_xyz helpers. The only change
591 break; 669 * it will make to pt_regs is via syscall_set_return_value, and it will
670 * only do that if it returns SECCOMP_PHASE1_SKIP.
671 *
672 * If sd is provided, it will not read pt_regs at all.
673 *
674 * It may also call do_exit or force a signal; these actions must be
675 * safe.
676 *
677 * If it returns SECCOMP_PHASE1_OK, the syscall passes checks and should
678 * be processed normally.
679 *
680 * If it returns SECCOMP_PHASE1_SKIP, then the syscall should not be
681 * invoked. In this case, seccomp_phase1 will have set the return value
682 * using syscall_set_return_value.
683 *
684 * If it returns anything else, then the return value should be passed
685 * to seccomp_phase2 from a context in which ptrace hooks are safe.
686 */
687u32 seccomp_phase1(struct seccomp_data *sd)
688{
689 int mode = current->seccomp.mode;
690 int this_syscall = sd ? sd->nr :
691 syscall_get_nr(current, task_pt_regs(current));
692
693 switch (mode) {
694 case SECCOMP_MODE_STRICT:
695 __secure_computing_strict(this_syscall); /* may call do_exit */
696 return SECCOMP_PHASE1_OK;
592#ifdef CONFIG_SECCOMP_FILTER 697#ifdef CONFIG_SECCOMP_FILTER
593 case SECCOMP_MODE_FILTER: { 698 case SECCOMP_MODE_FILTER:
594 int data; 699 return __seccomp_phase1_filter(this_syscall, sd);
595 struct pt_regs *regs = task_pt_regs(current);
596 ret = seccomp_run_filters(this_syscall);
597 data = ret & SECCOMP_RET_DATA;
598 ret &= SECCOMP_RET_ACTION;
599 switch (ret) {
600 case SECCOMP_RET_ERRNO:
601 /* Set the low-order 16-bits as a errno. */
602 syscall_set_return_value(current, regs,
603 -data, 0);
604 goto skip;
605 case SECCOMP_RET_TRAP:
606 /* Show the handler the original registers. */
607 syscall_rollback(current, regs);
608 /* Let the filter pass back 16 bits of data. */
609 seccomp_send_sigsys(this_syscall, data);
610 goto skip;
611 case SECCOMP_RET_TRACE:
612 /* Skip these calls if there is no tracer. */
613 if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
614 syscall_set_return_value(current, regs,
615 -ENOSYS, 0);
616 goto skip;
617 }
618 /* Allow the BPF to provide the event message */
619 ptrace_event(PTRACE_EVENT_SECCOMP, data);
620 /*
621 * The delivery of a fatal signal during event
622 * notification may silently skip tracer notification.
623 * Terminating the task now avoids executing a system
624 * call that may not be intended.
625 */
626 if (fatal_signal_pending(current))
627 break;
628 if (syscall_get_nr(current, regs) < 0)
629 goto skip; /* Explicit request to skip. */
630
631 return 0;
632 case SECCOMP_RET_ALLOW:
633 return 0;
634 case SECCOMP_RET_KILL:
635 default:
636 break;
637 }
638 exit_sig = SIGSYS;
639 break;
640 }
641#endif 700#endif
642 default: 701 default:
643 BUG(); 702 BUG();
644 } 703 }
704}
645 705
646#ifdef SECCOMP_DEBUG 706/**
647 dump_stack(); 707 * seccomp_phase2() - finish slow path seccomp work for the current syscall
648#endif 708 * @phase1_result: The return value from seccomp_phase1()
649 audit_seccomp(this_syscall, exit_sig, ret); 709 *
650 do_exit(exit_sig); 710 * This must be called from a context in which ptrace hooks can be used.
651#ifdef CONFIG_SECCOMP_FILTER 711 *
652skip: 712 * Returns 0 if the syscall should be processed or -1 to skip the syscall.
653 audit_seccomp(this_syscall, exit_sig, ret); 713 */
654#endif 714int seccomp_phase2(u32 phase1_result)
655 return -1; 715{
716 struct pt_regs *regs = task_pt_regs(current);
717 u32 action = phase1_result & SECCOMP_RET_ACTION;
718 int data = phase1_result & SECCOMP_RET_DATA;
719
720 BUG_ON(action != SECCOMP_RET_TRACE);
721
722 audit_seccomp(syscall_get_nr(current, regs), 0, action);
723
724 /* Skip these calls if there is no tracer. */
725 if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
726 syscall_set_return_value(current, regs,
727 -ENOSYS, 0);
728 return -1;
729 }
730
731 /* Allow the BPF to provide the event message */
732 ptrace_event(PTRACE_EVENT_SECCOMP, data);
733 /*
734 * The delivery of a fatal signal during event
735 * notification may silently skip tracer notification.
736 * Terminating the task now avoids executing a system
737 * call that may not be intended.
738 */
739 if (fatal_signal_pending(current))
740 do_exit(SIGSYS);
741 if (syscall_get_nr(current, regs) < 0)
742 return -1; /* Explicit request to skip. */
743
744 return 0;
656} 745}
746#endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */
657 747
658long prctl_get_seccomp(void) 748long prctl_get_seccomp(void)
659{ 749{