aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Andy Lutomirski <luto@amacapital.net> 2014-07-21 21:49:15 -0400
committer: Kees Cook <keescook@chromium.org> 2014-09-03 17:58:17 -0400
commit: 13aa72f0fd0a9f98a41cefb662487269e2f1ad65 (patch)
tree: fbd7bf64cdc2b5d0f89c658ab784edad7fc1cff3
parent: a4412fc9486ec85686c6c7929e7e829f62ae377e (diff)
seccomp: Refactor the filter callback and the API
The reason I did this is to add a seccomp API that will be usable for an x86 fast path. The x86 entry code needs to use a rather expensive slow path for a syscall that might be visible to things like ptrace. By splitting seccomp into two phases, we can check whether we need the slow path and then use the fast path if the filter allows the syscall or just returns some errno. As a side effect, I think the new code is much easier to understand than the old code. This has one user-visible effect: the audit record written for SECCOMP_RET_TRACE is now a simple indication that SECCOMP_RET_TRACE happened. It used to depend in a complicated way on what the tracer did. I couldn't make much sense of it. Signed-off-by: Andy Lutomirski <luto@amacapital.net> Signed-off-by: Kees Cook <keescook@chromium.org>
-rw-r--r-- include/linux/seccomp.h 6
-rw-r--r-- kernel/seccomp.c 190
2 files changed, 130 insertions, 66 deletions
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index aa3c040230be..38851085e481 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -35,6 +35,12 @@ static inline int secure_computing(void)
35 return __secure_computing(); 35 return __secure_computing();
36 return 0; 36 return 0;
37} 37}
38
39#define SECCOMP_PHASE1_OK 0
40#define SECCOMP_PHASE1_SKIP 1
41
42extern u32 seccomp_phase1(void);
43int seccomp_phase2(u32 phase1_result);
38#else 44#else
39extern void secure_computing_strict(int this_syscall); 45extern void secure_computing_strict(int this_syscall);
40#endif 46#endif
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 5e738e0dd2e9..6c8528ce9df9 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -21,8 +21,6 @@
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/syscalls.h> 22#include <linux/syscalls.h>
23 23
24/* #define SECCOMP_DEBUG 1 */
25
26#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER 24#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
27#include <asm/syscall.h> 25#include <asm/syscall.h>
28#endif 26#endif
@@ -601,10 +599,21 @@ void secure_computing_strict(int this_syscall)
601#else 599#else
602int __secure_computing(void) 600int __secure_computing(void)
603{ 601{
604 struct pt_regs *regs = task_pt_regs(current); 602 u32 phase1_result = seccomp_phase1();
605 int this_syscall = syscall_get_nr(current, regs); 603
606 int exit_sig = 0; 604 if (likely(phase1_result == SECCOMP_PHASE1_OK))
607 u32 ret; 605 return 0;
606 else if (likely(phase1_result == SECCOMP_PHASE1_SKIP))
607 return -1;
608 else
609 return seccomp_phase2(phase1_result);
610}
611
612#ifdef CONFIG_SECCOMP_FILTER
613static u32 __seccomp_phase1_filter(int this_syscall, struct pt_regs *regs)
614{
615 u32 filter_ret, action;
616 int data;
608 617
609 /* 618 /*
610 * Make sure that any changes to mode from another thread have 619 * Make sure that any changes to mode from another thread have
@@ -612,73 +621,122 @@ int __secure_computing(void)
612 */ 621 */
613 rmb(); 622 rmb();
614 623
615 switch (current->seccomp.mode) { 624 filter_ret = seccomp_run_filters();
625 data = filter_ret & SECCOMP_RET_DATA;
626 action = filter_ret & SECCOMP_RET_ACTION;
627
628 switch (action) {
629 case SECCOMP_RET_ERRNO:
630 /* Set the low-order 16-bits as a errno. */
631 syscall_set_return_value(current, regs,
632 -data, 0);
633 goto skip;
634
635 case SECCOMP_RET_TRAP:
636 /* Show the handler the original registers. */
637 syscall_rollback(current, regs);
638 /* Let the filter pass back 16 bits of data. */
639 seccomp_send_sigsys(this_syscall, data);
640 goto skip;
641
642 case SECCOMP_RET_TRACE:
643 return filter_ret; /* Save the rest for phase 2. */
644
645 case SECCOMP_RET_ALLOW:
646 return SECCOMP_PHASE1_OK;
647
648 case SECCOMP_RET_KILL:
649 default:
650 audit_seccomp(this_syscall, SIGSYS, action);
651 do_exit(SIGSYS);
652 }
653
654 unreachable();
655
656skip:
657 audit_seccomp(this_syscall, 0, action);
658 return SECCOMP_PHASE1_SKIP;
659}
660#endif
661
662/**
663 * seccomp_phase1() - run fast path seccomp checks on the current syscall
664 *
665 * This only reads pt_regs via the syscall_xyz helpers. The only change
666 * it will make to pt_regs is via syscall_set_return_value, and it will
667 * only do that if it returns SECCOMP_PHASE1_SKIP.
668 *
669 * It may also call do_exit or force a signal; these actions must be
670 * safe.
671 *
672 * If it returns SECCOMP_PHASE1_OK, the syscall passes checks and should
673 * be processed normally.
674 *
675 * If it returns SECCOMP_PHASE1_SKIP, then the syscall should not be
676 * invoked. In this case, seccomp_phase1 will have set the return value
677 * using syscall_set_return_value.
678 *
679 * If it returns anything else, then the return value should be passed
680 * to seccomp_phase2 from a context in which ptrace hooks are safe.
681 */
682u32 seccomp_phase1(void)
683{
684 int mode = current->seccomp.mode;
685 struct pt_regs *regs = task_pt_regs(current);
686 int this_syscall = syscall_get_nr(current, regs);
687
688 switch (mode) {
616 case SECCOMP_MODE_STRICT: 689 case SECCOMP_MODE_STRICT:
617 __secure_computing_strict(this_syscall); 690 __secure_computing_strict(this_syscall); /* may call do_exit */
618 return 0; 691 return SECCOMP_PHASE1_OK;
619#ifdef CONFIG_SECCOMP_FILTER 692#ifdef CONFIG_SECCOMP_FILTER
620 case SECCOMP_MODE_FILTER: { 693 case SECCOMP_MODE_FILTER:
621 int data; 694 return __seccomp_phase1_filter(this_syscall, regs);
622 ret = seccomp_run_filters();
623 data = ret & SECCOMP_RET_DATA;
624 ret &= SECCOMP_RET_ACTION;
625 switch (ret) {
626 case SECCOMP_RET_ERRNO:
627 /* Set the low-order 16-bits as a errno. */
628 syscall_set_return_value(current, regs,
629 -data, 0);
630 goto skip;
631 case SECCOMP_RET_TRAP:
632 /* Show the handler the original registers. */
633 syscall_rollback(current, regs);
634 /* Let the filter pass back 16 bits of data. */
635 seccomp_send_sigsys(this_syscall, data);
636 goto skip;
637 case SECCOMP_RET_TRACE:
638 /* Skip these calls if there is no tracer. */
639 if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
640 syscall_set_return_value(current, regs,
641 -ENOSYS, 0);
642 goto skip;
643 }
644 /* Allow the BPF to provide the event message */
645 ptrace_event(PTRACE_EVENT_SECCOMP, data);
646 /*
647 * The delivery of a fatal signal during event
648 * notification may silently skip tracer notification.
649 * Terminating the task now avoids executing a system
650 * call that may not be intended.
651 */
652 if (fatal_signal_pending(current))
653 break;
654 if (syscall_get_nr(current, regs) < 0)
655 goto skip; /* Explicit request to skip. */
656
657 return 0;
658 case SECCOMP_RET_ALLOW:
659 return 0;
660 case SECCOMP_RET_KILL:
661 default:
662 break;
663 }
664 exit_sig = SIGSYS;
665 break;
666 }
667#endif 695#endif
668 default: 696 default:
669 BUG(); 697 BUG();
670 } 698 }
699}
671 700
672#ifdef SECCOMP_DEBUG 701/**
673 dump_stack(); 702 * seccomp_phase2() - finish slow path seccomp work for the current syscall
674#endif 703 * @phase1_result: The return value from seccomp_phase1()
675 audit_seccomp(this_syscall, exit_sig, ret); 704 *
676 do_exit(exit_sig); 705 * This must be called from a context in which ptrace hooks can be used.
677#ifdef CONFIG_SECCOMP_FILTER 706 *
678skip: 707 * Returns 0 if the syscall should be processed or -1 to skip the syscall.
679 audit_seccomp(this_syscall, exit_sig, ret); 708 */
680 return -1; 709int seccomp_phase2(u32 phase1_result)
681#endif 710{
711 struct pt_regs *regs = task_pt_regs(current);
712 u32 action = phase1_result & SECCOMP_RET_ACTION;
713 int data = phase1_result & SECCOMP_RET_DATA;
714
715 BUG_ON(action != SECCOMP_RET_TRACE);
716
717 audit_seccomp(syscall_get_nr(current, regs), 0, action);
718
719 /* Skip these calls if there is no tracer. */
720 if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
721 syscall_set_return_value(current, regs,
722 -ENOSYS, 0);
723 return -1;
724 }
725
726 /* Allow the BPF to provide the event message */
727 ptrace_event(PTRACE_EVENT_SECCOMP, data);
728 /*
729 * The delivery of a fatal signal during event
730 * notification may silently skip tracer notification.
731 * Terminating the task now avoids executing a system
732 * call that may not be intended.
733 */
734 if (fatal_signal_pending(current))
735 do_exit(SIGSYS);
736 if (syscall_get_nr(current, regs) < 0)
737 return -1; /* Explicit request to skip. */
738
739 return 0;
682} 740}
683#endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */ 741#endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */
684 742