diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/seccomp.c | 252 |
1 files changed, 171 insertions, 81 deletions
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 84922befea84..4ef9687ac115 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -21,10 +21,11 @@ | |||
21 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
22 | #include <linux/syscalls.h> | 22 | #include <linux/syscalls.h> |
23 | 23 | ||
24 | /* #define SECCOMP_DEBUG 1 */ | 24 | #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER |
25 | #include <asm/syscall.h> | ||
26 | #endif | ||
25 | 27 | ||
26 | #ifdef CONFIG_SECCOMP_FILTER | 28 | #ifdef CONFIG_SECCOMP_FILTER |
27 | #include <asm/syscall.h> | ||
28 | #include <linux/filter.h> | 29 | #include <linux/filter.h> |
29 | #include <linux/pid.h> | 30 | #include <linux/pid.h> |
30 | #include <linux/ptrace.h> | 31 | #include <linux/ptrace.h> |
@@ -172,10 +173,10 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) | |||
172 | * | 173 | * |
173 | * Returns valid seccomp BPF response codes. | 174 | * Returns valid seccomp BPF response codes. |
174 | */ | 175 | */ |
175 | static u32 seccomp_run_filters(int syscall) | 176 | static u32 seccomp_run_filters(struct seccomp_data *sd) |
176 | { | 177 | { |
177 | struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter); | 178 | struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter); |
178 | struct seccomp_data sd; | 179 | struct seccomp_data sd_local; |
179 | u32 ret = SECCOMP_RET_ALLOW; | 180 | u32 ret = SECCOMP_RET_ALLOW; |
180 | 181 | ||
181 | /* Ensure unexpected behavior doesn't result in failing open. */ | 182 | /* Ensure unexpected behavior doesn't result in failing open. */ |
@@ -185,14 +186,17 @@ static u32 seccomp_run_filters(int syscall) | |||
185 | /* Make sure cross-thread synced filter points somewhere sane. */ | 186 | /* Make sure cross-thread synced filter points somewhere sane. */ |
186 | smp_read_barrier_depends(); | 187 | smp_read_barrier_depends(); |
187 | 188 | ||
188 | populate_seccomp_data(&sd); | 189 | if (!sd) { |
190 | populate_seccomp_data(&sd_local); | ||
191 | sd = &sd_local; | ||
192 | } | ||
189 | 193 | ||
190 | /* | 194 | /* |
191 | * All filters in the list are evaluated and the lowest BPF return | 195 | * All filters in the list are evaluated and the lowest BPF return |
192 | * value always takes priority (ignoring the DATA). | 196 | * value always takes priority (ignoring the DATA). |
193 | */ | 197 | */ |
194 | for (; f; f = f->prev) { | 198 | for (; f; f = f->prev) { |
195 | u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)&sd); | 199 | u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)sd); |
196 | 200 | ||
197 | if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) | 201 | if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) |
198 | ret = cur_ret; | 202 | ret = cur_ret; |
@@ -563,11 +567,55 @@ static int mode1_syscalls_32[] = { | |||
563 | }; | 567 | }; |
564 | #endif | 568 | #endif |
565 | 569 | ||
566 | int __secure_computing(int this_syscall) | 570 | static void __secure_computing_strict(int this_syscall) |
571 | { | ||
572 | int *syscall_whitelist = mode1_syscalls; | ||
573 | #ifdef CONFIG_COMPAT | ||
574 | if (is_compat_task()) | ||
575 | syscall_whitelist = mode1_syscalls_32; | ||
576 | #endif | ||
577 | do { | ||
578 | if (*syscall_whitelist == this_syscall) | ||
579 | return; | ||
580 | } while (*++syscall_whitelist); | ||
581 | |||
582 | #ifdef SECCOMP_DEBUG | ||
583 | dump_stack(); | ||
584 | #endif | ||
585 | audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL); | ||
586 | do_exit(SIGKILL); | ||
587 | } | ||
588 | |||
589 | #ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER | ||
590 | void secure_computing_strict(int this_syscall) | ||
591 | { | ||
592 | int mode = current->seccomp.mode; | ||
593 | |||
594 | if (mode == 0) | ||
595 | return; | ||
596 | else if (mode == SECCOMP_MODE_STRICT) | ||
597 | __secure_computing_strict(this_syscall); | ||
598 | else | ||
599 | BUG(); | ||
600 | } | ||
601 | #else | ||
602 | int __secure_computing(void) | ||
567 | { | 603 | { |
568 | int exit_sig = 0; | 604 | u32 phase1_result = seccomp_phase1(NULL); |
569 | int *syscall; | 605 | |
570 | u32 ret; | 606 | if (likely(phase1_result == SECCOMP_PHASE1_OK)) |
607 | return 0; | ||
608 | else if (likely(phase1_result == SECCOMP_PHASE1_SKIP)) | ||
609 | return -1; | ||
610 | else | ||
611 | return seccomp_phase2(phase1_result); | ||
612 | } | ||
613 | |||
614 | #ifdef CONFIG_SECCOMP_FILTER | ||
615 | static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd) | ||
616 | { | ||
617 | u32 filter_ret, action; | ||
618 | int data; | ||
571 | 619 | ||
572 | /* | 620 | /* |
573 | * Make sure that any changes to mode from another thread have | 621 | * Make sure that any changes to mode from another thread have |
@@ -575,85 +623,127 @@ int __secure_computing(int this_syscall) | |||
575 | */ | 623 | */ |
576 | rmb(); | 624 | rmb(); |
577 | 625 | ||
578 | switch (current->seccomp.mode) { | 626 | filter_ret = seccomp_run_filters(sd); |
579 | case SECCOMP_MODE_STRICT: | 627 | data = filter_ret & SECCOMP_RET_DATA; |
580 | syscall = mode1_syscalls; | 628 | action = filter_ret & SECCOMP_RET_ACTION; |
581 | #ifdef CONFIG_COMPAT | 629 | |
582 | if (is_compat_task()) | 630 | switch (action) { |
583 | syscall = mode1_syscalls_32; | 631 | case SECCOMP_RET_ERRNO: |
632 | /* Set the low-order 16-bits as a errno. */ | ||
633 | syscall_set_return_value(current, task_pt_regs(current), | ||
634 | -data, 0); | ||
635 | goto skip; | ||
636 | |||
637 | case SECCOMP_RET_TRAP: | ||
638 | /* Show the handler the original registers. */ | ||
639 | syscall_rollback(current, task_pt_regs(current)); | ||
640 | /* Let the filter pass back 16 bits of data. */ | ||
641 | seccomp_send_sigsys(this_syscall, data); | ||
642 | goto skip; | ||
643 | |||
644 | case SECCOMP_RET_TRACE: | ||
645 | return filter_ret; /* Save the rest for phase 2. */ | ||
646 | |||
647 | case SECCOMP_RET_ALLOW: | ||
648 | return SECCOMP_PHASE1_OK; | ||
649 | |||
650 | case SECCOMP_RET_KILL: | ||
651 | default: | ||
652 | audit_seccomp(this_syscall, SIGSYS, action); | ||
653 | do_exit(SIGSYS); | ||
654 | } | ||
655 | |||
656 | unreachable(); | ||
657 | |||
658 | skip: | ||
659 | audit_seccomp(this_syscall, 0, action); | ||
660 | return SECCOMP_PHASE1_SKIP; | ||
661 | } | ||
584 | #endif | 662 | #endif |
585 | do { | 663 | |
586 | if (*syscall == this_syscall) | 664 | /** |
587 | return 0; | 665 | * seccomp_phase1() - run fast path seccomp checks on the current syscall |
588 | } while (*++syscall); | 666 | * @arg sd: The seccomp_data or NULL |
589 | exit_sig = SIGKILL; | 667 | * |
590 | ret = SECCOMP_RET_KILL; | 668 | * This only reads pt_regs via the syscall_xyz helpers. The only change |
591 | break; | 669 | * it will make to pt_regs is via syscall_set_return_value, and it will |
670 | * only do that if it returns SECCOMP_PHASE1_SKIP. | ||
671 | * | ||
672 | * If sd is provided, it will not read pt_regs at all. | ||
673 | * | ||
674 | * It may also call do_exit or force a signal; these actions must be | ||
675 | * safe. | ||
676 | * | ||
677 | * If it returns SECCOMP_PHASE1_OK, the syscall passes checks and should | ||
678 | * be processed normally. | ||
679 | * | ||
680 | * If it returns SECCOMP_PHASE1_SKIP, then the syscall should not be | ||
681 | * invoked. In this case, seccomp_phase1 will have set the return value | ||
682 | * using syscall_set_return_value. | ||
683 | * | ||
684 | * If it returns anything else, then the return value should be passed | ||
685 | * to seccomp_phase2 from a context in which ptrace hooks are safe. | ||
686 | */ | ||
687 | u32 seccomp_phase1(struct seccomp_data *sd) | ||
688 | { | ||
689 | int mode = current->seccomp.mode; | ||
690 | int this_syscall = sd ? sd->nr : | ||
691 | syscall_get_nr(current, task_pt_regs(current)); | ||
692 | |||
693 | switch (mode) { | ||
694 | case SECCOMP_MODE_STRICT: | ||
695 | __secure_computing_strict(this_syscall); /* may call do_exit */ | ||
696 | return SECCOMP_PHASE1_OK; | ||
592 | #ifdef CONFIG_SECCOMP_FILTER | 697 | #ifdef CONFIG_SECCOMP_FILTER |
593 | case SECCOMP_MODE_FILTER: { | 698 | case SECCOMP_MODE_FILTER: |
594 | int data; | 699 | return __seccomp_phase1_filter(this_syscall, sd); |
595 | struct pt_regs *regs = task_pt_regs(current); | ||
596 | ret = seccomp_run_filters(this_syscall); | ||
597 | data = ret & SECCOMP_RET_DATA; | ||
598 | ret &= SECCOMP_RET_ACTION; | ||
599 | switch (ret) { | ||
600 | case SECCOMP_RET_ERRNO: | ||
601 | /* Set the low-order 16-bits as a errno. */ | ||
602 | syscall_set_return_value(current, regs, | ||
603 | -data, 0); | ||
604 | goto skip; | ||
605 | case SECCOMP_RET_TRAP: | ||
606 | /* Show the handler the original registers. */ | ||
607 | syscall_rollback(current, regs); | ||
608 | /* Let the filter pass back 16 bits of data. */ | ||
609 | seccomp_send_sigsys(this_syscall, data); | ||
610 | goto skip; | ||
611 | case SECCOMP_RET_TRACE: | ||
612 | /* Skip these calls if there is no tracer. */ | ||
613 | if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) { | ||
614 | syscall_set_return_value(current, regs, | ||
615 | -ENOSYS, 0); | ||
616 | goto skip; | ||
617 | } | ||
618 | /* Allow the BPF to provide the event message */ | ||
619 | ptrace_event(PTRACE_EVENT_SECCOMP, data); | ||
620 | /* | ||
621 | * The delivery of a fatal signal during event | ||
622 | * notification may silently skip tracer notification. | ||
623 | * Terminating the task now avoids executing a system | ||
624 | * call that may not be intended. | ||
625 | */ | ||
626 | if (fatal_signal_pending(current)) | ||
627 | break; | ||
628 | if (syscall_get_nr(current, regs) < 0) | ||
629 | goto skip; /* Explicit request to skip. */ | ||
630 | |||
631 | return 0; | ||
632 | case SECCOMP_RET_ALLOW: | ||
633 | return 0; | ||
634 | case SECCOMP_RET_KILL: | ||
635 | default: | ||
636 | break; | ||
637 | } | ||
638 | exit_sig = SIGSYS; | ||
639 | break; | ||
640 | } | ||
641 | #endif | 700 | #endif |
642 | default: | 701 | default: |
643 | BUG(); | 702 | BUG(); |
644 | } | 703 | } |
704 | } | ||
645 | 705 | ||
646 | #ifdef SECCOMP_DEBUG | 706 | /** |
647 | dump_stack(); | 707 | * seccomp_phase2() - finish slow path seccomp work for the current syscall |
648 | #endif | 708 | * @phase1_result: The return value from seccomp_phase1() |
649 | audit_seccomp(this_syscall, exit_sig, ret); | 709 | * |
650 | do_exit(exit_sig); | 710 | * This must be called from a context in which ptrace hooks can be used. |
651 | #ifdef CONFIG_SECCOMP_FILTER | 711 | * |
652 | skip: | 712 | * Returns 0 if the syscall should be processed or -1 to skip the syscall. |
653 | audit_seccomp(this_syscall, exit_sig, ret); | 713 | */ |
654 | #endif | 714 | int seccomp_phase2(u32 phase1_result) |
655 | return -1; | 715 | { |
716 | struct pt_regs *regs = task_pt_regs(current); | ||
717 | u32 action = phase1_result & SECCOMP_RET_ACTION; | ||
718 | int data = phase1_result & SECCOMP_RET_DATA; | ||
719 | |||
720 | BUG_ON(action != SECCOMP_RET_TRACE); | ||
721 | |||
722 | audit_seccomp(syscall_get_nr(current, regs), 0, action); | ||
723 | |||
724 | /* Skip these calls if there is no tracer. */ | ||
725 | if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) { | ||
726 | syscall_set_return_value(current, regs, | ||
727 | -ENOSYS, 0); | ||
728 | return -1; | ||
729 | } | ||
730 | |||
731 | /* Allow the BPF to provide the event message */ | ||
732 | ptrace_event(PTRACE_EVENT_SECCOMP, data); | ||
733 | /* | ||
734 | * The delivery of a fatal signal during event | ||
735 | * notification may silently skip tracer notification. | ||
736 | * Terminating the task now avoids executing a system | ||
737 | * call that may not be intended. | ||
738 | */ | ||
739 | if (fatal_signal_pending(current)) | ||
740 | do_exit(SIGSYS); | ||
741 | if (syscall_get_nr(current, regs) < 0) | ||
742 | return -1; /* Explicit request to skip. */ | ||
743 | |||
744 | return 0; | ||
656 | } | 745 | } |
746 | #endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */ | ||
657 | 747 | ||
658 | long prctl_get_seccomp(void) | 748 | long prctl_get_seccomp(void) |
659 | { | 749 | { |