diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/seccomp.c | 252 |
1 files changed, 171 insertions, 81 deletions
diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 84922befea84..4ef9687ac115 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c | |||
| @@ -21,10 +21,11 @@ | |||
| 21 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
| 22 | #include <linux/syscalls.h> | 22 | #include <linux/syscalls.h> |
| 23 | 23 | ||
| 24 | /* #define SECCOMP_DEBUG 1 */ | 24 | #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER |
| 25 | #include <asm/syscall.h> | ||
| 26 | #endif | ||
| 25 | 27 | ||
| 26 | #ifdef CONFIG_SECCOMP_FILTER | 28 | #ifdef CONFIG_SECCOMP_FILTER |
| 27 | #include <asm/syscall.h> | ||
| 28 | #include <linux/filter.h> | 29 | #include <linux/filter.h> |
| 29 | #include <linux/pid.h> | 30 | #include <linux/pid.h> |
| 30 | #include <linux/ptrace.h> | 31 | #include <linux/ptrace.h> |
| @@ -172,10 +173,10 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) | |||
| 172 | * | 173 | * |
| 173 | * Returns valid seccomp BPF response codes. | 174 | * Returns valid seccomp BPF response codes. |
| 174 | */ | 175 | */ |
| 175 | static u32 seccomp_run_filters(int syscall) | 176 | static u32 seccomp_run_filters(struct seccomp_data *sd) |
| 176 | { | 177 | { |
| 177 | struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter); | 178 | struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter); |
| 178 | struct seccomp_data sd; | 179 | struct seccomp_data sd_local; |
| 179 | u32 ret = SECCOMP_RET_ALLOW; | 180 | u32 ret = SECCOMP_RET_ALLOW; |
| 180 | 181 | ||
| 181 | /* Ensure unexpected behavior doesn't result in failing open. */ | 182 | /* Ensure unexpected behavior doesn't result in failing open. */ |
| @@ -185,14 +186,17 @@ static u32 seccomp_run_filters(int syscall) | |||
| 185 | /* Make sure cross-thread synced filter points somewhere sane. */ | 186 | /* Make sure cross-thread synced filter points somewhere sane. */ |
| 186 | smp_read_barrier_depends(); | 187 | smp_read_barrier_depends(); |
| 187 | 188 | ||
| 188 | populate_seccomp_data(&sd); | 189 | if (!sd) { |
| 190 | populate_seccomp_data(&sd_local); | ||
| 191 | sd = &sd_local; | ||
| 192 | } | ||
| 189 | 193 | ||
| 190 | /* | 194 | /* |
| 191 | * All filters in the list are evaluated and the lowest BPF return | 195 | * All filters in the list are evaluated and the lowest BPF return |
| 192 | * value always takes priority (ignoring the DATA). | 196 | * value always takes priority (ignoring the DATA). |
| 193 | */ | 197 | */ |
| 194 | for (; f; f = f->prev) { | 198 | for (; f; f = f->prev) { |
| 195 | u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)&sd); | 199 | u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)sd); |
| 196 | 200 | ||
| 197 | if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) | 201 | if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) |
| 198 | ret = cur_ret; | 202 | ret = cur_ret; |
| @@ -563,11 +567,55 @@ static int mode1_syscalls_32[] = { | |||
| 563 | }; | 567 | }; |
| 564 | #endif | 568 | #endif |
| 565 | 569 | ||
| 566 | int __secure_computing(int this_syscall) | 570 | static void __secure_computing_strict(int this_syscall) |
| 571 | { | ||
| 572 | int *syscall_whitelist = mode1_syscalls; | ||
| 573 | #ifdef CONFIG_COMPAT | ||
| 574 | if (is_compat_task()) | ||
| 575 | syscall_whitelist = mode1_syscalls_32; | ||
| 576 | #endif | ||
| 577 | do { | ||
| 578 | if (*syscall_whitelist == this_syscall) | ||
| 579 | return; | ||
| 580 | } while (*++syscall_whitelist); | ||
| 581 | |||
| 582 | #ifdef SECCOMP_DEBUG | ||
| 583 | dump_stack(); | ||
| 584 | #endif | ||
| 585 | audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL); | ||
| 586 | do_exit(SIGKILL); | ||
| 587 | } | ||
| 588 | |||
| 589 | #ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER | ||
| 590 | void secure_computing_strict(int this_syscall) | ||
| 591 | { | ||
| 592 | int mode = current->seccomp.mode; | ||
| 593 | |||
| 594 | if (mode == 0) | ||
| 595 | return; | ||
| 596 | else if (mode == SECCOMP_MODE_STRICT) | ||
| 597 | __secure_computing_strict(this_syscall); | ||
| 598 | else | ||
| 599 | BUG(); | ||
| 600 | } | ||
| 601 | #else | ||
| 602 | int __secure_computing(void) | ||
| 567 | { | 603 | { |
| 568 | int exit_sig = 0; | 604 | u32 phase1_result = seccomp_phase1(NULL); |
| 569 | int *syscall; | 605 | |
| 570 | u32 ret; | 606 | if (likely(phase1_result == SECCOMP_PHASE1_OK)) |
| 607 | return 0; | ||
| 608 | else if (likely(phase1_result == SECCOMP_PHASE1_SKIP)) | ||
| 609 | return -1; | ||
| 610 | else | ||
| 611 | return seccomp_phase2(phase1_result); | ||
| 612 | } | ||
| 613 | |||
| 614 | #ifdef CONFIG_SECCOMP_FILTER | ||
| 615 | static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd) | ||
| 616 | { | ||
| 617 | u32 filter_ret, action; | ||
| 618 | int data; | ||
| 571 | 619 | ||
| 572 | /* | 620 | /* |
| 573 | * Make sure that any changes to mode from another thread have | 621 | * Make sure that any changes to mode from another thread have |
| @@ -575,85 +623,127 @@ int __secure_computing(int this_syscall) | |||
| 575 | */ | 623 | */ |
| 576 | rmb(); | 624 | rmb(); |
| 577 | 625 | ||
| 578 | switch (current->seccomp.mode) { | 626 | filter_ret = seccomp_run_filters(sd); |
| 579 | case SECCOMP_MODE_STRICT: | 627 | data = filter_ret & SECCOMP_RET_DATA; |
| 580 | syscall = mode1_syscalls; | 628 | action = filter_ret & SECCOMP_RET_ACTION; |
| 581 | #ifdef CONFIG_COMPAT | 629 | |
| 582 | if (is_compat_task()) | 630 | switch (action) { |
| 583 | syscall = mode1_syscalls_32; | 631 | case SECCOMP_RET_ERRNO: |
| 632 | /* Set the low-order 16-bits as an errno. */ | ||
| 633 | syscall_set_return_value(current, task_pt_regs(current), | ||
| 634 | -data, 0); | ||
| 635 | goto skip; | ||
| 636 | |||
| 637 | case SECCOMP_RET_TRAP: | ||
| 638 | /* Show the handler the original registers. */ | ||
| 639 | syscall_rollback(current, task_pt_regs(current)); | ||
| 640 | /* Let the filter pass back 16 bits of data. */ | ||
| 641 | seccomp_send_sigsys(this_syscall, data); | ||
| 642 | goto skip; | ||
| 643 | |||
| 644 | case SECCOMP_RET_TRACE: | ||
| 645 | return filter_ret; /* Save the rest for phase 2. */ | ||
| 646 | |||
| 647 | case SECCOMP_RET_ALLOW: | ||
| 648 | return SECCOMP_PHASE1_OK; | ||
| 649 | |||
| 650 | case SECCOMP_RET_KILL: | ||
| 651 | default: | ||
| 652 | audit_seccomp(this_syscall, SIGSYS, action); | ||
| 653 | do_exit(SIGSYS); | ||
| 654 | } | ||
| 655 | |||
| 656 | unreachable(); | ||
| 657 | |||
| 658 | skip: | ||
| 659 | audit_seccomp(this_syscall, 0, action); | ||
| 660 | return SECCOMP_PHASE1_SKIP; | ||
| 661 | } | ||
| 584 | #endif | 662 | #endif |
| 585 | do { | 663 | |
| 586 | if (*syscall == this_syscall) | 664 | /** |
| 587 | return 0; | 665 | * seccomp_phase1() - run fast path seccomp checks on the current syscall |
| 588 | } while (*++syscall); | 666 | * @sd: The seccomp_data or NULL |
| 589 | exit_sig = SIGKILL; | 667 | * |
| 590 | ret = SECCOMP_RET_KILL; | 668 | * This only reads pt_regs via the syscall_xyz helpers. The only change |
| 591 | break; | 669 | * it will make to pt_regs is via syscall_set_return_value, and it will |
| 670 | * only do that if it returns SECCOMP_PHASE1_SKIP. | ||
| 671 | * | ||
| 672 | * If sd is provided, it will not read pt_regs at all. | ||
| 673 | * | ||
| 674 | * It may also call do_exit or force a signal; these actions must be | ||
| 675 | * safe. | ||
| 676 | * | ||
| 677 | * If it returns SECCOMP_PHASE1_OK, the syscall passes checks and should | ||
| 678 | * be processed normally. | ||
| 679 | * | ||
| 680 | * If it returns SECCOMP_PHASE1_SKIP, then the syscall should not be | ||
| 681 | * invoked. In this case, seccomp_phase1 will have set the return value | ||
| 682 | * using syscall_set_return_value. | ||
| 683 | * | ||
| 684 | * If it returns anything else, then the return value should be passed | ||
| 685 | * to seccomp_phase2 from a context in which ptrace hooks are safe. | ||
| 686 | */ | ||
| 687 | u32 seccomp_phase1(struct seccomp_data *sd) | ||
| 688 | { | ||
| 689 | int mode = current->seccomp.mode; | ||
| 690 | int this_syscall = sd ? sd->nr : | ||
| 691 | syscall_get_nr(current, task_pt_regs(current)); | ||
| 692 | |||
| 693 | switch (mode) { | ||
| 694 | case SECCOMP_MODE_STRICT: | ||
| 695 | __secure_computing_strict(this_syscall); /* may call do_exit */ | ||
| 696 | return SECCOMP_PHASE1_OK; | ||
| 592 | #ifdef CONFIG_SECCOMP_FILTER | 697 | #ifdef CONFIG_SECCOMP_FILTER |
| 593 | case SECCOMP_MODE_FILTER: { | 698 | case SECCOMP_MODE_FILTER: |
| 594 | int data; | 699 | return __seccomp_phase1_filter(this_syscall, sd); |
| 595 | struct pt_regs *regs = task_pt_regs(current); | ||
| 596 | ret = seccomp_run_filters(this_syscall); | ||
| 597 | data = ret & SECCOMP_RET_DATA; | ||
| 598 | ret &= SECCOMP_RET_ACTION; | ||
| 599 | switch (ret) { | ||
| 600 | case SECCOMP_RET_ERRNO: | ||
| 601 | /* Set the low-order 16-bits as an errno. */ | ||
| 602 | syscall_set_return_value(current, regs, | ||
| 603 | -data, 0); | ||
| 604 | goto skip; | ||
| 605 | case SECCOMP_RET_TRAP: | ||
| 606 | /* Show the handler the original registers. */ | ||
| 607 | syscall_rollback(current, regs); | ||
| 608 | /* Let the filter pass back 16 bits of data. */ | ||
| 609 | seccomp_send_sigsys(this_syscall, data); | ||
| 610 | goto skip; | ||
| 611 | case SECCOMP_RET_TRACE: | ||
| 612 | /* Skip these calls if there is no tracer. */ | ||
| 613 | if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) { | ||
| 614 | syscall_set_return_value(current, regs, | ||
| 615 | -ENOSYS, 0); | ||
| 616 | goto skip; | ||
| 617 | } | ||
| 618 | /* Allow the BPF to provide the event message */ | ||
| 619 | ptrace_event(PTRACE_EVENT_SECCOMP, data); | ||
| 620 | /* | ||
| 621 | * The delivery of a fatal signal during event | ||
| 622 | * notification may silently skip tracer notification. | ||
| 623 | * Terminating the task now avoids executing a system | ||
| 624 | * call that may not be intended. | ||
| 625 | */ | ||
| 626 | if (fatal_signal_pending(current)) | ||
| 627 | break; | ||
| 628 | if (syscall_get_nr(current, regs) < 0) | ||
| 629 | goto skip; /* Explicit request to skip. */ | ||
| 630 | |||
| 631 | return 0; | ||
| 632 | case SECCOMP_RET_ALLOW: | ||
| 633 | return 0; | ||
| 634 | case SECCOMP_RET_KILL: | ||
| 635 | default: | ||
| 636 | break; | ||
| 637 | } | ||
| 638 | exit_sig = SIGSYS; | ||
| 639 | break; | ||
| 640 | } | ||
| 641 | #endif | 700 | #endif |
| 642 | default: | 701 | default: |
| 643 | BUG(); | 702 | BUG(); |
| 644 | } | 703 | } |
| 704 | } | ||
| 645 | 705 | ||
| 646 | #ifdef SECCOMP_DEBUG | 706 | /** |
| 647 | dump_stack(); | 707 | * seccomp_phase2() - finish slow path seccomp work for the current syscall |
| 648 | #endif | 708 | * @phase1_result: The return value from seccomp_phase1() |
| 649 | audit_seccomp(this_syscall, exit_sig, ret); | 709 | * |
| 650 | do_exit(exit_sig); | 710 | * This must be called from a context in which ptrace hooks can be used. |
| 651 | #ifdef CONFIG_SECCOMP_FILTER | 711 | * |
| 652 | skip: | 712 | * Returns 0 if the syscall should be processed or -1 to skip the syscall. |
| 653 | audit_seccomp(this_syscall, exit_sig, ret); | 713 | */ |
| 654 | #endif | 714 | int seccomp_phase2(u32 phase1_result) |
| 655 | return -1; | 715 | { |
| 716 | struct pt_regs *regs = task_pt_regs(current); | ||
| 717 | u32 action = phase1_result & SECCOMP_RET_ACTION; | ||
| 718 | int data = phase1_result & SECCOMP_RET_DATA; | ||
| 719 | |||
| 720 | BUG_ON(action != SECCOMP_RET_TRACE); | ||
| 721 | |||
| 722 | audit_seccomp(syscall_get_nr(current, regs), 0, action); | ||
| 723 | |||
| 724 | /* Skip these calls if there is no tracer. */ | ||
| 725 | if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) { | ||
| 726 | syscall_set_return_value(current, regs, | ||
| 727 | -ENOSYS, 0); | ||
| 728 | return -1; | ||
| 729 | } | ||
| 730 | |||
| 731 | /* Allow the BPF to provide the event message */ | ||
| 732 | ptrace_event(PTRACE_EVENT_SECCOMP, data); | ||
| 733 | /* | ||
| 734 | * The delivery of a fatal signal during event | ||
| 735 | * notification may silently skip tracer notification. | ||
| 736 | * Terminating the task now avoids executing a system | ||
| 737 | * call that may not be intended. | ||
| 738 | */ | ||
| 739 | if (fatal_signal_pending(current)) | ||
| 740 | do_exit(SIGSYS); | ||
| 741 | if (syscall_get_nr(current, regs) < 0) | ||
| 742 | return -1; /* Explicit request to skip. */ | ||
| 743 | |||
| 744 | return 0; | ||
| 656 | } | 745 | } |
| 746 | #endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */ | ||
| 657 | 747 | ||
| 658 | long prctl_get_seccomp(void) | 748 | long prctl_get_seccomp(void) |
| 659 | { | 749 | { |
