aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/bpf/cgroup.c
diff options
context:
space:
mode:
author: Andrey Ignatov <rdna@fb.com> 2019-02-27 15:59:24 -0500
committer: Alexei Starovoitov <ast@kernel.org> 2019-04-12 16:54:58 -0400
commit: 7b146cebe30cb481b0f70d85779da938da818637 (patch)
tree: 11dbbeb42b32557d345e6dac2baf2881cbe5adb5 /kernel/bpf/cgroup.c
parent: b1cd609d9b517f01867c211bd520cc805db3068a (diff)
bpf: Sysctl hook
Containerized applications may run as root, and this may create problems for the whole host. Specifically, such applications may change a sysctl and affect applications in other containers.

Furthermore, in existing infrastructure it may not be possible to just completely disable writing to sysctl. Instead, such a process should be gradual, with the ability to log which sysctls are being changed by a container, investigate, limit the set of writable sysctls to the currently used ones (so that new ones cannot be changed), and eventually reduce this set to zero.

The patch introduces a new program type, BPF_PROG_TYPE_CGROUP_SYSCTL, and attach type, BPF_CGROUP_SYSCTL, to solve these problems on a per-cgroup basis.

The new program type has access to the following minimal context:

	struct bpf_sysctl {
		__u32 write;
	};

where @write indicates whether the sysctl is being read (= 0) or written (= 1).

Helpers to access the sysctl name and value will be introduced separately.

The BPF_CGROUP_SYSCTL attach point is added to the sysctl code right before passing control to ctl_table->proc_handler, so that a BPF program can either allow or deny access to the sysctl.

Suggested-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel/bpf/cgroup.c')
-rw-r--r-- kernel/bpf/cgroup.c 92
1 files changed, 92 insertions, 0 deletions
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index f6cd38746df2..610491b5f0aa 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -11,7 +11,9 @@
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12#include <linux/atomic.h> 12#include <linux/atomic.h>
13#include <linux/cgroup.h> 13#include <linux/cgroup.h>
14#include <linux/filter.h>
14#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/sysctl.h>
15#include <linux/bpf.h> 17#include <linux/bpf.h>
16#include <linux/bpf-cgroup.h> 18#include <linux/bpf-cgroup.h>
17#include <net/sock.h> 19#include <net/sock.h>
@@ -768,3 +770,93 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
768 .get_func_proto = cgroup_dev_func_proto, 770 .get_func_proto = cgroup_dev_func_proto,
769 .is_valid_access = cgroup_dev_is_valid_access, 771 .is_valid_access = cgroup_dev_is_valid_access,
770}; 772};
773
774/**
775 * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl
776 *
777 * @head: sysctl table header
778 * @table: sysctl table
779 * @write: sysctl is being read (= 0) or written (= 1)
780 * @type: type of program to be executed
781 *
782 * Program is run when sysctl is being accessed, either read or written, and
783 * can allow or deny such access.
784 *
785 * This function will return %-EPERM if an attached program is found and
786 * returned value != 1 during execution. In all other cases 0 is returned.
787 */
788int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
789 struct ctl_table *table, int write,
790 enum bpf_attach_type type)
791{
792 struct bpf_sysctl_kern ctx = {
793 .head = head,
794 .table = table,
795 .write = write,
796 };
797 struct cgroup *cgrp;
798 int ret;
799
800 rcu_read_lock();
801 cgrp = task_dfl_cgroup(current);
802 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
803 rcu_read_unlock();
804
805 return ret == 1 ? 0 : -EPERM;
806}
807EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);
808
809static const struct bpf_func_proto *
810sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
811{
812 return cgroup_base_func_proto(func_id, prog);
813}
814
815static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
816 const struct bpf_prog *prog,
817 struct bpf_insn_access_aux *info)
818{
819 const int size_default = sizeof(__u32);
820
821 if (off < 0 || off + size > sizeof(struct bpf_sysctl) ||
822 off % size || type != BPF_READ)
823 return false;
824
825 switch (off) {
826 case offsetof(struct bpf_sysctl, write):
827 bpf_ctx_record_field_size(info, size_default);
828 return bpf_ctx_narrow_access_ok(off, size, size_default);
829 default:
830 return false;
831 }
832}
833
834static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
835 const struct bpf_insn *si,
836 struct bpf_insn *insn_buf,
837 struct bpf_prog *prog, u32 *target_size)
838{
839 struct bpf_insn *insn = insn_buf;
840
841 switch (si->off) {
842 case offsetof(struct bpf_sysctl, write):
843 *insn++ = BPF_LDX_MEM(
844 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
845 bpf_target_off(struct bpf_sysctl_kern, write,
846 FIELD_SIZEOF(struct bpf_sysctl_kern,
847 write),
848 target_size));
849 break;
850 }
851
852 return insn - insn_buf;
853}
854
855const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
856 .get_func_proto = sysctl_func_proto,
857 .is_valid_access = sysctl_is_valid_access,
858 .convert_ctx_access = sysctl_convert_ctx_access,
859};
860
861const struct bpf_prog_ops cg_sysctl_prog_ops = {
862};