aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrey Ignatov <rdna@fb.com>2019-02-27 15:59:24 -0500
committerAlexei Starovoitov <ast@kernel.org>2019-04-12 16:54:58 -0400
commit7b146cebe30cb481b0f70d85779da938da818637 (patch)
tree11dbbeb42b32557d345e6dac2baf2881cbe5adb5
parentb1cd609d9b517f01867c211bd520cc805db3068a (diff)
bpf: Sysctl hook
Containerized applications may run as root, which can create problems for the whole host. Specifically, such applications may change a sysctl and affect applications in other containers.

Furthermore, in existing infrastructure it may not be possible to simply disable writing to sysctls completely. Instead, the process should be gradual, with the ability to log which sysctls are being changed by a container, investigate, limit the set of writable sysctls to the ones currently in use (so that new ones cannot be changed), and eventually reduce this set to zero.

The patch introduces a new program type, BPF_PROG_TYPE_CGROUP_SYSCTL, and a new attach type, BPF_CGROUP_SYSCTL, to solve these problems on a per-cgroup basis.

The new program type has access to the following minimal context:

struct bpf_sysctl { __u32 write; };

where @write indicates whether the sysctl is being read (= 0) or written (= 1).

Helpers to access the sysctl name and value will be introduced separately.

The BPF_CGROUP_SYSCTL attach point is added to the sysctl code right before control is passed to ctl_table->proc_handler, so that a BPF program can either allow or deny access to the sysctl.

Suggested-by: Roman Gushchin <guro@fb.com> Signed-off-by: Andrey Ignatov <rdna@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--fs/proc/proc_sysctl.c5
-rw-r--r--include/linux/bpf-cgroup.h18
-rw-r--r--include/linux/bpf_types.h1
-rw-r--r--include/linux/filter.h8
-rw-r--r--include/uapi/linux/bpf.h9
-rw-r--r--kernel/bpf/cgroup.c92
-rw-r--r--kernel/bpf/syscall.c7
-rw-r--r--kernel/bpf/verifier.c1
8 files changed, 141 insertions, 0 deletions
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index d65390727541..e01b02150340 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -13,6 +13,7 @@
13#include <linux/namei.h> 13#include <linux/namei.h>
14#include <linux/mm.h> 14#include <linux/mm.h>
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/bpf-cgroup.h>
16#include "internal.h" 17#include "internal.h"
17 18
18static const struct dentry_operations proc_sys_dentry_operations; 19static const struct dentry_operations proc_sys_dentry_operations;
@@ -588,6 +589,10 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
588 if (!table->proc_handler) 589 if (!table->proc_handler)
589 goto out; 590 goto out;
590 591
592 error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write);
593 if (error)
594 goto out;
595
591 /* careful: calling conventions are nasty here */ 596 /* careful: calling conventions are nasty here */
592 res = count; 597 res = count;
593 error = table->proc_handler(table, write, buf, &res, ppos); 598 error = table->proc_handler(table, write, buf, &res, ppos);
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index a4c644c1c091..b1c45da20a26 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -17,6 +17,8 @@ struct bpf_map;
17struct bpf_prog; 17struct bpf_prog;
18struct bpf_sock_ops_kern; 18struct bpf_sock_ops_kern;
19struct bpf_cgroup_storage; 19struct bpf_cgroup_storage;
20struct ctl_table;
21struct ctl_table_header;
20 22
21#ifdef CONFIG_CGROUP_BPF 23#ifdef CONFIG_CGROUP_BPF
22 24
@@ -109,6 +111,10 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
109int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, 111int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
110 short access, enum bpf_attach_type type); 112 short access, enum bpf_attach_type type);
111 113
114int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
115 struct ctl_table *table, int write,
116 enum bpf_attach_type type);
117
112static inline enum bpf_cgroup_storage_type cgroup_storage_type( 118static inline enum bpf_cgroup_storage_type cgroup_storage_type(
113 struct bpf_map *map) 119 struct bpf_map *map)
114{ 120{
@@ -253,6 +259,17 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
253 \ 259 \
254 __ret; \ 260 __ret; \
255}) 261})
262
263
/*
 * Sysctl access hook. When cgroup BPF is enabled, evaluates to the result of
 * __cgroup_bpf_run_filter_sysctl() for the BPF_CGROUP_SYSCTL attach type;
 * otherwise evaluates to 0 without touching the arguments.
 */
#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write)			       \
({									       \
	int __ret;							       \
	if (cgroup_bpf_enabled)						       \
		__ret = __cgroup_bpf_run_filter_sysctl(head, table, write,     \
						       BPF_CGROUP_SYSCTL);     \
	else								       \
		__ret = 0;						       \
	__ret;								       \
})
272
256int cgroup_bpf_prog_attach(const union bpf_attr *attr, 273int cgroup_bpf_prog_attach(const union bpf_attr *attr,
257 enum bpf_prog_type ptype, struct bpf_prog *prog); 274 enum bpf_prog_type ptype, struct bpf_prog *prog);
258int cgroup_bpf_prog_detach(const union bpf_attr *attr, 275int cgroup_bpf_prog_detach(const union bpf_attr *attr,
@@ -321,6 +338,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
321#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) 338#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
322#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) 339#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
323#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) 340#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
341#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write) ({ 0; })
324 342
325#define for_each_cgroup_storage_type(stype) for (; false; ) 343#define for_each_cgroup_storage_type(stype) for (; false; )
326 344
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 08bf2f1fe553..d26991a16894 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -28,6 +28,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
28#endif 28#endif
29#ifdef CONFIG_CGROUP_BPF 29#ifdef CONFIG_CGROUP_BPF
30BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) 30BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
31BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl)
31#endif 32#endif
32#ifdef CONFIG_BPF_LIRC_MODE2 33#ifdef CONFIG_BPF_LIRC_MODE2
33BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2) 34BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 6074aa064b54..a17732057880 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -33,6 +33,8 @@ struct bpf_prog_aux;
33struct xdp_rxq_info; 33struct xdp_rxq_info;
34struct xdp_buff; 34struct xdp_buff;
35struct sock_reuseport; 35struct sock_reuseport;
36struct ctl_table;
37struct ctl_table_header;
36 38
37/* ArgX, context and stack frame pointer register positions. Note, 39/* ArgX, context and stack frame pointer register positions. Note,
38 * Arg1, Arg2, Arg3, etc are used as argument mappings of function 40 * Arg1, Arg2, Arg3, etc are used as argument mappings of function
@@ -1177,4 +1179,10 @@ struct bpf_sock_ops_kern {
1177 */ 1179 */
1178}; 1180};
1179 1181
/*
 * Kernel-side context built by __cgroup_bpf_run_filter_sysctl() and handed
 * to the attached sysctl programs.
 */
struct bpf_sysctl_kern {
	struct ctl_table_header *head;	/* sysctl table header */
	struct ctl_table *table;	/* sysctl table being accessed */
	int write;			/* 0 = sysctl is read, 1 = written */
};
1187
1180#endif /* __LINUX_FILTER_H__ */ 1188#endif /* __LINUX_FILTER_H__ */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2e96d0b4bf65..cc2a2466d5f3 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -167,6 +167,7 @@ enum bpf_prog_type {
167 BPF_PROG_TYPE_LIRC_MODE2, 167 BPF_PROG_TYPE_LIRC_MODE2,
168 BPF_PROG_TYPE_SK_REUSEPORT, 168 BPF_PROG_TYPE_SK_REUSEPORT,
169 BPF_PROG_TYPE_FLOW_DISSECTOR, 169 BPF_PROG_TYPE_FLOW_DISSECTOR,
170 BPF_PROG_TYPE_CGROUP_SYSCTL,
170}; 171};
171 172
172enum bpf_attach_type { 173enum bpf_attach_type {
@@ -188,6 +189,7 @@ enum bpf_attach_type {
188 BPF_CGROUP_UDP6_SENDMSG, 189 BPF_CGROUP_UDP6_SENDMSG,
189 BPF_LIRC_MODE2, 190 BPF_LIRC_MODE2,
190 BPF_FLOW_DISSECTOR, 191 BPF_FLOW_DISSECTOR,
192 BPF_CGROUP_SYSCTL,
191 __MAX_BPF_ATTACH_TYPE 193 __MAX_BPF_ATTACH_TYPE
192}; 194};
193 195
@@ -3308,4 +3310,11 @@ struct bpf_line_info {
3308struct bpf_spin_lock { 3310struct bpf_spin_lock {
3309 __u32 val; 3311 __u32 val;
3310}; 3312};
3313
3314struct bpf_sysctl {
3315 __u32 write; /* Sysctl is being read (= 0) or written (= 1).
3316 * Allows 1,2,4-byte read, but no write.
3317 */
3318};
3319
3311#endif /* _UAPI__LINUX_BPF_H__ */ 3320#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index f6cd38746df2..610491b5f0aa 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -11,7 +11,9 @@
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12#include <linux/atomic.h> 12#include <linux/atomic.h>
13#include <linux/cgroup.h> 13#include <linux/cgroup.h>
14#include <linux/filter.h>
14#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/sysctl.h>
15#include <linux/bpf.h> 17#include <linux/bpf.h>
16#include <linux/bpf-cgroup.h> 18#include <linux/bpf-cgroup.h>
17#include <net/sock.h> 19#include <net/sock.h>
@@ -768,3 +770,93 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
768 .get_func_proto = cgroup_dev_func_proto, 770 .get_func_proto = cgroup_dev_func_proto,
769 .is_valid_access = cgroup_dev_is_valid_access, 771 .is_valid_access = cgroup_dev_is_valid_access,
770}; 772};
773
774/**
775 * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl
776 *
777 * @head: sysctl table header
778 * @table: sysctl table
779 * @write: sysctl is being read (= 0) or written (= 1)
780 * @type: type of program to be executed
781 *
782 * Program is run when sysctl is being accessed, either read or written, and
783 * can allow or deny such access.
784 *
785 * This function will return %-EPERM if an attached program is found and
786 * returned value != 1 during execution. In all other cases 0 is returned.
787 */
788int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
789 struct ctl_table *table, int write,
790 enum bpf_attach_type type)
791{
792 struct bpf_sysctl_kern ctx = {
793 .head = head,
794 .table = table,
795 .write = write,
796 };
797 struct cgroup *cgrp;
798 int ret;
799
800 rcu_read_lock();
801 cgrp = task_dfl_cgroup(current);
802 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
803 rcu_read_unlock();
804
805 return ret == 1 ? 0 : -EPERM;
806}
807EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);
808
809static const struct bpf_func_proto *
810sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
811{
812 return cgroup_base_func_proto(func_id, prog);
813}
814
815static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
816 const struct bpf_prog *prog,
817 struct bpf_insn_access_aux *info)
818{
819 const int size_default = sizeof(__u32);
820
821 if (off < 0 || off + size > sizeof(struct bpf_sysctl) ||
822 off % size || type != BPF_READ)
823 return false;
824
825 switch (off) {
826 case offsetof(struct bpf_sysctl, write):
827 bpf_ctx_record_field_size(info, size_default);
828 return bpf_ctx_narrow_access_ok(off, size, size_default);
829 default:
830 return false;
831 }
832}
833
834static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
835 const struct bpf_insn *si,
836 struct bpf_insn *insn_buf,
837 struct bpf_prog *prog, u32 *target_size)
838{
839 struct bpf_insn *insn = insn_buf;
840
841 switch (si->off) {
842 case offsetof(struct bpf_sysctl, write):
843 *insn++ = BPF_LDX_MEM(
844 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
845 bpf_target_off(struct bpf_sysctl_kern, write,
846 FIELD_SIZEOF(struct bpf_sysctl_kern,
847 write),
848 target_size));
849 break;
850 }
851
852 return insn - insn_buf;
853}
854
855const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
856 .get_func_proto = sysctl_func_proto,
857 .is_valid_access = sysctl_is_valid_access,
858 .convert_ctx_access = sysctl_convert_ctx_access,
859};
860
861const struct bpf_prog_ops cg_sysctl_prog_ops = {
862};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index d995eedfdd16..92c9b8a32b50 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1888,6 +1888,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
1888 case BPF_FLOW_DISSECTOR: 1888 case BPF_FLOW_DISSECTOR:
1889 ptype = BPF_PROG_TYPE_FLOW_DISSECTOR; 1889 ptype = BPF_PROG_TYPE_FLOW_DISSECTOR;
1890 break; 1890 break;
1891 case BPF_CGROUP_SYSCTL:
1892 ptype = BPF_PROG_TYPE_CGROUP_SYSCTL;
1893 break;
1891 default: 1894 default:
1892 return -EINVAL; 1895 return -EINVAL;
1893 } 1896 }
@@ -1966,6 +1969,9 @@ static int bpf_prog_detach(const union bpf_attr *attr)
1966 return lirc_prog_detach(attr); 1969 return lirc_prog_detach(attr);
1967 case BPF_FLOW_DISSECTOR: 1970 case BPF_FLOW_DISSECTOR:
1968 return skb_flow_dissector_bpf_prog_detach(attr); 1971 return skb_flow_dissector_bpf_prog_detach(attr);
1972 case BPF_CGROUP_SYSCTL:
1973 ptype = BPF_PROG_TYPE_CGROUP_SYSCTL;
1974 break;
1969 default: 1975 default:
1970 return -EINVAL; 1976 return -EINVAL;
1971 } 1977 }
@@ -1999,6 +2005,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
1999 case BPF_CGROUP_UDP6_SENDMSG: 2005 case BPF_CGROUP_UDP6_SENDMSG:
2000 case BPF_CGROUP_SOCK_OPS: 2006 case BPF_CGROUP_SOCK_OPS:
2001 case BPF_CGROUP_DEVICE: 2007 case BPF_CGROUP_DEVICE:
2008 case BPF_CGROUP_SYSCTL:
2002 break; 2009 break;
2003 case BPF_LIRC_MODE2: 2010 case BPF_LIRC_MODE2:
2004 return lirc_prog_query(attr, uattr); 2011 return lirc_prog_query(attr, uattr);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f25b7c9c20ba..20808e3c95a8 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5267,6 +5267,7 @@ static int check_return_code(struct bpf_verifier_env *env)
5267 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 5267 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
5268 case BPF_PROG_TYPE_SOCK_OPS: 5268 case BPF_PROG_TYPE_SOCK_OPS:
5269 case BPF_PROG_TYPE_CGROUP_DEVICE: 5269 case BPF_PROG_TYPE_CGROUP_DEVICE:
5270 case BPF_PROG_TYPE_CGROUP_SYSCTL:
5270 break; 5271 break;
5271 default: 5272 default:
5272 return 0; 5273 return 0;