summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrey Ignatov <rdna@fb.com>2019-03-07 21:38:43 -0500
committerAlexei Starovoitov <ast@kernel.org>2019-04-12 16:54:58 -0400
commit4e63acdff864654cee0ac5aaeda3913798ee78f6 (patch)
tree85e2cff7f791e8e98dfcca646211ccc1278de61e
parent1d11b3016cec4ed9770b98e82a61708c8f4926e7 (diff)
bpf: Introduce bpf_sysctl_{get,set}_new_value helpers
Add helpers to work with the new value being written to a sysctl by user space. bpf_sysctl_get_new_value() copies the value being written to the sysctl into the provided buffer. bpf_sysctl_set_new_value() overrides the new value being written by user space with the one from the provided buffer. The buffer should contain a string representation of the value, similar to what can be seen in /proc/sys/. Both helpers can be used only on sysctl write. File position matters and can be managed by an interface that will be introduced separately. E.g. if user space calls sys_write to a file in /proc/sys/ at file position = X, where X > 0, then the value set by bpf_sysctl_set_new_value() will be written starting from X. If a program wants to override the whole value with the specified buffer, the file position has to be set to zero. Documentation for the new helpers is provided in bpf.h UAPI. Signed-off-by: Andrey Ignatov <rdna@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--fs/proc/proc_sysctl.c22
-rw-r--r--include/linux/bpf-cgroup.h8
-rw-r--r--include/linux/filter.h3
-rw-r--r--include/uapi/linux/bpf.h38
-rw-r--r--kernel/bpf/cgroup.c81
5 files changed, 142 insertions, 10 deletions
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index e01b02150340..023101c6f0d7 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -570,8 +570,8 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
570 struct inode *inode = file_inode(filp); 570 struct inode *inode = file_inode(filp);
571 struct ctl_table_header *head = grab_header(inode); 571 struct ctl_table_header *head = grab_header(inode);
572 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 572 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
573 void *new_buf = NULL;
573 ssize_t error; 574 ssize_t error;
574 size_t res;
575 575
576 if (IS_ERR(head)) 576 if (IS_ERR(head))
577 return PTR_ERR(head); 577 return PTR_ERR(head);
@@ -589,15 +589,27 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
589 if (!table->proc_handler) 589 if (!table->proc_handler)
590 goto out; 590 goto out;
591 591
592 error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write); 592 error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, &count,
593 &new_buf);
593 if (error) 594 if (error)
594 goto out; 595 goto out;
595 596
596 /* careful: calling conventions are nasty here */ 597 /* careful: calling conventions are nasty here */
597 res = count; 598 if (new_buf) {
598 error = table->proc_handler(table, write, buf, &res, ppos); 599 mm_segment_t old_fs;
600
601 old_fs = get_fs();
602 set_fs(KERNEL_DS);
603 error = table->proc_handler(table, write, (void __user *)new_buf,
604 &count, ppos);
605 set_fs(old_fs);
606 kfree(new_buf);
607 } else {
608 error = table->proc_handler(table, write, buf, &count, ppos);
609 }
610
599 if (!error) 611 if (!error)
600 error = res; 612 error = count;
601out: 613out:
602 sysctl_head_finish(head); 614 sysctl_head_finish(head);
603 615
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index b1c45da20a26..1e97271f9a10 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -113,7 +113,8 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
113 113
114int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, 114int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
115 struct ctl_table *table, int write, 115 struct ctl_table *table, int write,
116 enum bpf_attach_type type); 116 void __user *buf, size_t *pcount,
117 void **new_buf, enum bpf_attach_type type);
117 118
118static inline enum bpf_cgroup_storage_type cgroup_storage_type( 119static inline enum bpf_cgroup_storage_type cgroup_storage_type(
119 struct bpf_map *map) 120 struct bpf_map *map)
@@ -261,11 +262,12 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
261}) 262})
262 263
263 264
264#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write) \ 265#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, nbuf) \
265({ \ 266({ \
266 int __ret = 0; \ 267 int __ret = 0; \
267 if (cgroup_bpf_enabled) \ 268 if (cgroup_bpf_enabled) \
268 __ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \ 269 __ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \
270 buf, count, nbuf, \
269 BPF_CGROUP_SYSCTL); \ 271 BPF_CGROUP_SYSCTL); \
270 __ret; \ 272 __ret; \
271}) 273})
@@ -338,7 +340,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
338#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) 340#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
339#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) 341#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
340#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) 342#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
341#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write) ({ 0; }) 343#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,nbuf) ({ 0; })
342 344
343#define for_each_cgroup_storage_type(stype) for (; false; ) 345#define for_each_cgroup_storage_type(stype) for (; false; )
344 346
diff --git a/include/linux/filter.h b/include/linux/filter.h
index f254ff92819f..a23653f9460c 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1184,6 +1184,9 @@ struct bpf_sysctl_kern {
1184 struct ctl_table *table; 1184 struct ctl_table *table;
1185 void *cur_val; 1185 void *cur_val;
1186 size_t cur_len; 1186 size_t cur_len;
1187 void *new_val;
1188 size_t new_len;
1189 int new_updated;
1187 int write; 1190 int write;
1188}; 1191};
1189 1192
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 063543afa359..547b8258d731 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2541,6 +2541,40 @@ union bpf_attr {
2541 * 2541 *
2542 * **-EINVAL** if current value was unavailable, e.g. because 2542 * **-EINVAL** if current value was unavailable, e.g. because
2543 * sysctl is uninitialized and read returns -EIO for it. 2543 * sysctl is uninitialized and read returns -EIO for it.
2544 *
2545 * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
2546 * Description
2547 * Get new value being written by user space to sysctl (before
2548 * the actual write happens) and copy it as a string into the
2549 * buffer *buf* of size *buf_len* provided by the program.
2550 *
2551 * User space may write new value at file position > 0.
2552 *
2553 * The buffer is always NUL terminated, unless it's zero-sized.
2554 * Return
2555 * Number of characters copied (not including the trailing NUL).
2556 *
2557 * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
2558 * truncated value in this case).
2559 *
2560 * **-EINVAL** if sysctl is being read.
2561 *
2562 * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len)
2563 * Description
2564 * Override new value being written by user space to sysctl with
2565 * value provided by program in buffer *buf* of size *buf_len*.
2566 *
2567 * *buf* should contain a string in same form as provided by user
2568 * space on sysctl write.
2569 *
2570 * User space may write new value at file position > 0. To override
2571 * the whole sysctl value, file position should be set to zero.
2572 * Return
2573 * 0 on success.
2574 *
2575 * **-E2BIG** if the *buf_len* is too big.
2576 *
2577 * **-EINVAL** if sysctl is being read.
2544 */ 2578 */
2545#define __BPF_FUNC_MAPPER(FN) \ 2579#define __BPF_FUNC_MAPPER(FN) \
2546 FN(unspec), \ 2580 FN(unspec), \
@@ -2645,7 +2679,9 @@ union bpf_attr {
2645 FN(skc_lookup_tcp), \ 2679 FN(skc_lookup_tcp), \
2646 FN(tcp_check_syncookie), \ 2680 FN(tcp_check_syncookie), \
2647 FN(sysctl_get_name), \ 2681 FN(sysctl_get_name), \
2648 FN(sysctl_get_current_value), 2682 FN(sysctl_get_current_value), \
2683 FN(sysctl_get_new_value), \
2684 FN(sysctl_set_new_value),
2649 2685
2650/* integer value in 'imm' field of BPF_CALL instruction selects which helper 2686/* integer value in 'imm' field of BPF_CALL instruction selects which helper
2651 * function eBPF program intends to call 2687 * function eBPF program intends to call
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index c6b2cf29a54b..ba4e21986760 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -778,6 +778,13 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
778 * @head: sysctl table header 778 * @head: sysctl table header
779 * @table: sysctl table 779 * @table: sysctl table
780 * @write: sysctl is being read (= 0) or written (= 1) 780 * @write: sysctl is being read (= 0) or written (= 1)
781 * @buf: pointer to buffer passed by user space
782 * @pcount: value-result argument: value is size of buffer pointed to by @buf,
783 * result is size of @new_buf if program set new value, initial value
784 * otherwise
785 * @new_buf: pointer to pointer to new buffer that will be allocated if program
786 * overrides new value provided by user space on sysctl write
787 * NOTE: it's the caller's responsibility to free *new_buf if it was set
781 * @type: type of program to be executed 788 * @type: type of program to be executed
782 * 789 *
783 * Program is run when sysctl is being accessed, either read or written, and 790 * Program is run when sysctl is being accessed, either read or written, and
@@ -788,7 +795,8 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
788 */ 795 */
789int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, 796int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
790 struct ctl_table *table, int write, 797 struct ctl_table *table, int write,
791 enum bpf_attach_type type) 798 void __user *buf, size_t *pcount,
799 void **new_buf, enum bpf_attach_type type)
792{ 800{
793 struct bpf_sysctl_kern ctx = { 801 struct bpf_sysctl_kern ctx = {
794 .head = head, 802 .head = head,
@@ -796,6 +804,9 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
796 .write = write, 804 .write = write,
797 .cur_val = NULL, 805 .cur_val = NULL,
798 .cur_len = PAGE_SIZE, 806 .cur_len = PAGE_SIZE,
807 .new_val = NULL,
808 .new_len = 0,
809 .new_updated = 0,
799 }; 810 };
800 struct cgroup *cgrp; 811 struct cgroup *cgrp;
801 int ret; 812 int ret;
@@ -818,6 +829,18 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
818 ctx.cur_len = 0; 829 ctx.cur_len = 0;
819 } 830 }
820 831
832 if (write && buf && *pcount) {
833 /* BPF program should be able to override new value with a
834 * buffer bigger than provided by user.
835 */
836 ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL);
837 ctx.new_len = min(PAGE_SIZE, *pcount);
838 if (!ctx.new_val ||
839 copy_from_user(ctx.new_val, buf, ctx.new_len))
840 /* Let BPF program decide how to proceed. */
841 ctx.new_len = 0;
842 }
843
821 rcu_read_lock(); 844 rcu_read_lock();
822 cgrp = task_dfl_cgroup(current); 845 cgrp = task_dfl_cgroup(current);
823 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN); 846 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
@@ -825,6 +848,13 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
825 848
826 kfree(ctx.cur_val); 849 kfree(ctx.cur_val);
827 850
851 if (ret == 1 && ctx.new_updated) {
852 *new_buf = ctx.new_val;
853 *pcount = ctx.new_len;
854 } else {
855 kfree(ctx.new_val);
856 }
857
828 return ret == 1 ? 0 : -EPERM; 858 return ret == 1 ? 0 : -EPERM;
829} 859}
830EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl); 860EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);
@@ -932,6 +962,51 @@ static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = {
932 .arg3_type = ARG_CONST_SIZE, 962 .arg3_type = ARG_CONST_SIZE,
933}; 963};
934 964
965BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf,
966 size_t, buf_len)
967{
968 if (!ctx->write) {
969 if (buf && buf_len)
970 memset(buf, '\0', buf_len);
971 return -EINVAL;
972 }
973 return copy_sysctl_value(buf, buf_len, ctx->new_val, ctx->new_len);
974}
975
976static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = {
977 .func = bpf_sysctl_get_new_value,
978 .gpl_only = false,
979 .ret_type = RET_INTEGER,
980 .arg1_type = ARG_PTR_TO_CTX,
981 .arg2_type = ARG_PTR_TO_UNINIT_MEM,
982 .arg3_type = ARG_CONST_SIZE,
983};
984
985BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx,
986 const char *, buf, size_t, buf_len)
987{
988 if (!ctx->write || !ctx->new_val || !ctx->new_len || !buf || !buf_len)
989 return -EINVAL;
990
991 if (buf_len > PAGE_SIZE - 1)
992 return -E2BIG;
993
994 memcpy(ctx->new_val, buf, buf_len);
995 ctx->new_len = buf_len;
996 ctx->new_updated = 1;
997
998 return 0;
999}
1000
1001static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = {
1002 .func = bpf_sysctl_set_new_value,
1003 .gpl_only = false,
1004 .ret_type = RET_INTEGER,
1005 .arg1_type = ARG_PTR_TO_CTX,
1006 .arg2_type = ARG_PTR_TO_MEM,
1007 .arg3_type = ARG_CONST_SIZE,
1008};
1009
935static const struct bpf_func_proto * 1010static const struct bpf_func_proto *
936sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 1011sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
937{ 1012{
@@ -940,6 +1015,10 @@ sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
940 return &bpf_sysctl_get_name_proto; 1015 return &bpf_sysctl_get_name_proto;
941 case BPF_FUNC_sysctl_get_current_value: 1016 case BPF_FUNC_sysctl_get_current_value:
942 return &bpf_sysctl_get_current_value_proto; 1017 return &bpf_sysctl_get_current_value_proto;
1018 case BPF_FUNC_sysctl_get_new_value:
1019 return &bpf_sysctl_get_new_value_proto;
1020 case BPF_FUNC_sysctl_set_new_value:
1021 return &bpf_sysctl_set_new_value_proto;
943 default: 1022 default:
944 return cgroup_base_func_proto(func_id, prog); 1023 return cgroup_base_func_proto(func_id, prog);
945 } 1024 }