Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/arraymap.c          |  18
-rw-r--r--  kernel/bpf/cgroup.c            |  94
-rw-r--r--  kernel/bpf/core.c              |  46
-rw-r--r--  kernel/bpf/cpumap.c            |   9
-rw-r--r--  kernel/bpf/devmap.c            |  14
-rw-r--r--  kernel/bpf/hashtab.c           |  14
-rw-r--r--  kernel/bpf/local_storage.c     |  13
-rw-r--r--  kernel/bpf/lpm_trie.c          |   8
-rw-r--r--  kernel/bpf/queue_stack_maps.c  |  13
-rw-r--r--  kernel/bpf/reuseport_array.c   |  17
-rw-r--r--  kernel/bpf/stackmap.c          |  28
-rw-r--r--  kernel/bpf/syscall.c           | 103
-rw-r--r--  kernel/bpf/verifier.c          | 397
-rw-r--r--  kernel/bpf/xskmap.c            |  10
-rw-r--r--  kernel/cgroup/cgroup.c         |  11
-rw-r--r--  kernel/trace/bpf_trace.c       |  96

16 files changed, 661 insertions, 230 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 584636c9e2eb..0349cbf23cdb 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -83,6 +83,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) | |||
83 | u32 elem_size, index_mask, max_entries; | 83 | u32 elem_size, index_mask, max_entries; |
84 | bool unpriv = !capable(CAP_SYS_ADMIN); | 84 | bool unpriv = !capable(CAP_SYS_ADMIN); |
85 | u64 cost, array_size, mask64; | 85 | u64 cost, array_size, mask64; |
86 | struct bpf_map_memory mem; | ||
86 | struct bpf_array *array; | 87 | struct bpf_array *array; |
87 | 88 | ||
88 | elem_size = round_up(attr->value_size, 8); | 89 | elem_size = round_up(attr->value_size, 8); |
@@ -116,32 +117,29 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) | |||
116 | 117 | ||
117 | /* make sure there is no u32 overflow later in round_up() */ | 118 | /* make sure there is no u32 overflow later in round_up() */ |
118 | cost = array_size; | 119 | cost = array_size; |
119 | if (cost >= U32_MAX - PAGE_SIZE) | 120 | if (percpu) |
120 | return ERR_PTR(-ENOMEM); | ||
121 | if (percpu) { | ||
122 | cost += (u64)attr->max_entries * elem_size * num_possible_cpus(); | 121 | cost += (u64)attr->max_entries * elem_size * num_possible_cpus(); |
123 | if (cost >= U32_MAX - PAGE_SIZE) | ||
124 | return ERR_PTR(-ENOMEM); | ||
125 | } | ||
126 | cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; | ||
127 | 122 | ||
128 | ret = bpf_map_precharge_memlock(cost); | 123 | ret = bpf_map_charge_init(&mem, cost); |
129 | if (ret < 0) | 124 | if (ret < 0) |
130 | return ERR_PTR(ret); | 125 | return ERR_PTR(ret); |
131 | 126 | ||
132 | /* allocate all map elements and zero-initialize them */ | 127 | /* allocate all map elements and zero-initialize them */ |
133 | array = bpf_map_area_alloc(array_size, numa_node); | 128 | array = bpf_map_area_alloc(array_size, numa_node); |
134 | if (!array) | 129 | if (!array) { |
130 | bpf_map_charge_finish(&mem); | ||
135 | return ERR_PTR(-ENOMEM); | 131 | return ERR_PTR(-ENOMEM); |
132 | } | ||
136 | array->index_mask = index_mask; | 133 | array->index_mask = index_mask; |
137 | array->map.unpriv_array = unpriv; | 134 | array->map.unpriv_array = unpriv; |
138 | 135 | ||
139 | /* copy mandatory map attributes */ | 136 | /* copy mandatory map attributes */ |
140 | bpf_map_init_from_attr(&array->map, attr); | 137 | bpf_map_init_from_attr(&array->map, attr); |
141 | array->map.pages = cost; | 138 | bpf_map_charge_move(&array->map.memory, &mem); |
142 | array->elem_size = elem_size; | 139 | array->elem_size = elem_size; |
143 | 140 | ||
144 | if (percpu && bpf_array_alloc_percpu(array)) { | 141 | if (percpu && bpf_array_alloc_percpu(array)) { |
142 | bpf_map_charge_finish(&array->map.memory); | ||
145 | bpf_map_area_free(array); | 143 | bpf_map_area_free(array); |
146 | return ERR_PTR(-ENOMEM); | 144 | return ERR_PTR(-ENOMEM); |
147 | } | 145 | } |
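The arraymap conversion above is the template the remaining map types in this diff follow (cpumap, devmap, hashtab, local_storage, lpm_trie, queue_stack_maps, reuseport_array, stackmap): charge the memlock budget before anything is allocated, hand the charge over to the map on success, and give it back on every error path. A condensed sketch of that flow; foo_map and foo_map_alloc are made-up names standing in for the concrete map structs, while the bpf_map_charge_*() helpers are the ones introduced by this series:

#include <linux/bpf.h>

struct foo_map {
        struct bpf_map map;
        /* map-specific members would follow */
};

static struct bpf_map *foo_map_alloc(union bpf_attr *attr)
{
        struct bpf_map_memory mem;
        struct foo_map *foo;
        u64 cost;
        int err;

        cost = sizeof(*foo) + (u64)attr->max_entries * attr->value_size;

        /* charge the full size up front; bpf_map_charge_init() now also
         * performs the U32_MAX - PAGE_SIZE overflow check the callers
         * used to open-code
         */
        err = bpf_map_charge_init(&mem, cost);
        if (err)
                return ERR_PTR(err);

        foo = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
        if (!foo) {
                /* undo the charge on every error path */
                bpf_map_charge_finish(&mem);
                return ERR_PTR(-ENOMEM);
        }

        bpf_map_init_from_attr(&foo->map, attr);
        /* transfer the charge to the map; it is dropped again in
         * bpf_map_free_deferred() once the map goes away
         */
        bpf_map_charge_move(&foo->map.memory, &mem);

        return &foo->map;
}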
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index fcde0f7b2585..1b65ab0df457 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -22,13 +22,23 @@ | |||
22 | DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key); | 22 | DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key); |
23 | EXPORT_SYMBOL(cgroup_bpf_enabled_key); | 23 | EXPORT_SYMBOL(cgroup_bpf_enabled_key); |
24 | 24 | ||
25 | void cgroup_bpf_offline(struct cgroup *cgrp) | ||
26 | { | ||
27 | cgroup_get(cgrp); | ||
28 | percpu_ref_kill(&cgrp->bpf.refcnt); | ||
29 | } | ||
30 | |||
25 | /** | 31 | /** |
26 | * cgroup_bpf_put() - put references of all bpf programs | 32 | * cgroup_bpf_release() - put references of all bpf programs and |
27 | * @cgrp: the cgroup to modify | 33 | * release all cgroup bpf data |
34 | * @work: work structure embedded into the cgroup to modify | ||
28 | */ | 35 | */ |
29 | void cgroup_bpf_put(struct cgroup *cgrp) | 36 | static void cgroup_bpf_release(struct work_struct *work) |
30 | { | 37 | { |
38 | struct cgroup *cgrp = container_of(work, struct cgroup, | ||
39 | bpf.release_work); | ||
31 | enum bpf_cgroup_storage_type stype; | 40 | enum bpf_cgroup_storage_type stype; |
41 | struct bpf_prog_array *old_array; | ||
32 | unsigned int type; | 42 | unsigned int type; |
33 | 43 | ||
34 | for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) { | 44 | for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) { |
@@ -45,8 +55,27 @@ void cgroup_bpf_put(struct cgroup *cgrp) | |||
45 | kfree(pl); | 55 | kfree(pl); |
46 | static_branch_dec(&cgroup_bpf_enabled_key); | 56 | static_branch_dec(&cgroup_bpf_enabled_key); |
47 | } | 57 | } |
48 | bpf_prog_array_free(cgrp->bpf.effective[type]); | 58 | old_array = rcu_dereference_protected( |
59 | cgrp->bpf.effective[type], | ||
60 | percpu_ref_is_dying(&cgrp->bpf.refcnt)); | ||
61 | bpf_prog_array_free(old_array); | ||
49 | } | 62 | } |
63 | |||
64 | percpu_ref_exit(&cgrp->bpf.refcnt); | ||
65 | cgroup_put(cgrp); | ||
66 | } | ||
67 | |||
68 | /** | ||
69 | * cgroup_bpf_release_fn() - callback used to schedule releasing | ||
70 | * of bpf cgroup data | ||
71 | * @ref: percpu ref counter structure | ||
72 | */ | ||
73 | static void cgroup_bpf_release_fn(struct percpu_ref *ref) | ||
74 | { | ||
75 | struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt); | ||
76 | |||
77 | INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release); | ||
78 | queue_work(system_wq, &cgrp->bpf.release_work); | ||
50 | } | 79 | } |
51 | 80 | ||
52 | /* count number of elements in the list. | 81 | /* count number of elements in the list. |
@@ -101,7 +130,7 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp, | |||
101 | */ | 130 | */ |
102 | static int compute_effective_progs(struct cgroup *cgrp, | 131 | static int compute_effective_progs(struct cgroup *cgrp, |
103 | enum bpf_attach_type type, | 132 | enum bpf_attach_type type, |
104 | struct bpf_prog_array __rcu **array) | 133 | struct bpf_prog_array **array) |
105 | { | 134 | { |
106 | enum bpf_cgroup_storage_type stype; | 135 | enum bpf_cgroup_storage_type stype; |
107 | struct bpf_prog_array *progs; | 136 | struct bpf_prog_array *progs; |
@@ -139,17 +168,16 @@ static int compute_effective_progs(struct cgroup *cgrp, | |||
139 | } | 168 | } |
140 | } while ((p = cgroup_parent(p))); | 169 | } while ((p = cgroup_parent(p))); |
141 | 170 | ||
142 | rcu_assign_pointer(*array, progs); | 171 | *array = progs; |
143 | return 0; | 172 | return 0; |
144 | } | 173 | } |
145 | 174 | ||
146 | static void activate_effective_progs(struct cgroup *cgrp, | 175 | static void activate_effective_progs(struct cgroup *cgrp, |
147 | enum bpf_attach_type type, | 176 | enum bpf_attach_type type, |
148 | struct bpf_prog_array __rcu *array) | 177 | struct bpf_prog_array *old_array) |
149 | { | 178 | { |
150 | struct bpf_prog_array __rcu *old_array; | 179 | rcu_swap_protected(cgrp->bpf.effective[type], old_array, |
151 | 180 | lockdep_is_held(&cgroup_mutex)); | |
152 | old_array = xchg(&cgrp->bpf.effective[type], array); | ||
153 | /* free prog array after grace period, since __cgroup_bpf_run_*() | 181 | /* free prog array after grace period, since __cgroup_bpf_run_*() |
154 | * might be still walking the array | 182 | * might be still walking the array |
155 | */ | 183 | */ |
@@ -166,8 +194,13 @@ int cgroup_bpf_inherit(struct cgroup *cgrp) | |||
166 | * that array below is variable length | 194 | * that array below is variable length |
167 | */ | 195 | */ |
168 | #define NR ARRAY_SIZE(cgrp->bpf.effective) | 196 | #define NR ARRAY_SIZE(cgrp->bpf.effective) |
169 | struct bpf_prog_array __rcu *arrays[NR] = {}; | 197 | struct bpf_prog_array *arrays[NR] = {}; |
170 | int i; | 198 | int ret, i; |
199 | |||
200 | ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0, | ||
201 | GFP_KERNEL); | ||
202 | if (ret) | ||
203 | return ret; | ||
171 | 204 | ||
172 | for (i = 0; i < NR; i++) | 205 | for (i = 0; i < NR; i++) |
173 | INIT_LIST_HEAD(&cgrp->bpf.progs[i]); | 206 | INIT_LIST_HEAD(&cgrp->bpf.progs[i]); |
@@ -183,6 +216,9 @@ int cgroup_bpf_inherit(struct cgroup *cgrp) | |||
183 | cleanup: | 216 | cleanup: |
184 | for (i = 0; i < NR; i++) | 217 | for (i = 0; i < NR; i++) |
185 | bpf_prog_array_free(arrays[i]); | 218 | bpf_prog_array_free(arrays[i]); |
219 | |||
220 | percpu_ref_exit(&cgrp->bpf.refcnt); | ||
221 | |||
186 | return -ENOMEM; | 222 | return -ENOMEM; |
187 | } | 223 | } |
188 | 224 | ||
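The refcnt introduced above follows the common percpu_ref-plus-workqueue teardown idiom: the release callback can run from atomic context (typically an RCU callback), so the real cleanup is deferred to a work item and the object stays pinned until that work has run. A minimal, cgroup-independent sketch of the same pattern; the foo names are invented for illustration:

#include <linux/percpu-refcount.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct foo {
        struct percpu_ref refcnt;
        struct work_struct release_work;
};

static void foo_release_workfn(struct work_struct *work)
{
        struct foo *foo = container_of(work, struct foo, release_work);

        /* no percpu_ref_tryget() can succeed any more: safe to tear down */
        percpu_ref_exit(&foo->refcnt);
        kfree(foo);
}

static void foo_release_fn(struct percpu_ref *ref)
{
        struct foo *foo = container_of(ref, struct foo, refcnt);

        /* last reference is gone; defer the heavy lifting */
        INIT_WORK(&foo->release_work, foo_release_workfn);
        queue_work(system_wq, &foo->release_work);
}

static int foo_init(struct foo *foo)
{
        return percpu_ref_init(&foo->refcnt, foo_release_fn, 0, GFP_KERNEL);
}

static void foo_offline(struct foo *foo)
{
        /* switch to atomic mode and drop the initial reference */
        percpu_ref_kill(&foo->refcnt);
}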
@@ -444,10 +480,14 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, | |||
444 | enum bpf_attach_type type = attr->query.attach_type; | 480 | enum bpf_attach_type type = attr->query.attach_type; |
445 | struct list_head *progs = &cgrp->bpf.progs[type]; | 481 | struct list_head *progs = &cgrp->bpf.progs[type]; |
446 | u32 flags = cgrp->bpf.flags[type]; | 482 | u32 flags = cgrp->bpf.flags[type]; |
483 | struct bpf_prog_array *effective; | ||
447 | int cnt, ret = 0, i; | 484 | int cnt, ret = 0, i; |
448 | 485 | ||
486 | effective = rcu_dereference_protected(cgrp->bpf.effective[type], | ||
487 | lockdep_is_held(&cgroup_mutex)); | ||
488 | |||
449 | if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) | 489 | if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) |
450 | cnt = bpf_prog_array_length(cgrp->bpf.effective[type]); | 490 | cnt = bpf_prog_array_length(effective); |
451 | else | 491 | else |
452 | cnt = prog_list_length(progs); | 492 | cnt = prog_list_length(progs); |
453 | 493 | ||
@@ -464,8 +504,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, | |||
464 | } | 504 | } |
465 | 505 | ||
466 | if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) { | 506 | if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) { |
467 | return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type], | 507 | return bpf_prog_array_copy_to_user(effective, prog_ids, cnt); |
468 | prog_ids, cnt); | ||
469 | } else { | 508 | } else { |
470 | struct bpf_prog_list *pl; | 509 | struct bpf_prog_list *pl; |
471 | u32 id; | 510 | u32 id; |
@@ -548,8 +587,16 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, | |||
548 | * The program type passed in via @type must be suitable for network | 587 | * The program type passed in via @type must be suitable for network |
549 | * filtering. No further check is performed to assert that. | 588 | * filtering. No further check is performed to assert that. |
550 | * | 589 | * |
551 | * This function will return %-EPERM if any if an attached program was found | 590 | * For egress packets, this function can return: |
552 | * and if it returned != 1 during execution. In all other cases, 0 is returned. | 591 | * NET_XMIT_SUCCESS (0) - continue with packet output |
592 | * NET_XMIT_DROP (1) - drop packet and notify TCP to call cwr | ||
593 | * NET_XMIT_CN (2) - continue with packet output and notify TCP | ||
594 | * to call cwr | ||
595 | * -EPERM - drop packet | ||
596 | * | ||
597 | * For ingress packets, this function will return -EPERM if any | ||
598 | * attached program was found and if it returned != 1 during execution. | ||
599 | * Otherwise 0 is returned. | ||
553 | */ | 600 | */ |
554 | int __cgroup_bpf_run_filter_skb(struct sock *sk, | 601 | int __cgroup_bpf_run_filter_skb(struct sock *sk, |
555 | struct sk_buff *skb, | 602 | struct sk_buff *skb, |
@@ -575,12 +622,19 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, | |||
575 | /* compute pointers for the bpf prog */ | 622 | /* compute pointers for the bpf prog */ |
576 | bpf_compute_and_save_data_end(skb, &saved_data_end); | 623 | bpf_compute_and_save_data_end(skb, &saved_data_end); |
577 | 624 | ||
578 | ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, | 625 | if (type == BPF_CGROUP_INET_EGRESS) { |
579 | __bpf_prog_run_save_cb); | 626 | ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY( |
627 | cgrp->bpf.effective[type], skb, __bpf_prog_run_save_cb); | ||
628 | } else { | ||
629 | ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, | ||
630 | __bpf_prog_run_save_cb); | ||
631 | ret = (ret == 1 ? 0 : -EPERM); | ||
632 | } | ||
580 | bpf_restore_data_end(skb, saved_data_end); | 633 | bpf_restore_data_end(skb, saved_data_end); |
581 | __skb_pull(skb, offset); | 634 | __skb_pull(skb, offset); |
582 | skb->sk = save_sk; | 635 | skb->sk = save_sk; |
583 | return ret == 1 ? 0 : -EPERM; | 636 | |
637 | return ret; | ||
584 | } | 638 | } |
585 | EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb); | 639 | EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb); |
586 | 640 | ||
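The extra egress return codes only make sense together with their consumer, which lives in the IP output path and is therefore outside the kernel/ directory shown in this diffstat: rather than treating everything but 1 as a drop, the caller maps the program's verdict onto the usual NET_XMIT_* semantics. Roughly, as a sketch from memory rather than the exact mainline call site:

        ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
        switch (ret) {
        case NET_XMIT_SUCCESS:
                return __ip_finish_output(net, sk, skb);
        case NET_XMIT_CN:
                /* still transmit, but report congestion so TCP enters CWR */
                return __ip_finish_output(net, sk, skb) ? : ret;
        default:
                /* NET_XMIT_DROP or -EPERM */
                kfree_skb(skb);
                return ret;
        }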
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 242a643af82f..33fb292f2e30 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1795,38 +1795,33 @@ struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags) | |||
1795 | return &empty_prog_array.hdr; | 1795 | return &empty_prog_array.hdr; |
1796 | } | 1796 | } |
1797 | 1797 | ||
1798 | void bpf_prog_array_free(struct bpf_prog_array __rcu *progs) | 1798 | void bpf_prog_array_free(struct bpf_prog_array *progs) |
1799 | { | 1799 | { |
1800 | if (!progs || | 1800 | if (!progs || progs == &empty_prog_array.hdr) |
1801 | progs == (struct bpf_prog_array __rcu *)&empty_prog_array.hdr) | ||
1802 | return; | 1801 | return; |
1803 | kfree_rcu(progs, rcu); | 1802 | kfree_rcu(progs, rcu); |
1804 | } | 1803 | } |
1805 | 1804 | ||
1806 | int bpf_prog_array_length(struct bpf_prog_array __rcu *array) | 1805 | int bpf_prog_array_length(struct bpf_prog_array *array) |
1807 | { | 1806 | { |
1808 | struct bpf_prog_array_item *item; | 1807 | struct bpf_prog_array_item *item; |
1809 | u32 cnt = 0; | 1808 | u32 cnt = 0; |
1810 | 1809 | ||
1811 | rcu_read_lock(); | 1810 | for (item = array->items; item->prog; item++) |
1812 | item = rcu_dereference(array)->items; | ||
1813 | for (; item->prog; item++) | ||
1814 | if (item->prog != &dummy_bpf_prog.prog) | 1811 | if (item->prog != &dummy_bpf_prog.prog) |
1815 | cnt++; | 1812 | cnt++; |
1816 | rcu_read_unlock(); | ||
1817 | return cnt; | 1813 | return cnt; |
1818 | } | 1814 | } |
1819 | 1815 | ||
1820 | 1816 | ||
1821 | static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array, | 1817 | static bool bpf_prog_array_copy_core(struct bpf_prog_array *array, |
1822 | u32 *prog_ids, | 1818 | u32 *prog_ids, |
1823 | u32 request_cnt) | 1819 | u32 request_cnt) |
1824 | { | 1820 | { |
1825 | struct bpf_prog_array_item *item; | 1821 | struct bpf_prog_array_item *item; |
1826 | int i = 0; | 1822 | int i = 0; |
1827 | 1823 | ||
1828 | item = rcu_dereference_check(array, 1)->items; | 1824 | for (item = array->items; item->prog; item++) { |
1829 | for (; item->prog; item++) { | ||
1830 | if (item->prog == &dummy_bpf_prog.prog) | 1825 | if (item->prog == &dummy_bpf_prog.prog) |
1831 | continue; | 1826 | continue; |
1832 | prog_ids[i] = item->prog->aux->id; | 1827 | prog_ids[i] = item->prog->aux->id; |
@@ -1839,7 +1834,7 @@ static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array, | |||
1839 | return !!(item->prog); | 1834 | return !!(item->prog); |
1840 | } | 1835 | } |
1841 | 1836 | ||
1842 | int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array, | 1837 | int bpf_prog_array_copy_to_user(struct bpf_prog_array *array, |
1843 | __u32 __user *prog_ids, u32 cnt) | 1838 | __u32 __user *prog_ids, u32 cnt) |
1844 | { | 1839 | { |
1845 | unsigned long err = 0; | 1840 | unsigned long err = 0; |
@@ -1850,18 +1845,12 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array, | |||
1850 | * cnt = bpf_prog_array_length(); | 1845 | * cnt = bpf_prog_array_length(); |
1851 | * if (cnt > 0) | 1846 | * if (cnt > 0) |
1852 | * bpf_prog_array_copy_to_user(..., cnt); | 1847 | * bpf_prog_array_copy_to_user(..., cnt); |
1853 | * so below kcalloc doesn't need extra cnt > 0 check, but | 1848 | * so below kcalloc doesn't need extra cnt > 0 check. |
1854 | * bpf_prog_array_length() releases rcu lock and | ||
1855 | * prog array could have been swapped with empty or larger array, | ||
1856 | * so always copy 'cnt' prog_ids to the user. | ||
1857 | * In a rare race the user will see zero prog_ids | ||
1858 | */ | 1849 | */ |
1859 | ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN); | 1850 | ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN); |
1860 | if (!ids) | 1851 | if (!ids) |
1861 | return -ENOMEM; | 1852 | return -ENOMEM; |
1862 | rcu_read_lock(); | ||
1863 | nospc = bpf_prog_array_copy_core(array, ids, cnt); | 1853 | nospc = bpf_prog_array_copy_core(array, ids, cnt); |
1864 | rcu_read_unlock(); | ||
1865 | err = copy_to_user(prog_ids, ids, cnt * sizeof(u32)); | 1854 | err = copy_to_user(prog_ids, ids, cnt * sizeof(u32)); |
1866 | kfree(ids); | 1855 | kfree(ids); |
1867 | if (err) | 1856 | if (err) |
@@ -1871,19 +1860,19 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array, | |||
1871 | return 0; | 1860 | return 0; |
1872 | } | 1861 | } |
1873 | 1862 | ||
1874 | void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *array, | 1863 | void bpf_prog_array_delete_safe(struct bpf_prog_array *array, |
1875 | struct bpf_prog *old_prog) | 1864 | struct bpf_prog *old_prog) |
1876 | { | 1865 | { |
1877 | struct bpf_prog_array_item *item = array->items; | 1866 | struct bpf_prog_array_item *item; |
1878 | 1867 | ||
1879 | for (; item->prog; item++) | 1868 | for (item = array->items; item->prog; item++) |
1880 | if (item->prog == old_prog) { | 1869 | if (item->prog == old_prog) { |
1881 | WRITE_ONCE(item->prog, &dummy_bpf_prog.prog); | 1870 | WRITE_ONCE(item->prog, &dummy_bpf_prog.prog); |
1882 | break; | 1871 | break; |
1883 | } | 1872 | } |
1884 | } | 1873 | } |
1885 | 1874 | ||
1886 | int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, | 1875 | int bpf_prog_array_copy(struct bpf_prog_array *old_array, |
1887 | struct bpf_prog *exclude_prog, | 1876 | struct bpf_prog *exclude_prog, |
1888 | struct bpf_prog *include_prog, | 1877 | struct bpf_prog *include_prog, |
1889 | struct bpf_prog_array **new_array) | 1878 | struct bpf_prog_array **new_array) |
@@ -1947,7 +1936,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, | |||
1947 | return 0; | 1936 | return 0; |
1948 | } | 1937 | } |
1949 | 1938 | ||
1950 | int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, | 1939 | int bpf_prog_array_copy_info(struct bpf_prog_array *array, |
1951 | u32 *prog_ids, u32 request_cnt, | 1940 | u32 *prog_ids, u32 request_cnt, |
1952 | u32 *prog_cnt) | 1941 | u32 *prog_cnt) |
1953 | { | 1942 | { |
@@ -2090,6 +2079,15 @@ bool __weak bpf_helper_changes_pkt_data(void *func) | |||
2090 | return false; | 2079 | return false; |
2091 | } | 2080 | } |
2092 | 2081 | ||
2082 | /* Return TRUE if the JIT backend wants verifier to enable sub-register usage | ||
2083 | * analysis code and wants explicit zero extension inserted by verifier. | ||
2084 | * Otherwise, return FALSE. | ||
2085 | */ | ||
2086 | bool __weak bpf_jit_needs_zext(void) | ||
2087 | { | ||
2088 | return false; | ||
2089 | } | ||
2090 | |||
2093 | /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call | 2091 | /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call |
2094 | * skb_copy_bits(), so provide a weak definition of it for NET-less config. | 2092 | * skb_copy_bits(), so provide a weak definition of it for NET-less config. |
2095 | */ | 2093 | */ |
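Because bpf_jit_needs_zext() is a __weak default, a JIT backend opts in with a one-line override in its own bpf_jit_comp.c. A sketch of such an override for a hypothetical architecture; whether a real backend wants this depends on whether its 32-bit ALU instructions already clear the upper half of the destination:

/* arch/<arch>/net/bpf_jit_comp.c (hypothetical backend) */
bool bpf_jit_needs_zext(void)
{
        /* let the verifier insert explicit zero extensions after 32-bit
         * sub-register writes instead of the JIT emitting them for every
         * 32-bit op
         */
        return true;
}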
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index cf727d77c6c6..b31a71909307 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -106,12 +106,9 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) | |||
106 | /* make sure page count doesn't overflow */ | 106 | /* make sure page count doesn't overflow */ |
107 | cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *); | 107 | cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *); |
108 | cost += cpu_map_bitmap_size(attr) * num_possible_cpus(); | 108 | cost += cpu_map_bitmap_size(attr) * num_possible_cpus(); |
109 | if (cost >= U32_MAX - PAGE_SIZE) | ||
110 | goto free_cmap; | ||
111 | cmap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; | ||
112 | 109 | ||
113 | /* Notice returns -EPERM on if map size is larger than memlock limit */ | 110 | /* Notice returns -EPERM on if map size is larger than memlock limit */ |
114 | ret = bpf_map_precharge_memlock(cmap->map.pages); | 111 | ret = bpf_map_charge_init(&cmap->map.memory, cost); |
115 | if (ret) { | 112 | if (ret) { |
116 | err = ret; | 113 | err = ret; |
117 | goto free_cmap; | 114 | goto free_cmap; |
@@ -121,7 +118,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) | |||
121 | cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr), | 118 | cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr), |
122 | __alignof__(unsigned long)); | 119 | __alignof__(unsigned long)); |
123 | if (!cmap->flush_needed) | 120 | if (!cmap->flush_needed) |
124 | goto free_cmap; | 121 | goto free_charge; |
125 | 122 | ||
126 | /* Alloc array for possible remote "destination" CPUs */ | 123 | /* Alloc array for possible remote "destination" CPUs */ |
127 | cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries * | 124 | cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries * |
@@ -133,6 +130,8 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) | |||
133 | return &cmap->map; | 130 | return &cmap->map; |
134 | free_percpu: | 131 | free_percpu: |
135 | free_percpu(cmap->flush_needed); | 132 | free_percpu(cmap->flush_needed); |
133 | free_charge: | ||
134 | bpf_map_charge_finish(&cmap->map.memory); | ||
136 | free_cmap: | 135 | free_cmap: |
137 | kfree(cmap); | 136 | kfree(cmap); |
138 | return ERR_PTR(err); | 137 | return ERR_PTR(err); |
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 1e525d70f833..5ae7cce5ef16 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -108,13 +108,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) | |||
108 | /* make sure page count doesn't overflow */ | 108 | /* make sure page count doesn't overflow */ |
109 | cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *); | 109 | cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *); |
110 | cost += dev_map_bitmap_size(attr) * num_possible_cpus(); | 110 | cost += dev_map_bitmap_size(attr) * num_possible_cpus(); |
111 | if (cost >= U32_MAX - PAGE_SIZE) | ||
112 | goto free_dtab; | ||
113 | |||
114 | dtab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; | ||
115 | 111 | ||
116 | /* if map size is larger than memlock limit, reject it early */ | 112 | /* if map size is larger than memlock limit, reject it */ |
117 | err = bpf_map_precharge_memlock(dtab->map.pages); | 113 | err = bpf_map_charge_init(&dtab->map.memory, cost); |
118 | if (err) | 114 | if (err) |
119 | goto free_dtab; | 115 | goto free_dtab; |
120 | 116 | ||
@@ -125,19 +121,21 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) | |||
125 | __alignof__(unsigned long), | 121 | __alignof__(unsigned long), |
126 | GFP_KERNEL | __GFP_NOWARN); | 122 | GFP_KERNEL | __GFP_NOWARN); |
127 | if (!dtab->flush_needed) | 123 | if (!dtab->flush_needed) |
128 | goto free_dtab; | 124 | goto free_charge; |
129 | 125 | ||
130 | dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries * | 126 | dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries * |
131 | sizeof(struct bpf_dtab_netdev *), | 127 | sizeof(struct bpf_dtab_netdev *), |
132 | dtab->map.numa_node); | 128 | dtab->map.numa_node); |
133 | if (!dtab->netdev_map) | 129 | if (!dtab->netdev_map) |
134 | goto free_dtab; | 130 | goto free_charge; |
135 | 131 | ||
136 | spin_lock(&dev_map_lock); | 132 | spin_lock(&dev_map_lock); |
137 | list_add_tail_rcu(&dtab->list, &dev_map_list); | 133 | list_add_tail_rcu(&dtab->list, &dev_map_list); |
138 | spin_unlock(&dev_map_lock); | 134 | spin_unlock(&dev_map_lock); |
139 | 135 | ||
140 | return &dtab->map; | 136 | return &dtab->map; |
137 | free_charge: | ||
138 | bpf_map_charge_finish(&dtab->map.memory); | ||
141 | free_dtab: | 139 | free_dtab: |
142 | free_percpu(dtab->flush_needed); | 140 | free_percpu(dtab->flush_needed); |
143 | kfree(dtab); | 141 | kfree(dtab); |
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 0f2708fde5f7..d92e05d9979b 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -360,14 +360,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) | |||
360 | else | 360 | else |
361 | cost += (u64) htab->elem_size * num_possible_cpus(); | 361 | cost += (u64) htab->elem_size * num_possible_cpus(); |
362 | 362 | ||
363 | if (cost >= U32_MAX - PAGE_SIZE) | 363 | /* if map size is larger than memlock limit, reject it */ |
364 | /* make sure page count doesn't overflow */ | 364 | err = bpf_map_charge_init(&htab->map.memory, cost); |
365 | goto free_htab; | ||
366 | |||
367 | htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; | ||
368 | |||
369 | /* if map size is larger than memlock limit, reject it early */ | ||
370 | err = bpf_map_precharge_memlock(htab->map.pages); | ||
371 | if (err) | 365 | if (err) |
372 | goto free_htab; | 366 | goto free_htab; |
373 | 367 | ||
@@ -376,7 +370,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) | |||
376 | sizeof(struct bucket), | 370 | sizeof(struct bucket), |
377 | htab->map.numa_node); | 371 | htab->map.numa_node); |
378 | if (!htab->buckets) | 372 | if (!htab->buckets) |
379 | goto free_htab; | 373 | goto free_charge; |
380 | 374 | ||
381 | if (htab->map.map_flags & BPF_F_ZERO_SEED) | 375 | if (htab->map.map_flags & BPF_F_ZERO_SEED) |
382 | htab->hashrnd = 0; | 376 | htab->hashrnd = 0; |
@@ -409,6 +403,8 @@ free_prealloc: | |||
409 | prealloc_destroy(htab); | 403 | prealloc_destroy(htab); |
410 | free_buckets: | 404 | free_buckets: |
411 | bpf_map_area_free(htab->buckets); | 405 | bpf_map_area_free(htab->buckets); |
406 | free_charge: | ||
407 | bpf_map_charge_finish(&htab->map.memory); | ||
412 | free_htab: | 408 | free_htab: |
413 | kfree(htab); | 409 | kfree(htab); |
414 | return ERR_PTR(err); | 410 | return ERR_PTR(err); |
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 980e8f1f6cb5..addd6fdceec8 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -272,6 +272,8 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) | |||
272 | { | 272 | { |
273 | int numa_node = bpf_map_attr_numa_node(attr); | 273 | int numa_node = bpf_map_attr_numa_node(attr); |
274 | struct bpf_cgroup_storage_map *map; | 274 | struct bpf_cgroup_storage_map *map; |
275 | struct bpf_map_memory mem; | ||
276 | int ret; | ||
275 | 277 | ||
276 | if (attr->key_size != sizeof(struct bpf_cgroup_storage_key)) | 278 | if (attr->key_size != sizeof(struct bpf_cgroup_storage_key)) |
277 | return ERR_PTR(-EINVAL); | 279 | return ERR_PTR(-EINVAL); |
@@ -290,13 +292,18 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) | |||
290 | /* max_entries is not used and enforced to be 0 */ | 292 | /* max_entries is not used and enforced to be 0 */ |
291 | return ERR_PTR(-EINVAL); | 293 | return ERR_PTR(-EINVAL); |
292 | 294 | ||
295 | ret = bpf_map_charge_init(&mem, sizeof(struct bpf_cgroup_storage_map)); | ||
296 | if (ret < 0) | ||
297 | return ERR_PTR(ret); | ||
298 | |||
293 | map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map), | 299 | map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map), |
294 | __GFP_ZERO | GFP_USER, numa_node); | 300 | __GFP_ZERO | GFP_USER, numa_node); |
295 | if (!map) | 301 | if (!map) { |
302 | bpf_map_charge_finish(&mem); | ||
296 | return ERR_PTR(-ENOMEM); | 303 | return ERR_PTR(-ENOMEM); |
304 | } | ||
297 | 305 | ||
298 | map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map), | 306 | bpf_map_charge_move(&map->map.memory, &mem); |
299 | PAGE_SIZE) >> PAGE_SHIFT; | ||
300 | 307 | ||
301 | /* copy mandatory map attributes */ | 308 | /* copy mandatory map attributes */ |
302 | bpf_map_init_from_attr(&map->map, attr); | 309 | bpf_map_init_from_attr(&map->map, attr); |
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index e61630c2e50b..09334f13a8a0 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -573,14 +573,8 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) | |||
573 | cost_per_node = sizeof(struct lpm_trie_node) + | 573 | cost_per_node = sizeof(struct lpm_trie_node) + |
574 | attr->value_size + trie->data_size; | 574 | attr->value_size + trie->data_size; |
575 | cost += (u64) attr->max_entries * cost_per_node; | 575 | cost += (u64) attr->max_entries * cost_per_node; |
576 | if (cost >= U32_MAX - PAGE_SIZE) { | ||
577 | ret = -E2BIG; | ||
578 | goto out_err; | ||
579 | } | ||
580 | |||
581 | trie->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; | ||
582 | 576 | ||
583 | ret = bpf_map_precharge_memlock(trie->map.pages); | 577 | ret = bpf_map_charge_init(&trie->map.memory, cost); |
584 | if (ret) | 578 | if (ret) |
585 | goto out_err; | 579 | goto out_err; |
586 | 580 | ||
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index 0b140d236889..f697647ceb54 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -67,29 +67,28 @@ static int queue_stack_map_alloc_check(union bpf_attr *attr) | |||
67 | static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr) | 67 | static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr) |
68 | { | 68 | { |
69 | int ret, numa_node = bpf_map_attr_numa_node(attr); | 69 | int ret, numa_node = bpf_map_attr_numa_node(attr); |
70 | struct bpf_map_memory mem = {0}; | ||
70 | struct bpf_queue_stack *qs; | 71 | struct bpf_queue_stack *qs; |
71 | u64 size, queue_size, cost; | 72 | u64 size, queue_size, cost; |
72 | 73 | ||
73 | size = (u64) attr->max_entries + 1; | 74 | size = (u64) attr->max_entries + 1; |
74 | cost = queue_size = sizeof(*qs) + size * attr->value_size; | 75 | cost = queue_size = sizeof(*qs) + size * attr->value_size; |
75 | if (cost >= U32_MAX - PAGE_SIZE) | ||
76 | return ERR_PTR(-E2BIG); | ||
77 | 76 | ||
78 | cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; | 77 | ret = bpf_map_charge_init(&mem, cost); |
79 | |||
80 | ret = bpf_map_precharge_memlock(cost); | ||
81 | if (ret < 0) | 78 | if (ret < 0) |
82 | return ERR_PTR(ret); | 79 | return ERR_PTR(ret); |
83 | 80 | ||
84 | qs = bpf_map_area_alloc(queue_size, numa_node); | 81 | qs = bpf_map_area_alloc(queue_size, numa_node); |
85 | if (!qs) | 82 | if (!qs) { |
83 | bpf_map_charge_finish(&mem); | ||
86 | return ERR_PTR(-ENOMEM); | 84 | return ERR_PTR(-ENOMEM); |
85 | } | ||
87 | 86 | ||
88 | memset(qs, 0, sizeof(*qs)); | 87 | memset(qs, 0, sizeof(*qs)); |
89 | 88 | ||
90 | bpf_map_init_from_attr(&qs->map, attr); | 89 | bpf_map_init_from_attr(&qs->map, attr); |
91 | 90 | ||
92 | qs->map.pages = cost; | 91 | bpf_map_charge_move(&qs->map.memory, &mem); |
93 | qs->size = size; | 92 | qs->size = size; |
94 | 93 | ||
95 | raw_spin_lock_init(&qs->lock); | 94 | raw_spin_lock_init(&qs->lock); |
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index 18e225de80ff..50c083ba978c 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -151,7 +151,8 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr) | |||
151 | { | 151 | { |
152 | int err, numa_node = bpf_map_attr_numa_node(attr); | 152 | int err, numa_node = bpf_map_attr_numa_node(attr); |
153 | struct reuseport_array *array; | 153 | struct reuseport_array *array; |
154 | u64 cost, array_size; | 154 | struct bpf_map_memory mem; |
155 | u64 array_size; | ||
155 | 156 | ||
156 | if (!capable(CAP_SYS_ADMIN)) | 157 | if (!capable(CAP_SYS_ADMIN)) |
157 | return ERR_PTR(-EPERM); | 158 | return ERR_PTR(-EPERM); |
@@ -159,24 +160,20 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr) | |||
159 | array_size = sizeof(*array); | 160 | array_size = sizeof(*array); |
160 | array_size += (u64)attr->max_entries * sizeof(struct sock *); | 161 | array_size += (u64)attr->max_entries * sizeof(struct sock *); |
161 | 162 | ||
162 | /* make sure there is no u32 overflow later in round_up() */ | 163 | err = bpf_map_charge_init(&mem, array_size); |
163 | cost = array_size; | ||
164 | if (cost >= U32_MAX - PAGE_SIZE) | ||
165 | return ERR_PTR(-ENOMEM); | ||
166 | cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; | ||
167 | |||
168 | err = bpf_map_precharge_memlock(cost); | ||
169 | if (err) | 164 | if (err) |
170 | return ERR_PTR(err); | 165 | return ERR_PTR(err); |
171 | 166 | ||
172 | /* allocate all map elements and zero-initialize them */ | 167 | /* allocate all map elements and zero-initialize them */ |
173 | array = bpf_map_area_alloc(array_size, numa_node); | 168 | array = bpf_map_area_alloc(array_size, numa_node); |
174 | if (!array) | 169 | if (!array) { |
170 | bpf_map_charge_finish(&mem); | ||
175 | return ERR_PTR(-ENOMEM); | 171 | return ERR_PTR(-ENOMEM); |
172 | } | ||
176 | 173 | ||
177 | /* copy mandatory map attributes */ | 174 | /* copy mandatory map attributes */ |
178 | bpf_map_init_from_attr(&array->map, attr); | 175 | bpf_map_init_from_attr(&array->map, attr); |
179 | array->map.pages = cost; | 176 | bpf_map_charge_move(&array->map.memory, &mem); |
180 | 177 | ||
181 | return &array->map; | 178 | return &array->map; |
182 | } | 179 | } |
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 950ab2f28922..3d86072d8e32 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -89,6 +89,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) | |||
89 | { | 89 | { |
90 | u32 value_size = attr->value_size; | 90 | u32 value_size = attr->value_size; |
91 | struct bpf_stack_map *smap; | 91 | struct bpf_stack_map *smap; |
92 | struct bpf_map_memory mem; | ||
92 | u64 cost, n_buckets; | 93 | u64 cost, n_buckets; |
93 | int err; | 94 | int err; |
94 | 95 | ||
@@ -116,40 +117,37 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) | |||
116 | n_buckets = roundup_pow_of_two(attr->max_entries); | 117 | n_buckets = roundup_pow_of_two(attr->max_entries); |
117 | 118 | ||
118 | cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); | 119 | cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); |
119 | if (cost >= U32_MAX - PAGE_SIZE) | 120 | cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); |
120 | return ERR_PTR(-E2BIG); | 121 | err = bpf_map_charge_init(&mem, cost); |
122 | if (err) | ||
123 | return ERR_PTR(err); | ||
121 | 124 | ||
122 | smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); | 125 | smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); |
123 | if (!smap) | 126 | if (!smap) { |
127 | bpf_map_charge_finish(&mem); | ||
124 | return ERR_PTR(-ENOMEM); | 128 | return ERR_PTR(-ENOMEM); |
125 | 129 | } | |
126 | err = -E2BIG; | ||
127 | cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); | ||
128 | if (cost >= U32_MAX - PAGE_SIZE) | ||
129 | goto free_smap; | ||
130 | 130 | ||
131 | bpf_map_init_from_attr(&smap->map, attr); | 131 | bpf_map_init_from_attr(&smap->map, attr); |
132 | smap->map.value_size = value_size; | 132 | smap->map.value_size = value_size; |
133 | smap->n_buckets = n_buckets; | 133 | smap->n_buckets = n_buckets; |
134 | smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; | ||
135 | |||
136 | err = bpf_map_precharge_memlock(smap->map.pages); | ||
137 | if (err) | ||
138 | goto free_smap; | ||
139 | 134 | ||
140 | err = get_callchain_buffers(sysctl_perf_event_max_stack); | 135 | err = get_callchain_buffers(sysctl_perf_event_max_stack); |
141 | if (err) | 136 | if (err) |
142 | goto free_smap; | 137 | goto free_charge; |
143 | 138 | ||
144 | err = prealloc_elems_and_freelist(smap); | 139 | err = prealloc_elems_and_freelist(smap); |
145 | if (err) | 140 | if (err) |
146 | goto put_buffers; | 141 | goto put_buffers; |
147 | 142 | ||
143 | bpf_map_charge_move(&smap->map.memory, &mem); | ||
144 | |||
148 | return &smap->map; | 145 | return &smap->map; |
149 | 146 | ||
150 | put_buffers: | 147 | put_buffers: |
151 | put_callchain_buffers(); | 148 | put_callchain_buffers(); |
152 | free_smap: | 149 | free_charge: |
150 | bpf_map_charge_finish(&mem); | ||
153 | bpf_map_area_free(smap); | 151 | bpf_map_area_free(smap); |
154 | return ERR_PTR(err); | 152 | return ERR_PTR(err); |
155 | } | 153 | } |
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index cb5440b02e82..4c53cbd3329d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -188,19 +188,6 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) | |||
188 | map->numa_node = bpf_map_attr_numa_node(attr); | 188 | map->numa_node = bpf_map_attr_numa_node(attr); |
189 | } | 189 | } |
190 | 190 | ||
191 | int bpf_map_precharge_memlock(u32 pages) | ||
192 | { | ||
193 | struct user_struct *user = get_current_user(); | ||
194 | unsigned long memlock_limit, cur; | ||
195 | |||
196 | memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; | ||
197 | cur = atomic_long_read(&user->locked_vm); | ||
198 | free_uid(user); | ||
199 | if (cur + pages > memlock_limit) | ||
200 | return -EPERM; | ||
201 | return 0; | ||
202 | } | ||
203 | |||
204 | static int bpf_charge_memlock(struct user_struct *user, u32 pages) | 191 | static int bpf_charge_memlock(struct user_struct *user, u32 pages) |
205 | { | 192 | { |
206 | unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; | 193 | unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; |
@@ -214,45 +201,62 @@ static int bpf_charge_memlock(struct user_struct *user, u32 pages) | |||
214 | 201 | ||
215 | static void bpf_uncharge_memlock(struct user_struct *user, u32 pages) | 202 | static void bpf_uncharge_memlock(struct user_struct *user, u32 pages) |
216 | { | 203 | { |
217 | atomic_long_sub(pages, &user->locked_vm); | 204 | if (user) |
205 | atomic_long_sub(pages, &user->locked_vm); | ||
218 | } | 206 | } |
219 | 207 | ||
220 | static int bpf_map_init_memlock(struct bpf_map *map) | 208 | int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size) |
221 | { | 209 | { |
222 | struct user_struct *user = get_current_user(); | 210 | u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT; |
211 | struct user_struct *user; | ||
223 | int ret; | 212 | int ret; |
224 | 213 | ||
225 | ret = bpf_charge_memlock(user, map->pages); | 214 | if (size >= U32_MAX - PAGE_SIZE) |
215 | return -E2BIG; | ||
216 | |||
217 | user = get_current_user(); | ||
218 | ret = bpf_charge_memlock(user, pages); | ||
226 | if (ret) { | 219 | if (ret) { |
227 | free_uid(user); | 220 | free_uid(user); |
228 | return ret; | 221 | return ret; |
229 | } | 222 | } |
230 | map->user = user; | 223 | |
231 | return ret; | 224 | mem->pages = pages; |
225 | mem->user = user; | ||
226 | |||
227 | return 0; | ||
232 | } | 228 | } |
233 | 229 | ||
234 | static void bpf_map_release_memlock(struct bpf_map *map) | 230 | void bpf_map_charge_finish(struct bpf_map_memory *mem) |
235 | { | 231 | { |
236 | struct user_struct *user = map->user; | 232 | bpf_uncharge_memlock(mem->user, mem->pages); |
237 | bpf_uncharge_memlock(user, map->pages); | 233 | free_uid(mem->user); |
238 | free_uid(user); | 234 | } |
235 | |||
236 | void bpf_map_charge_move(struct bpf_map_memory *dst, | ||
237 | struct bpf_map_memory *src) | ||
238 | { | ||
239 | *dst = *src; | ||
240 | |||
241 | /* Make sure src will not be used for the redundant uncharging. */ | ||
242 | memset(src, 0, sizeof(struct bpf_map_memory)); | ||
239 | } | 243 | } |
240 | 244 | ||
241 | int bpf_map_charge_memlock(struct bpf_map *map, u32 pages) | 245 | int bpf_map_charge_memlock(struct bpf_map *map, u32 pages) |
242 | { | 246 | { |
243 | int ret; | 247 | int ret; |
244 | 248 | ||
245 | ret = bpf_charge_memlock(map->user, pages); | 249 | ret = bpf_charge_memlock(map->memory.user, pages); |
246 | if (ret) | 250 | if (ret) |
247 | return ret; | 251 | return ret; |
248 | map->pages += pages; | 252 | map->memory.pages += pages; |
249 | return ret; | 253 | return ret; |
250 | } | 254 | } |
251 | 255 | ||
252 | void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages) | 256 | void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages) |
253 | { | 257 | { |
254 | bpf_uncharge_memlock(map->user, pages); | 258 | bpf_uncharge_memlock(map->memory.user, pages); |
255 | map->pages -= pages; | 259 | map->memory.pages -= pages; |
256 | } | 260 | } |
257 | 261 | ||
258 | static int bpf_map_alloc_id(struct bpf_map *map) | 262 | static int bpf_map_alloc_id(struct bpf_map *map) |
@@ -303,11 +307,13 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock) | |||
303 | static void bpf_map_free_deferred(struct work_struct *work) | 307 | static void bpf_map_free_deferred(struct work_struct *work) |
304 | { | 308 | { |
305 | struct bpf_map *map = container_of(work, struct bpf_map, work); | 309 | struct bpf_map *map = container_of(work, struct bpf_map, work); |
310 | struct bpf_map_memory mem; | ||
306 | 311 | ||
307 | bpf_map_release_memlock(map); | 312 | bpf_map_charge_move(&mem, &map->memory); |
308 | security_bpf_map_free(map); | 313 | security_bpf_map_free(map); |
309 | /* implementation dependent freeing */ | 314 | /* implementation dependent freeing */ |
310 | map->ops->map_free(map); | 315 | map->ops->map_free(map); |
316 | bpf_map_charge_finish(&mem); | ||
311 | } | 317 | } |
312 | 318 | ||
313 | static void bpf_map_put_uref(struct bpf_map *map) | 319 | static void bpf_map_put_uref(struct bpf_map *map) |
@@ -395,7 +401,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) | |||
395 | map->value_size, | 401 | map->value_size, |
396 | map->max_entries, | 402 | map->max_entries, |
397 | map->map_flags, | 403 | map->map_flags, |
398 | map->pages * 1ULL << PAGE_SHIFT, | 404 | map->memory.pages * 1ULL << PAGE_SHIFT, |
399 | map->id, | 405 | map->id, |
400 | READ_ONCE(map->frozen)); | 406 | READ_ONCE(map->frozen)); |
401 | 407 | ||
@@ -549,6 +555,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, | |||
549 | static int map_create(union bpf_attr *attr) | 555 | static int map_create(union bpf_attr *attr) |
550 | { | 556 | { |
551 | int numa_node = bpf_map_attr_numa_node(attr); | 557 | int numa_node = bpf_map_attr_numa_node(attr); |
558 | struct bpf_map_memory mem; | ||
552 | struct bpf_map *map; | 559 | struct bpf_map *map; |
553 | int f_flags; | 560 | int f_flags; |
554 | int err; | 561 | int err; |
@@ -573,7 +580,7 @@ static int map_create(union bpf_attr *attr) | |||
573 | 580 | ||
574 | err = bpf_obj_name_cpy(map->name, attr->map_name); | 581 | err = bpf_obj_name_cpy(map->name, attr->map_name); |
575 | if (err) | 582 | if (err) |
576 | goto free_map_nouncharge; | 583 | goto free_map; |
577 | 584 | ||
578 | atomic_set(&map->refcnt, 1); | 585 | atomic_set(&map->refcnt, 1); |
579 | atomic_set(&map->usercnt, 1); | 586 | atomic_set(&map->usercnt, 1); |
@@ -583,20 +590,20 @@ static int map_create(union bpf_attr *attr) | |||
583 | 590 | ||
584 | if (!attr->btf_value_type_id) { | 591 | if (!attr->btf_value_type_id) { |
585 | err = -EINVAL; | 592 | err = -EINVAL; |
586 | goto free_map_nouncharge; | 593 | goto free_map; |
587 | } | 594 | } |
588 | 595 | ||
589 | btf = btf_get_by_fd(attr->btf_fd); | 596 | btf = btf_get_by_fd(attr->btf_fd); |
590 | if (IS_ERR(btf)) { | 597 | if (IS_ERR(btf)) { |
591 | err = PTR_ERR(btf); | 598 | err = PTR_ERR(btf); |
592 | goto free_map_nouncharge; | 599 | goto free_map; |
593 | } | 600 | } |
594 | 601 | ||
595 | err = map_check_btf(map, btf, attr->btf_key_type_id, | 602 | err = map_check_btf(map, btf, attr->btf_key_type_id, |
596 | attr->btf_value_type_id); | 603 | attr->btf_value_type_id); |
597 | if (err) { | 604 | if (err) { |
598 | btf_put(btf); | 605 | btf_put(btf); |
599 | goto free_map_nouncharge; | 606 | goto free_map; |
600 | } | 607 | } |
601 | 608 | ||
602 | map->btf = btf; | 609 | map->btf = btf; |
@@ -608,15 +615,11 @@ static int map_create(union bpf_attr *attr) | |||
608 | 615 | ||
609 | err = security_bpf_map_alloc(map); | 616 | err = security_bpf_map_alloc(map); |
610 | if (err) | 617 | if (err) |
611 | goto free_map_nouncharge; | 618 | goto free_map; |
612 | |||
613 | err = bpf_map_init_memlock(map); | ||
614 | if (err) | ||
615 | goto free_map_sec; | ||
616 | 619 | ||
617 | err = bpf_map_alloc_id(map); | 620 | err = bpf_map_alloc_id(map); |
618 | if (err) | 621 | if (err) |
619 | goto free_map; | 622 | goto free_map_sec; |
620 | 623 | ||
621 | err = bpf_map_new_fd(map, f_flags); | 624 | err = bpf_map_new_fd(map, f_flags); |
622 | if (err < 0) { | 625 | if (err < 0) { |
@@ -632,13 +635,13 @@ static int map_create(union bpf_attr *attr) | |||
632 | 635 | ||
633 | return err; | 636 | return err; |
634 | 637 | ||
635 | free_map: | ||
636 | bpf_map_release_memlock(map); | ||
637 | free_map_sec: | 638 | free_map_sec: |
638 | security_bpf_map_free(map); | 639 | security_bpf_map_free(map); |
639 | free_map_nouncharge: | 640 | free_map: |
640 | btf_put(map->btf); | 641 | btf_put(map->btf); |
642 | bpf_map_charge_move(&mem, &map->memory); | ||
641 | map->ops->map_free(map); | 643 | map->ops->map_free(map); |
644 | bpf_map_charge_finish(&mem); | ||
642 | return err; | 645 | return err; |
643 | } | 646 | } |
644 | 647 | ||
@@ -1585,6 +1588,14 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type, | |||
1585 | default: | 1588 | default: |
1586 | return -EINVAL; | 1589 | return -EINVAL; |
1587 | } | 1590 | } |
1591 | case BPF_PROG_TYPE_CGROUP_SKB: | ||
1592 | switch (expected_attach_type) { | ||
1593 | case BPF_CGROUP_INET_INGRESS: | ||
1594 | case BPF_CGROUP_INET_EGRESS: | ||
1595 | return 0; | ||
1596 | default: | ||
1597 | return -EINVAL; | ||
1598 | } | ||
1588 | default: | 1599 | default: |
1589 | return 0; | 1600 | return 0; |
1590 | } | 1601 | } |
@@ -1604,7 +1615,9 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) | |||
1604 | if (CHECK_ATTR(BPF_PROG_LOAD)) | 1615 | if (CHECK_ATTR(BPF_PROG_LOAD)) |
1605 | return -EINVAL; | 1616 | return -EINVAL; |
1606 | 1617 | ||
1607 | if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | BPF_F_ANY_ALIGNMENT)) | 1618 | if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | |
1619 | BPF_F_ANY_ALIGNMENT | | ||
1620 | BPF_F_TEST_RND_HI32)) | ||
1608 | return -EINVAL; | 1621 | return -EINVAL; |
1609 | 1622 | ||
1610 | if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && | 1623 | if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && |
@@ -1834,6 +1847,10 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, | |||
1834 | case BPF_PROG_TYPE_CGROUP_SOCK: | 1847 | case BPF_PROG_TYPE_CGROUP_SOCK: |
1835 | case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: | 1848 | case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: |
1836 | return attach_type == prog->expected_attach_type ? 0 : -EINVAL; | 1849 | return attach_type == prog->expected_attach_type ? 0 : -EINVAL; |
1850 | case BPF_PROG_TYPE_CGROUP_SKB: | ||
1851 | return prog->enforce_expected_attach_type && | ||
1852 | prog->expected_attach_type != attach_type ? | ||
1853 | -EINVAL : 0; | ||
1837 | default: | 1854 | default: |
1838 | return 0; | 1855 | return 0; |
1839 | } | 1856 | } |
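Taken together, the two hunks above let a cgroup/skb program declare its direction at load time and have that declaration enforced at attach time; enforce_expected_attach_type itself is set elsewhere in the series for programs that rely on the extended egress return codes. A user-space sketch using the raw bpf(2) syscall; the wrapper names are invented and libbpf offers equivalent setters:

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
        return syscall(__NR_bpf, cmd, attr, size);
}

/* load a cgroup/skb program that may only be attached to the egress hook */
static int load_cgroup_skb_egress(const struct bpf_insn *insns, __u32 insn_cnt)
{
        union bpf_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.prog_type = BPF_PROG_TYPE_CGROUP_SKB;
        attr.expected_attach_type = BPF_CGROUP_INET_EGRESS;
        attr.insns = (__u64)(unsigned long)insns;
        attr.insn_cnt = insn_cnt;
        attr.license = (__u64)(unsigned long)"GPL";

        return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
}

Once such a program has enforce_expected_attach_type set, attaching the returned fd with BPF_PROG_ATTACH and an attach_type other than BPF_CGROUP_INET_EGRESS fails with -EINVAL.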
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 95f9354495ad..5c2cb5bd84ce 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -176,7 +176,7 @@ struct bpf_verifier_stack_elem { | |||
176 | struct bpf_verifier_stack_elem *next; | 176 | struct bpf_verifier_stack_elem *next; |
177 | }; | 177 | }; |
178 | 178 | ||
179 | #define BPF_COMPLEXITY_LIMIT_STACK 1024 | 179 | #define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192 |
180 | #define BPF_COMPLEXITY_LIMIT_STATES 64 | 180 | #define BPF_COMPLEXITY_LIMIT_STATES 64 |
181 | 181 | ||
182 | #define BPF_MAP_PTR_UNPRIV 1UL | 182 | #define BPF_MAP_PTR_UNPRIV 1UL |
@@ -782,8 +782,9 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, | |||
782 | if (err) | 782 | if (err) |
783 | goto err; | 783 | goto err; |
784 | elem->st.speculative |= speculative; | 784 | elem->st.speculative |= speculative; |
785 | if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) { | 785 | if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) { |
786 | verbose(env, "BPF program is too complex\n"); | 786 | verbose(env, "The sequence of %d jumps is too complex.\n", |
787 | env->stack_size); | ||
787 | goto err; | 788 | goto err; |
788 | } | 789 | } |
789 | return &elem->st; | 790 | return &elem->st; |
@@ -981,6 +982,7 @@ static void mark_reg_not_init(struct bpf_verifier_env *env, | |||
981 | __mark_reg_not_init(regs + regno); | 982 | __mark_reg_not_init(regs + regno); |
982 | } | 983 | } |
983 | 984 | ||
985 | #define DEF_NOT_SUBREG (0) | ||
984 | static void init_reg_state(struct bpf_verifier_env *env, | 986 | static void init_reg_state(struct bpf_verifier_env *env, |
985 | struct bpf_func_state *state) | 987 | struct bpf_func_state *state) |
986 | { | 988 | { |
@@ -991,6 +993,7 @@ static void init_reg_state(struct bpf_verifier_env *env, | |||
991 | mark_reg_not_init(env, regs, i); | 993 | mark_reg_not_init(env, regs, i); |
992 | regs[i].live = REG_LIVE_NONE; | 994 | regs[i].live = REG_LIVE_NONE; |
993 | regs[i].parent = NULL; | 995 | regs[i].parent = NULL; |
996 | regs[i].subreg_def = DEF_NOT_SUBREG; | ||
994 | } | 997 | } |
995 | 998 | ||
996 | /* frame pointer */ | 999 | /* frame pointer */ |
@@ -1136,7 +1139,7 @@ next: | |||
1136 | */ | 1139 | */ |
1137 | static int mark_reg_read(struct bpf_verifier_env *env, | 1140 | static int mark_reg_read(struct bpf_verifier_env *env, |
1138 | const struct bpf_reg_state *state, | 1141 | const struct bpf_reg_state *state, |
1139 | struct bpf_reg_state *parent) | 1142 | struct bpf_reg_state *parent, u8 flag) |
1140 | { | 1143 | { |
1141 | bool writes = parent == state->parent; /* Observe write marks */ | 1144 | bool writes = parent == state->parent; /* Observe write marks */ |
1142 | int cnt = 0; | 1145 | int cnt = 0; |
@@ -1151,17 +1154,26 @@ static int mark_reg_read(struct bpf_verifier_env *env, | |||
1151 | parent->var_off.value, parent->off); | 1154 | parent->var_off.value, parent->off); |
1152 | return -EFAULT; | 1155 | return -EFAULT; |
1153 | } | 1156 | } |
1154 | if (parent->live & REG_LIVE_READ) | 1157 | /* The first condition is more likely to be true than the |
1158 | * second, checked it first. | ||
1159 | */ | ||
1160 | if ((parent->live & REG_LIVE_READ) == flag || | ||
1161 | parent->live & REG_LIVE_READ64) | ||
1155 | /* The parentage chain never changes and | 1162 | /* The parentage chain never changes and |
1156 | * this parent was already marked as LIVE_READ. | 1163 | * this parent was already marked as LIVE_READ. |
1157 | * There is no need to keep walking the chain again and | 1164 | * There is no need to keep walking the chain again and |
1158 | * keep re-marking all parents as LIVE_READ. | 1165 | * keep re-marking all parents as LIVE_READ. |
1159 | * This case happens when the same register is read | 1166 | * This case happens when the same register is read |
1160 | * multiple times without writes into it in-between. | 1167 | * multiple times without writes into it in-between. |
1168 | * Also, if parent has the stronger REG_LIVE_READ64 set, | ||
1169 | * then no need to set the weak REG_LIVE_READ32. | ||
1161 | */ | 1170 | */ |
1162 | break; | 1171 | break; |
1163 | /* ... then we depend on parent's value */ | 1172 | /* ... then we depend on parent's value */ |
1164 | parent->live |= REG_LIVE_READ; | 1173 | parent->live |= flag; |
1174 | /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */ | ||
1175 | if (flag == REG_LIVE_READ64) | ||
1176 | parent->live &= ~REG_LIVE_READ32; | ||
1165 | state = parent; | 1177 | state = parent; |
1166 | parent = state->parent; | 1178 | parent = state->parent; |
1167 | writes = true; | 1179 | writes = true; |
@@ -1173,12 +1185,129 @@ static int mark_reg_read(struct bpf_verifier_env *env, | |||
1173 | return 0; | 1185 | return 0; |
1174 | } | 1186 | } |
1175 | 1187 | ||
1188 | /* This function is supposed to be used by the following 32-bit optimization | ||
1189 | * code only. It returns TRUE if the source or destination register operates | ||
1190 | * on 64-bit, otherwise return FALSE. | ||
1191 | */ | ||
1192 | static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn, | ||
1193 | u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t) | ||
1194 | { | ||
1195 | u8 code, class, op; | ||
1196 | |||
1197 | code = insn->code; | ||
1198 | class = BPF_CLASS(code); | ||
1199 | op = BPF_OP(code); | ||
1200 | if (class == BPF_JMP) { | ||
1201 | /* BPF_EXIT for "main" will reach here. Return TRUE | ||
1202 | * conservatively. | ||
1203 | */ | ||
1204 | if (op == BPF_EXIT) | ||
1205 | return true; | ||
1206 | if (op == BPF_CALL) { | ||
1207 | /* BPF to BPF call will reach here because of marking | ||
1208 | * caller saved clobber with DST_OP_NO_MARK for which we | ||
1209 | * don't care the register def because they are anyway | ||
1210 | * marked as NOT_INIT already. | ||
1211 | */ | ||
1212 | if (insn->src_reg == BPF_PSEUDO_CALL) | ||
1213 | return false; | ||
1214 | /* Helper call will reach here because of arg type | ||
1215 | * check, conservatively return TRUE. | ||
1216 | */ | ||
1217 | if (t == SRC_OP) | ||
1218 | return true; | ||
1219 | |||
1220 | return false; | ||
1221 | } | ||
1222 | } | ||
1223 | |||
1224 | if (class == BPF_ALU64 || class == BPF_JMP || | ||
1225 | /* BPF_END always use BPF_ALU class. */ | ||
1226 | (class == BPF_ALU && op == BPF_END && insn->imm == 64)) | ||
1227 | return true; | ||
1228 | |||
1229 | if (class == BPF_ALU || class == BPF_JMP32) | ||
1230 | return false; | ||
1231 | |||
1232 | if (class == BPF_LDX) { | ||
1233 | if (t != SRC_OP) | ||
1234 | return BPF_SIZE(code) == BPF_DW; | ||
1235 | /* LDX source must be ptr. */ | ||
1236 | return true; | ||
1237 | } | ||
1238 | |||
1239 | if (class == BPF_STX) { | ||
1240 | if (reg->type != SCALAR_VALUE) | ||
1241 | return true; | ||
1242 | return BPF_SIZE(code) == BPF_DW; | ||
1243 | } | ||
1244 | |||
1245 | if (class == BPF_LD) { | ||
1246 | u8 mode = BPF_MODE(code); | ||
1247 | |||
1248 | /* LD_IMM64 */ | ||
1249 | if (mode == BPF_IMM) | ||
1250 | return true; | ||
1251 | |||
1252 | /* Both LD_IND and LD_ABS return 32-bit data. */ | ||
1253 | if (t != SRC_OP) | ||
1254 | return false; | ||
1255 | |||
1256 | /* Implicit ctx ptr. */ | ||
1257 | if (regno == BPF_REG_6) | ||
1258 | return true; | ||
1259 | |||
1260 | /* Explicit source could be any width. */ | ||
1261 | return true; | ||
1262 | } | ||
1263 | |||
1264 | if (class == BPF_ST) | ||
1265 | /* The only source register for BPF_ST is a ptr. */ | ||
1266 | return true; | ||
1267 | |||
1268 | /* Conservatively return true at default. */ | ||
1269 | return true; | ||
1270 | } | ||
1271 | |||
1272 | /* Return TRUE if INSN doesn't have explicit value define. */ | ||
1273 | static bool insn_no_def(struct bpf_insn *insn) | ||
1274 | { | ||
1275 | u8 class = BPF_CLASS(insn->code); | ||
1276 | |||
1277 | return (class == BPF_JMP || class == BPF_JMP32 || | ||
1278 | class == BPF_STX || class == BPF_ST); | ||
1279 | } | ||
1280 | |||
1281 | /* Return TRUE if INSN has defined any 32-bit value explicitly. */ | ||
1282 | static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn) | ||
1283 | { | ||
1284 | if (insn_no_def(insn)) | ||
1285 | return false; | ||
1286 | |||
1287 | return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP); | ||
1288 | } | ||
1289 | |||
1290 | static void mark_insn_zext(struct bpf_verifier_env *env, | ||
1291 | struct bpf_reg_state *reg) | ||
1292 | { | ||
1293 | s32 def_idx = reg->subreg_def; | ||
1294 | |||
1295 | if (def_idx == DEF_NOT_SUBREG) | ||
1296 | return; | ||
1297 | |||
1298 | env->insn_aux_data[def_idx - 1].zext_dst = true; | ||
1299 | /* The dst will be zero extended, so won't be sub-register anymore. */ | ||
1300 | reg->subreg_def = DEF_NOT_SUBREG; | ||
1301 | } | ||
1302 | |||
1176 | static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, | 1303 | static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, |
1177 | enum reg_arg_type t) | 1304 | enum reg_arg_type t) |
1178 | { | 1305 | { |
1179 | struct bpf_verifier_state *vstate = env->cur_state; | 1306 | struct bpf_verifier_state *vstate = env->cur_state; |
1180 | struct bpf_func_state *state = vstate->frame[vstate->curframe]; | 1307 | struct bpf_func_state *state = vstate->frame[vstate->curframe]; |
1308 | struct bpf_insn *insn = env->prog->insnsi + env->insn_idx; | ||
1181 | struct bpf_reg_state *reg, *regs = state->regs; | 1309 | struct bpf_reg_state *reg, *regs = state->regs; |
1310 | bool rw64; | ||
1182 | 1311 | ||
1183 | if (regno >= MAX_BPF_REG) { | 1312 | if (regno >= MAX_BPF_REG) { |
1184 | verbose(env, "R%d is invalid\n", regno); | 1313 | verbose(env, "R%d is invalid\n", regno); |
@@ -1186,6 +1315,7 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, | |||
1186 | } | 1315 | } |
1187 | 1316 | ||
1188 | reg = ®s[regno]; | 1317 | reg = ®s[regno]; |
1318 | rw64 = is_reg64(env, insn, regno, reg, t); | ||
1189 | if (t == SRC_OP) { | 1319 | if (t == SRC_OP) { |
1190 | /* check whether register used as source operand can be read */ | 1320 | /* check whether register used as source operand can be read */ |
1191 | if (reg->type == NOT_INIT) { | 1321 | if (reg->type == NOT_INIT) { |
@@ -1196,7 +1326,11 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, | |||
1196 | if (regno == BPF_REG_FP) | 1326 | if (regno == BPF_REG_FP) |
1197 | return 0; | 1327 | return 0; |
1198 | 1328 | ||
1199 | return mark_reg_read(env, reg, reg->parent); | 1329 | if (rw64) |
1330 | mark_insn_zext(env, reg); | ||
1331 | |||
1332 | return mark_reg_read(env, reg, reg->parent, | ||
1333 | rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32); | ||
1200 | } else { | 1334 | } else { |
1201 | /* check whether register used as dest operand can be written to */ | 1335 | /* check whether register used as dest operand can be written to */ |
1202 | if (regno == BPF_REG_FP) { | 1336 | if (regno == BPF_REG_FP) { |
@@ -1204,6 +1338,7 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, | |||
1204 | return -EACCES; | 1338 | return -EACCES; |
1205 | } | 1339 | } |
1206 | reg->live |= REG_LIVE_WRITTEN; | 1340 | reg->live |= REG_LIVE_WRITTEN; |
1341 | reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1; | ||
1207 | if (t == DST_OP) | 1342 | if (t == DST_OP) |
1208 | mark_reg_unknown(env, regs, regno); | 1343 | mark_reg_unknown(env, regs, regno); |
1209 | } | 1344 | } |
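
Putting the two halves together: a 32-bit definition at insn i records subreg_def = i + 1, and a later 64-bit read walks back through that index and sets zext_dst on the defining insn's aux data. A toy userspace model with invented structures (only the field names mirror the verifier):

#include <stdbool.h>
#include <stdio.h>

#define DEF_NOT_SUBREG	0
#define NR_INSNS	16

struct toy_insn_aux { bool zext_dst; };
struct toy_reg { int subreg_def; };

static struct toy_insn_aux aux[NR_INSNS];

/* 32-bit definition at insn_idx: remember "defined here" as insn_idx + 1,
 * so that 0 can keep meaning "not a sub-register".
 */
static void toy_def(struct toy_reg *reg, int insn_idx, bool is64)
{
	reg->subreg_def = is64 ? DEF_NOT_SUBREG : insn_idx + 1;
}

/* 64-bit read: flag the defining insn so a later patching pass can insert
 * a zero extension right after it.
 */
static void toy_read(struct toy_reg *reg, bool is64)
{
	if (is64 && reg->subreg_def != DEF_NOT_SUBREG) {
		aux[reg->subreg_def - 1].zext_dst = true;
		reg->subreg_def = DEF_NOT_SUBREG;
	}
}

int main(void)
{
	struct toy_reg r1 = { DEF_NOT_SUBREG };

	toy_def(&r1, 5, false);		/* insn 5: w1 = w2  (32-bit def)  */
	toy_read(&r1, true);		/* insn 9: r3 += r1 (64-bit read) */
	printf("insn 5 needs zext: %d\n", aux[5].zext_dst);
	return 0;
}
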
@@ -1383,7 +1518,8 @@ static int check_stack_read(struct bpf_verifier_env *env, | |||
1383 | state->regs[value_regno].live |= REG_LIVE_WRITTEN; | 1518 | state->regs[value_regno].live |= REG_LIVE_WRITTEN; |
1384 | } | 1519 | } |
1385 | mark_reg_read(env, ®_state->stack[spi].spilled_ptr, | 1520 | mark_reg_read(env, ®_state->stack[spi].spilled_ptr, |
1386 | reg_state->stack[spi].spilled_ptr.parent); | 1521 | reg_state->stack[spi].spilled_ptr.parent, |
1522 | REG_LIVE_READ64); | ||
1387 | return 0; | 1523 | return 0; |
1388 | } else { | 1524 | } else { |
1389 | int zeros = 0; | 1525 | int zeros = 0; |
@@ -1400,7 +1536,8 @@ static int check_stack_read(struct bpf_verifier_env *env, | |||
1400 | return -EACCES; | 1536 | return -EACCES; |
1401 | } | 1537 | } |
1402 | mark_reg_read(env, ®_state->stack[spi].spilled_ptr, | 1538 | mark_reg_read(env, ®_state->stack[spi].spilled_ptr, |
1403 | reg_state->stack[spi].spilled_ptr.parent); | 1539 | reg_state->stack[spi].spilled_ptr.parent, |
1540 | REG_LIVE_READ64); | ||
1404 | if (value_regno >= 0) { | 1541 | if (value_regno >= 0) { |
1405 | if (zeros == size) { | 1542 | if (zeros == size) { |
1406 | /* any size read into register is zero extended, | 1543 | /* any size read into register is zero extended, |
@@ -2109,6 +2246,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn | |||
2109 | value_regno); | 2246 | value_regno); |
2110 | if (reg_type_may_be_null(reg_type)) | 2247 | if (reg_type_may_be_null(reg_type)) |
2111 | regs[value_regno].id = ++env->id_gen; | 2248 | regs[value_regno].id = ++env->id_gen; |
2249 | /* A load of a ctx field could have a different | ||
2250 | * actual load size from the one encoded in the | ||
2251 | * insn. When the dst is a PTR, it is for sure not | ||
2252 | * a sub-register. | ||
2253 | */ | ||
2254 | regs[value_regno].subreg_def = DEF_NOT_SUBREG; | ||
2112 | } | 2255 | } |
2113 | regs[value_regno].type = reg_type; | 2256 | regs[value_regno].type = reg_type; |
2114 | } | 2257 | } |
@@ -2368,7 +2511,8 @@ mark: | |||
2368 | * the whole slot to be marked as 'read' | 2511 | * the whole slot to be marked as 'read' |
2369 | */ | 2512 | */ |
2370 | mark_reg_read(env, &state->stack[spi].spilled_ptr, | 2513 | mark_reg_read(env, &state->stack[spi].spilled_ptr, |
2371 | state->stack[spi].spilled_ptr.parent); | 2514 | state->stack[spi].spilled_ptr.parent, |
2515 | REG_LIVE_READ64); | ||
2372 | } | 2516 | } |
2373 | return update_stack_depth(env, state, min_off); | 2517 | return update_stack_depth(env, state, min_off); |
2374 | } | 2518 | } |
@@ -3332,6 +3476,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn | |||
3332 | check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); | 3476 | check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); |
3333 | } | 3477 | } |
3334 | 3478 | ||
3479 | /* A helper call returns a 64-bit value. */ | ||
3480 | regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; | ||
3481 | |||
3335 | /* update return register (already marked as written above) */ | 3482 | /* update return register (already marked as written above) */ |
3336 | if (fn->ret_type == RET_INTEGER) { | 3483 | if (fn->ret_type == RET_INTEGER) { |
3337 | /* sets type to SCALAR_VALUE */ | 3484 | /* sets type to SCALAR_VALUE */ |
@@ -4263,6 +4410,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) | |||
4263 | */ | 4410 | */ |
4264 | *dst_reg = *src_reg; | 4411 | *dst_reg = *src_reg; |
4265 | dst_reg->live |= REG_LIVE_WRITTEN; | 4412 | dst_reg->live |= REG_LIVE_WRITTEN; |
4413 | dst_reg->subreg_def = DEF_NOT_SUBREG; | ||
4266 | } else { | 4414 | } else { |
4267 | /* R1 = (u32) R2 */ | 4415 | /* R1 = (u32) R2 */ |
4268 | if (is_pointer_value(env, insn->src_reg)) { | 4416 | if (is_pointer_value(env, insn->src_reg)) { |
@@ -4273,6 +4421,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) | |||
4273 | } else if (src_reg->type == SCALAR_VALUE) { | 4421 | } else if (src_reg->type == SCALAR_VALUE) { |
4274 | *dst_reg = *src_reg; | 4422 | *dst_reg = *src_reg; |
4275 | dst_reg->live |= REG_LIVE_WRITTEN; | 4423 | dst_reg->live |= REG_LIVE_WRITTEN; |
4424 | dst_reg->subreg_def = env->insn_idx + 1; | ||
4276 | } else { | 4425 | } else { |
4277 | mark_reg_unknown(env, regs, | 4426 | mark_reg_unknown(env, regs, |
4278 | insn->dst_reg); | 4427 | insn->dst_reg); |
@@ -5352,16 +5501,23 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) | |||
5352 | * Already marked as written above. | 5501 | * Already marked as written above. |
5353 | */ | 5502 | */ |
5354 | mark_reg_unknown(env, regs, BPF_REG_0); | 5503 | mark_reg_unknown(env, regs, BPF_REG_0); |
5504 | /* ld_abs loads up to 32 bits of skb data. */ | ||
5505 | regs[BPF_REG_0].subreg_def = env->insn_idx + 1; | ||
5355 | return 0; | 5506 | return 0; |
5356 | } | 5507 | } |
5357 | 5508 | ||
5358 | static int check_return_code(struct bpf_verifier_env *env) | 5509 | static int check_return_code(struct bpf_verifier_env *env) |
5359 | { | 5510 | { |
5511 | struct tnum enforce_attach_type_range = tnum_unknown; | ||
5360 | struct bpf_reg_state *reg; | 5512 | struct bpf_reg_state *reg; |
5361 | struct tnum range = tnum_range(0, 1); | 5513 | struct tnum range = tnum_range(0, 1); |
5362 | 5514 | ||
5363 | switch (env->prog->type) { | 5515 | switch (env->prog->type) { |
5364 | case BPF_PROG_TYPE_CGROUP_SKB: | 5516 | case BPF_PROG_TYPE_CGROUP_SKB: |
5517 | if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) { | ||
5518 | range = tnum_range(0, 3); | ||
5519 | enforce_attach_type_range = tnum_range(2, 3); | ||
5520 | } | ||
5365 | case BPF_PROG_TYPE_CGROUP_SOCK: | 5521 | case BPF_PROG_TYPE_CGROUP_SOCK: |
5366 | case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: | 5522 | case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: |
5367 | case BPF_PROG_TYPE_SOCK_OPS: | 5523 | case BPF_PROG_TYPE_SOCK_OPS: |
@@ -5380,18 +5536,23 @@ static int check_return_code(struct bpf_verifier_env *env) | |||
5380 | } | 5536 | } |
5381 | 5537 | ||
5382 | if (!tnum_in(range, reg->var_off)) { | 5538 | if (!tnum_in(range, reg->var_off)) { |
5539 | char tn_buf[48]; | ||
5540 | |||
5383 | verbose(env, "At program exit the register R0 "); | 5541 | verbose(env, "At program exit the register R0 "); |
5384 | if (!tnum_is_unknown(reg->var_off)) { | 5542 | if (!tnum_is_unknown(reg->var_off)) { |
5385 | char tn_buf[48]; | ||
5386 | |||
5387 | tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); | 5543 | tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); |
5388 | verbose(env, "has value %s", tn_buf); | 5544 | verbose(env, "has value %s", tn_buf); |
5389 | } else { | 5545 | } else { |
5390 | verbose(env, "has unknown scalar value"); | 5546 | verbose(env, "has unknown scalar value"); |
5391 | } | 5547 | } |
5392 | verbose(env, " should have been 0 or 1\n"); | 5548 | tnum_strn(tn_buf, sizeof(tn_buf), range); |
5549 | verbose(env, " should have been %s\n", tn_buf); | ||
5393 | return -EINVAL; | 5550 | return -EINVAL; |
5394 | } | 5551 | } |
5552 | |||
5553 | if (!tnum_is_unknown(enforce_attach_type_range) && | ||
5554 | tnum_in(enforce_attach_type_range, reg->var_off)) | ||
5555 | env->prog->enforce_expected_attach_type = 1; | ||
5395 | return 0; | 5556 | return 0; |
5396 | } | 5557 | } |
5397 | 5558 | ||
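
The range check above is plain tnum containment; the sketch below re-derives tnum_range()/tnum_in() in userspace, loosely following the shape of kernel/bpf/tnum.c, so the new CGROUP_SKB egress case (R0 in [0, 3], with values provably in [2, 3] latching enforce_expected_attach_type) can be tried out. It is an illustration only, not the kernel code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct tnum { uint64_t value; uint64_t mask; };

static int fls64(uint64_t x)
{
	return x ? 64 - __builtin_clzll(x) : 0;
}

/* Simplified: the kernel also handles the full 64-bit-wide range. */
static struct tnum tnum_range(uint64_t min, uint64_t max)
{
	uint64_t chi = min ^ max;
	uint64_t delta = (1ULL << fls64(chi)) - 1;

	return (struct tnum){ min & ~delta, delta };
}

/* True if every value tnum B can take is also representable by tnum A. */
static bool tnum_in(struct tnum a, struct tnum b)
{
	if (b.mask & ~a.mask)
		return false;
	b.value &= ~a.mask;
	return a.value == b.value;
}

int main(void)
{
	struct tnum range = tnum_range(0, 3);
	struct tnum enforce = tnum_range(2, 3);
	struct tnum r0 = { 2, 0 };	/* verifier proved R0 == 2 */

	printf("R0 in [0,3]: %d, triggers attach-type enforcement: %d\n",
	       tnum_in(range, r0), tnum_in(enforce, r0));
	return 0;
}
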
@@ -5435,7 +5596,25 @@ enum { | |||
5435 | BRANCH = 2, | 5596 | BRANCH = 2, |
5436 | }; | 5597 | }; |
5437 | 5598 | ||
5438 | #define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L) | 5599 | static u32 state_htab_size(struct bpf_verifier_env *env) |
5600 | { | ||
5601 | return env->prog->len; | ||
5602 | } | ||
5603 | |||
5604 | static struct bpf_verifier_state_list **explored_state( | ||
5605 | struct bpf_verifier_env *env, | ||
5606 | int idx) | ||
5607 | { | ||
5608 | struct bpf_verifier_state *cur = env->cur_state; | ||
5609 | struct bpf_func_state *state = cur->frame[cur->curframe]; | ||
5610 | |||
5611 | return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)]; | ||
5612 | } | ||
5613 | |||
5614 | static void init_explored_state(struct bpf_verifier_env *env, int idx) | ||
5615 | { | ||
5616 | env->insn_aux_data[idx].prune_point = true; | ||
5617 | } | ||
5439 | 5618 | ||
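
The explored-state list thus becomes a hash table whose bucket index mixes the instruction index with the current callsite; because different (insn_idx, callsite) pairs can land in the same bucket, the lookup in is_state_visited() below re-checks insn_idx while walking it. A toy model of that bucket walk, with invented structures (the verifier sizes the table to prog->len, so memory use matches the old per-insn array):

#include <stdio.h>
#include <stdlib.h>

struct toy_state {
	int insn_idx;
	int callsite;
	struct toy_state *next;
};

#define HTAB_SIZE 64	/* the verifier uses prog->len */

static struct toy_state *htab[HTAB_SIZE];

static struct toy_state **bucket(int insn_idx, int callsite)
{
	return &htab[(insn_idx ^ callsite) % HTAB_SIZE];
}

static void add_state(int insn_idx, int callsite)
{
	struct toy_state **head = bucket(insn_idx, callsite);
	struct toy_state *s = calloc(1, sizeof(*s));

	if (!s)
		exit(1);
	s->insn_idx = insn_idx;
	s->callsite = callsite;
	s->next = *head;
	*head = s;
}

static int count_candidates(int insn_idx, int callsite)
{
	struct toy_state *s;
	int n = 0;

	for (s = *bucket(insn_idx, callsite); s; s = s->next)
		if (s->insn_idx == insn_idx)	/* same filter as the walk below */
			n++;
	return n;
}

int main(void)
{
	add_state(10, 3);
	add_state(9, 0);	/* 9 ^ 0 == 10 ^ 3: same bucket, different insn */
	printf("candidates at insn 10: %d\n", count_candidates(10, 3));
	return 0;
}
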
5440 | /* t, w, e - match pseudo-code above: | 5619 | /* t, w, e - match pseudo-code above: |
5441 | * t - index of current instruction | 5620 | * t - index of current instruction |
@@ -5461,7 +5640,7 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) | |||
5461 | 5640 | ||
5462 | if (e == BRANCH) | 5641 | if (e == BRANCH) |
5463 | /* mark branch target for state pruning */ | 5642 | /* mark branch target for state pruning */ |
5464 | env->explored_states[w] = STATE_LIST_MARK; | 5643 | init_explored_state(env, w); |
5465 | 5644 | ||
5466 | if (insn_state[w] == 0) { | 5645 | if (insn_state[w] == 0) { |
5467 | /* tree-edge */ | 5646 | /* tree-edge */ |
@@ -5529,9 +5708,9 @@ peek_stack: | |||
5529 | else if (ret < 0) | 5708 | else if (ret < 0) |
5530 | goto err_free; | 5709 | goto err_free; |
5531 | if (t + 1 < insn_cnt) | 5710 | if (t + 1 < insn_cnt) |
5532 | env->explored_states[t + 1] = STATE_LIST_MARK; | 5711 | init_explored_state(env, t + 1); |
5533 | if (insns[t].src_reg == BPF_PSEUDO_CALL) { | 5712 | if (insns[t].src_reg == BPF_PSEUDO_CALL) { |
5534 | env->explored_states[t] = STATE_LIST_MARK; | 5713 | init_explored_state(env, t); |
5535 | ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env); | 5714 | ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env); |
5536 | if (ret == 1) | 5715 | if (ret == 1) |
5537 | goto peek_stack; | 5716 | goto peek_stack; |
@@ -5554,10 +5733,10 @@ peek_stack: | |||
5554 | * after every call and jump | 5733 | * after every call and jump |
5555 | */ | 5734 | */ |
5556 | if (t + 1 < insn_cnt) | 5735 | if (t + 1 < insn_cnt) |
5557 | env->explored_states[t + 1] = STATE_LIST_MARK; | 5736 | init_explored_state(env, t + 1); |
5558 | } else { | 5737 | } else { |
5559 | /* conditional jump with two edges */ | 5738 | /* conditional jump with two edges */ |
5560 | env->explored_states[t] = STATE_LIST_MARK; | 5739 | init_explored_state(env, t); |
5561 | ret = push_insn(t, t + 1, FALLTHROUGH, env); | 5740 | ret = push_insn(t, t + 1, FALLTHROUGH, env); |
5562 | if (ret == 1) | 5741 | if (ret == 1) |
5563 | goto peek_stack; | 5742 | goto peek_stack; |
@@ -6005,12 +6184,10 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn, | |||
6005 | struct bpf_verifier_state_list *sl; | 6184 | struct bpf_verifier_state_list *sl; |
6006 | int i; | 6185 | int i; |
6007 | 6186 | ||
6008 | sl = env->explored_states[insn]; | 6187 | sl = *explored_state(env, insn); |
6009 | if (!sl) | 6188 | while (sl) { |
6010 | return; | 6189 | if (sl->state.insn_idx != insn || |
6011 | 6190 | sl->state.curframe != cur->curframe) | |
6012 | while (sl != STATE_LIST_MARK) { | ||
6013 | if (sl->state.curframe != cur->curframe) | ||
6014 | goto next; | 6191 | goto next; |
6015 | for (i = 0; i <= cur->curframe; i++) | 6192 | for (i = 0; i <= cur->curframe; i++) |
6016 | if (sl->state.frame[i]->callsite != cur->frame[i]->callsite) | 6193 | if (sl->state.frame[i]->callsite != cur->frame[i]->callsite) |
@@ -6292,20 +6469,33 @@ static bool states_equal(struct bpf_verifier_env *env, | |||
6292 | return true; | 6469 | return true; |
6293 | } | 6470 | } |
6294 | 6471 | ||
6472 | /* Return 0 if no propagation happened, a negative error code if an error | ||
6473 | * occurred, and otherwise the propagated read flag. | ||
6474 | */ | ||
6295 | static int propagate_liveness_reg(struct bpf_verifier_env *env, | 6475 | static int propagate_liveness_reg(struct bpf_verifier_env *env, |
6296 | struct bpf_reg_state *reg, | 6476 | struct bpf_reg_state *reg, |
6297 | struct bpf_reg_state *parent_reg) | 6477 | struct bpf_reg_state *parent_reg) |
6298 | { | 6478 | { |
6479 | u8 parent_flag = parent_reg->live & REG_LIVE_READ; | ||
6480 | u8 flag = reg->live & REG_LIVE_READ; | ||
6299 | int err; | 6481 | int err; |
6300 | 6482 | ||
6301 | if (parent_reg->live & REG_LIVE_READ || !(reg->live & REG_LIVE_READ)) | 6483 | /* When we get here, the read flags of PARENT_REG or REG could be any of
6484 | * REG_LIVE_READ64, REG_LIVE_READ32 or REG_LIVE_NONE. There is no need | ||
6485 | * for propagation if PARENT_REG already has the strongest flag, REG_LIVE_READ64. | ||
6486 | */ | ||
6487 | if (parent_flag == REG_LIVE_READ64 || | ||
6488 | /* Or if there is no read flag from REG. */ | ||
6489 | !flag || | ||
6490 | /* Or if the read flag from REG is the same as PARENT_REG. */ | ||
6491 | parent_flag == flag) | ||
6302 | return 0; | 6492 | return 0; |
6303 | 6493 | ||
6304 | err = mark_reg_read(env, reg, parent_reg); | 6494 | err = mark_reg_read(env, reg, parent_reg, flag); |
6305 | if (err) | 6495 | if (err) |
6306 | return err; | 6496 | return err; |
6307 | 6497 | ||
6308 | return 0; | 6498 | return flag; |
6309 | } | 6499 | } |
6310 | 6500 | ||
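
Spelled out, the early return above means a read flag is only pushed to the parent when it adds information. A toy enumeration of that rule (the flag constants are illustrative, not the kernel's):

#include <stdio.h>

#define LIVE_NONE	0x0
#define LIVE_READ32	0x1
#define LIVE_READ64	0x2

/* Mirror of the early-return condition above, on illustrative constants. */
static int toy_propagate(int parent_flag, int flag)
{
	if (parent_flag == LIVE_READ64 || !flag || parent_flag == flag)
		return 0;	/* nothing new to tell the parent */
	return flag;		/* push the child's read flag up */
}

static const char *name(int f)
{
	return f == LIVE_READ64 ? "READ64" :
	       f == LIVE_READ32 ? "READ32" : "NONE";
}

int main(void)
{
	int flags[] = { LIVE_NONE, LIVE_READ32, LIVE_READ64 };
	int i, j;

	for (i = 0; i < 3; i++)
		for (j = 0; j < 3; j++)
			printf("parent=%-6s child=%-6s -> propagate %s\n",
			       name(flags[i]), name(flags[j]),
			       name(toy_propagate(flags[i], flags[j])));
	return 0;
}
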
6311 | /* A write screens off any subsequent reads; but write marks come from the | 6501 | /* A write screens off any subsequent reads; but write marks come from the |
@@ -6339,8 +6529,10 @@ static int propagate_liveness(struct bpf_verifier_env *env, | |||
6339 | for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) { | 6529 | for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) { |
6340 | err = propagate_liveness_reg(env, &state_reg[i], | 6530 | err = propagate_liveness_reg(env, &state_reg[i], |
6341 | &parent_reg[i]); | 6531 | &parent_reg[i]); |
6342 | if (err) | 6532 | if (err < 0) |
6343 | return err; | 6533 | return err; |
6534 | if (err == REG_LIVE_READ64) | ||
6535 | mark_insn_zext(env, &parent_reg[i]); | ||
6344 | } | 6536 | } |
6345 | 6537 | ||
6346 | /* Propagate stack slots. */ | 6538 | /* Propagate stack slots. */ |
@@ -6350,11 +6542,11 @@ static int propagate_liveness(struct bpf_verifier_env *env, | |||
6350 | state_reg = &state->stack[i].spilled_ptr; | 6542 | state_reg = &state->stack[i].spilled_ptr; |
6351 | err = propagate_liveness_reg(env, state_reg, | 6543 | err = propagate_liveness_reg(env, state_reg, |
6352 | parent_reg); | 6544 | parent_reg); |
6353 | if (err) | 6545 | if (err < 0) |
6354 | return err; | 6546 | return err; |
6355 | } | 6547 | } |
6356 | } | 6548 | } |
6357 | return err; | 6549 | return 0; |
6358 | } | 6550 | } |
6359 | 6551 | ||
6360 | static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) | 6552 | static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) |
@@ -6364,18 +6556,21 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) | |||
6364 | struct bpf_verifier_state *cur = env->cur_state, *new; | 6556 | struct bpf_verifier_state *cur = env->cur_state, *new; |
6365 | int i, j, err, states_cnt = 0; | 6557 | int i, j, err, states_cnt = 0; |
6366 | 6558 | ||
6367 | pprev = &env->explored_states[insn_idx]; | 6559 | if (!env->insn_aux_data[insn_idx].prune_point) |
6368 | sl = *pprev; | ||
6369 | |||
6370 | if (!sl) | ||
6371 | /* this 'insn_idx' instruction wasn't marked, so we will not | 6560 | /* this 'insn_idx' instruction wasn't marked, so we will not |
6372 | * be doing state search here | 6561 | * be doing state search here |
6373 | */ | 6562 | */ |
6374 | return 0; | 6563 | return 0; |
6375 | 6564 | ||
6565 | pprev = explored_state(env, insn_idx); | ||
6566 | sl = *pprev; | ||
6567 | |||
6376 | clean_live_states(env, insn_idx, cur); | 6568 | clean_live_states(env, insn_idx, cur); |
6377 | 6569 | ||
6378 | while (sl != STATE_LIST_MARK) { | 6570 | while (sl) { |
6571 | states_cnt++; | ||
6572 | if (sl->state.insn_idx != insn_idx) | ||
6573 | goto next; | ||
6379 | if (states_equal(env, &sl->state, cur)) { | 6574 | if (states_equal(env, &sl->state, cur)) { |
6380 | sl->hit_cnt++; | 6575 | sl->hit_cnt++; |
6381 | /* reached equivalent register/stack state, | 6576 | /* reached equivalent register/stack state, |
@@ -6393,7 +6588,6 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) | |||
6393 | return err; | 6588 | return err; |
6394 | return 1; | 6589 | return 1; |
6395 | } | 6590 | } |
6396 | states_cnt++; | ||
6397 | sl->miss_cnt++; | 6591 | sl->miss_cnt++; |
6398 | /* heuristic to determine whether this state is beneficial | 6592 | /* heuristic to determine whether this state is beneficial |
6399 | * to keep checking from state equivalence point of view. | 6593 | * to keep checking from state equivalence point of view. |
@@ -6420,6 +6614,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) | |||
6420 | sl = *pprev; | 6614 | sl = *pprev; |
6421 | continue; | 6615 | continue; |
6422 | } | 6616 | } |
6617 | next: | ||
6423 | pprev = &sl->next; | 6618 | pprev = &sl->next; |
6424 | sl = *pprev; | 6619 | sl = *pprev; |
6425 | } | 6620 | } |
@@ -6451,8 +6646,9 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) | |||
6451 | kfree(new_sl); | 6646 | kfree(new_sl); |
6452 | return err; | 6647 | return err; |
6453 | } | 6648 | } |
6454 | new_sl->next = env->explored_states[insn_idx]; | 6649 | new->insn_idx = insn_idx; |
6455 | env->explored_states[insn_idx] = new_sl; | 6650 | new_sl->next = *explored_state(env, insn_idx); |
6651 | *explored_state(env, insn_idx) = new_sl; | ||
6456 | /* connect new state to parentage chain. Current frame needs all | 6652 | /* connect new state to parentage chain. Current frame needs all |
6457 | * registers connected. Only r6 - r9 of the callers are alive (pushed | 6653 | * registers connected. Only r6 - r9 of the callers are alive (pushed |
6458 | * to the stack implicitly by JITs) so in callers' frames connect just | 6654 | * to the stack implicitly by JITs) so in callers' frames connect just |
@@ -7130,14 +7326,23 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) | |||
7130 | * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying | 7326 | * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying |
7131 | * [0, off) and [off, end) to new locations, so the patched range stays zero | 7327 | * [0, off) and [off, end) to new locations, so the patched range stays zero |
7132 | */ | 7328 | */ |
7133 | static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len, | 7329 | static int adjust_insn_aux_data(struct bpf_verifier_env *env, |
7134 | u32 off, u32 cnt) | 7330 | struct bpf_prog *new_prog, u32 off, u32 cnt) |
7135 | { | 7331 | { |
7136 | struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; | 7332 | struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; |
7333 | struct bpf_insn *insn = new_prog->insnsi; | ||
7334 | u32 prog_len; | ||
7137 | int i; | 7335 | int i; |
7138 | 7336 | ||
7337 | /* aux info at OFF always needs adjustment, no matter whether the fast path | ||
7338 | * (cnt == 1) is taken or not. There is no guarantee that the INSN at OFF is | ||
7339 | * the original insn of the old prog. | ||
7340 | */ | ||
7341 | old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1); | ||
7342 | |||
7139 | if (cnt == 1) | 7343 | if (cnt == 1) |
7140 | return 0; | 7344 | return 0; |
7345 | prog_len = new_prog->len; | ||
7141 | new_data = vzalloc(array_size(prog_len, | 7346 | new_data = vzalloc(array_size(prog_len, |
7142 | sizeof(struct bpf_insn_aux_data))); | 7347 | sizeof(struct bpf_insn_aux_data))); |
7143 | if (!new_data) | 7348 | if (!new_data) |
@@ -7145,8 +7350,10 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len, | |||
7145 | memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); | 7350 | memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); |
7146 | memcpy(new_data + off + cnt - 1, old_data + off, | 7351 | memcpy(new_data + off + cnt - 1, old_data + off, |
7147 | sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); | 7352 | sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); |
7148 | for (i = off; i < off + cnt - 1; i++) | 7353 | for (i = off; i < off + cnt - 1; i++) { |
7149 | new_data[i].seen = true; | 7354 | new_data[i].seen = true; |
7355 | new_data[i].zext_dst = insn_has_def32(env, insn + i); | ||
7356 | } | ||
7150 | env->insn_aux_data = new_data; | 7357 | env->insn_aux_data = new_data; |
7151 | vfree(old_data); | 7358 | vfree(old_data); |
7152 | return 0; | 7359 | return 0; |
@@ -7179,7 +7386,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of | |||
7179 | env->insn_aux_data[off].orig_idx); | 7386 | env->insn_aux_data[off].orig_idx); |
7180 | return NULL; | 7387 | return NULL; |
7181 | } | 7388 | } |
7182 | if (adjust_insn_aux_data(env, new_prog->len, off, len)) | 7389 | if (adjust_insn_aux_data(env, new_prog, off, len)) |
7183 | return NULL; | 7390 | return NULL; |
7184 | adjust_subprog_starts(env, off, len); | 7391 | adjust_subprog_starts(env, off, len); |
7185 | return new_prog; | 7392 | return new_prog; |
@@ -7443,6 +7650,84 @@ static int opt_remove_nops(struct bpf_verifier_env *env) | |||
7443 | return 0; | 7650 | return 0; |
7444 | } | 7651 | } |
7445 | 7652 | ||
7653 | static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, | ||
7654 | const union bpf_attr *attr) | ||
7655 | { | ||
7656 | struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4]; | ||
7657 | struct bpf_insn_aux_data *aux = env->insn_aux_data; | ||
7658 | int i, patch_len, delta = 0, len = env->prog->len; | ||
7659 | struct bpf_insn *insns = env->prog->insnsi; | ||
7660 | struct bpf_prog *new_prog; | ||
7661 | bool rnd_hi32; | ||
7662 | |||
7663 | rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32; | ||
7664 | zext_patch[1] = BPF_ZEXT_REG(0); | ||
7665 | rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0); | ||
7666 | rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32); | ||
7667 | rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX); | ||
7668 | for (i = 0; i < len; i++) { | ||
7669 | int adj_idx = i + delta; | ||
7670 | struct bpf_insn insn; | ||
7671 | |||
7672 | insn = insns[adj_idx]; | ||
7673 | if (!aux[adj_idx].zext_dst) { | ||
7674 | u8 code, class; | ||
7675 | u32 imm_rnd; | ||
7676 | |||
7677 | if (!rnd_hi32) | ||
7678 | continue; | ||
7679 | |||
7680 | code = insn.code; | ||
7681 | class = BPF_CLASS(code); | ||
7682 | if (insn_no_def(&insn)) | ||
7683 | continue; | ||
7684 | |||
7685 | /* NOTE: arg "reg" (the fourth one) is only used for | ||
7686 | * BPF_STX, which has been ruled out by the above | ||
7687 | * check, so it is safe to pass NULL here. | ||
7688 | */ | ||
7689 | if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) { | ||
7690 | if (class == BPF_LD && | ||
7691 | BPF_MODE(code) == BPF_IMM) | ||
7692 | i++; | ||
7693 | continue; | ||
7694 | } | ||
7695 | |||
7696 | /* A ctx load could be transformed into a wider load. */ | ||
7697 | if (class == BPF_LDX && | ||
7698 | aux[adj_idx].ptr_type == PTR_TO_CTX) | ||
7699 | continue; | ||
7700 | |||
7701 | imm_rnd = get_random_int(); | ||
7702 | rnd_hi32_patch[0] = insn; | ||
7703 | rnd_hi32_patch[1].imm = imm_rnd; | ||
7704 | rnd_hi32_patch[3].dst_reg = insn.dst_reg; | ||
7705 | patch = rnd_hi32_patch; | ||
7706 | patch_len = 4; | ||
7707 | goto apply_patch_buffer; | ||
7708 | } | ||
7709 | |||
7710 | if (!bpf_jit_needs_zext()) | ||
7711 | continue; | ||
7712 | |||
7713 | zext_patch[0] = insn; | ||
7714 | zext_patch[1].dst_reg = insn.dst_reg; | ||
7715 | zext_patch[1].src_reg = insn.dst_reg; | ||
7716 | patch = zext_patch; | ||
7717 | patch_len = 2; | ||
7718 | apply_patch_buffer: | ||
7719 | new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len); | ||
7720 | if (!new_prog) | ||
7721 | return -ENOMEM; | ||
7722 | env->prog = new_prog; | ||
7723 | insns = new_prog->insnsi; | ||
7724 | aux = env->insn_aux_data; | ||
7725 | delta += patch_len - 1; | ||
7726 | } | ||
7727 | |||
7728 | return 0; | ||
7729 | } | ||
7730 | |||
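
Reading the patch buffers above as templates: zext_patch is { original insn, BPF_ZEXT_REG(dst) }, i.e. the 32-bit definition followed by a mov32 of the register onto itself that JITs turn into an explicit zero extension; it is applied when the insn's zext_dst is set and bpf_jit_needs_zext() says the JIT wants it. rnd_hi32_patch is { original insn, AX = imm_rnd, AX <<= 32, dst |= AX } and is only emitted under BPF_F_TEST_RND_HI32, for 32-bit definitions whose upper half the analysis claims is never read, so that a wrong claim shows up as corrupted data instead of silently working. A userspace model of the two effects on a 64-bit register image (illustration only, not the patching machinery):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* result of a 32-bit def: low half written, upper half cleared */
	uint64_t dst = 0xdeadbeefULL;
	uint32_t imm_rnd = (uint32_t)rand();
	uint64_t zexted, poisoned;

	/* zext patch: "wX = wX" guarantees the upper 32 bits are zero */
	zexted = (uint32_t)dst;

	/* rnd_hi32 patch: AX = imm_rnd; AX <<= 32; dst |= AX */
	poisoned = dst | ((uint64_t)imm_rnd << 32);

	printf("zexted   hi32 = 0x%08" PRIx32 ", lo32 = 0x%08" PRIx32 "\n",
	       (uint32_t)(zexted >> 32), (uint32_t)zexted);
	printf("poisoned hi32 = 0x%08" PRIx32 ", lo32 = 0x%08" PRIx32 "\n",
	       (uint32_t)(poisoned >> 32), (uint32_t)poisoned);
	return 0;
}
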
7446 | /* convert load instructions that access fields of a context type into a | 7731 | /* convert load instructions that access fields of a context type into a |
7447 | * sequence of instructions that access fields of the underlying structure: | 7732 | * sequence of instructions that access fields of the underlying structure: |
7448 | * struct __sk_buff -> struct sk_buff | 7733 | * struct __sk_buff -> struct sk_buff |
@@ -8130,16 +8415,15 @@ static void free_states(struct bpf_verifier_env *env) | |||
8130 | if (!env->explored_states) | 8415 | if (!env->explored_states) |
8131 | return; | 8416 | return; |
8132 | 8417 | ||
8133 | for (i = 0; i < env->prog->len; i++) { | 8418 | for (i = 0; i < state_htab_size(env); i++) { |
8134 | sl = env->explored_states[i]; | 8419 | sl = env->explored_states[i]; |
8135 | 8420 | ||
8136 | if (sl) | 8421 | while (sl) { |
8137 | while (sl != STATE_LIST_MARK) { | 8422 | sln = sl->next; |
8138 | sln = sl->next; | 8423 | free_verifier_state(&sl->state, false); |
8139 | free_verifier_state(&sl->state, false); | 8424 | kfree(sl); |
8140 | kfree(sl); | 8425 | sl = sln; |
8141 | sl = sln; | 8426 | } |
8142 | } | ||
8143 | } | 8427 | } |
8144 | 8428 | ||
8145 | kvfree(env->explored_states); | 8429 | kvfree(env->explored_states); |
@@ -8239,7 +8523,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, | |||
8239 | goto skip_full_check; | 8523 | goto skip_full_check; |
8240 | } | 8524 | } |
8241 | 8525 | ||
8242 | env->explored_states = kvcalloc(env->prog->len, | 8526 | env->explored_states = kvcalloc(state_htab_size(env), |
8243 | sizeof(struct bpf_verifier_state_list *), | 8527 | sizeof(struct bpf_verifier_state_list *), |
8244 | GFP_USER); | 8528 | GFP_USER); |
8245 | ret = -ENOMEM; | 8529 | ret = -ENOMEM; |
@@ -8294,6 +8578,15 @@ skip_full_check: | |||
8294 | if (ret == 0) | 8578 | if (ret == 0) |
8295 | ret = fixup_bpf_calls(env); | 8579 | ret = fixup_bpf_calls(env); |
8296 | 8580 | ||
8581 | /* do the 32-bit optimization after insn patching is done so that the patched | ||
8582 | * insns can be handled correctly. | ||
8583 | */ | ||
8584 | if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) { | ||
8585 | ret = opt_subreg_zext_lo32_rnd_hi32(env, attr); | ||
8586 | env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret | ||
8587 | : false; | ||
8588 | } | ||
8589 | |||
8297 | if (ret == 0) | 8590 | if (ret == 0) |
8298 | ret = fixup_call_args(env); | 8591 | ret = fixup_call_args(env); |
8299 | 8592 | ||
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c index 686d244e798d..22066c28ba61 100644 --- a/kernel/bpf/xskmap.c +++ b/kernel/bpf/xskmap.c | |||
@@ -37,13 +37,9 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) | |||
37 | 37 | ||
38 | cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *); | 38 | cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *); |
39 | cost += sizeof(struct list_head) * num_possible_cpus(); | 39 | cost += sizeof(struct list_head) * num_possible_cpus(); |
40 | if (cost >= U32_MAX - PAGE_SIZE) | ||
41 | goto free_m; | ||
42 | |||
43 | m->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; | ||
44 | 40 | ||
45 | /* Notice returns -EPERM on if map size is larger than memlock limit */ | 41 | /* Notice returns -EPERM on if map size is larger than memlock limit */ |
46 | err = bpf_map_precharge_memlock(m->map.pages); | 42 | err = bpf_map_charge_init(&m->map.memory, cost); |
47 | if (err) | 43 | if (err) |
48 | goto free_m; | 44 | goto free_m; |
49 | 45 | ||
@@ -51,7 +47,7 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) | |||
51 | 47 | ||
52 | m->flush_list = alloc_percpu(struct list_head); | 48 | m->flush_list = alloc_percpu(struct list_head); |
53 | if (!m->flush_list) | 49 | if (!m->flush_list) |
54 | goto free_m; | 50 | goto free_charge; |
55 | 51 | ||
56 | for_each_possible_cpu(cpu) | 52 | for_each_possible_cpu(cpu) |
57 | INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu)); | 53 | INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu)); |
@@ -65,6 +61,8 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) | |||
65 | 61 | ||
66 | free_percpu: | 62 | free_percpu: |
67 | free_percpu(m->flush_list); | 63 | free_percpu(m->flush_list); |
64 | free_charge: | ||
65 | bpf_map_charge_finish(&m->map.memory); | ||
68 | free_m: | 66 | free_m: |
69 | kfree(m); | 67 | kfree(m); |
70 | return ERR_PTR(err); | 68 | return ERR_PTR(err); |
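
The memlock accounting pattern visible in this hunk is: charge the full byte cost up front with bpf_map_charge_init(), undo it with bpf_map_charge_finish() on any later allocation failure, and otherwise let the charge follow the map (other converted allocators hand it over with bpf_map_charge_move()). A stubbed userspace model of that error-handling shape, with invented stand-ins for the kernel helpers:

#include <stdio.h>
#include <stdlib.h>

struct toy_map_memory { unsigned long long bytes; };
struct toy_map { struct toy_map_memory memory; };

/* Stand-in for bpf_map_charge_init(): the real helper charges the memlock
 * rlimit and can fail, e.g. with -EPERM.
 */
static int toy_charge_init(struct toy_map_memory *mem, unsigned long long bytes)
{
	mem->bytes = bytes;
	return 0;
}

/* Stand-in for bpf_map_charge_finish(): drop a charge that was never moved. */
static void toy_charge_finish(struct toy_map_memory *mem)
{
	mem->bytes = 0;
}

/* Stand-in for bpf_map_charge_move(): the map takes ownership of the charge. */
static void toy_charge_move(struct toy_map_memory *dst, struct toy_map_memory *src)
{
	*dst = *src;
	src->bytes = 0;
}

static struct toy_map *toy_map_alloc(unsigned long long cost)
{
	struct toy_map_memory mem;
	struct toy_map *map;

	if (toy_charge_init(&mem, cost))
		return NULL;

	map = calloc(1, sizeof(*map));
	if (!map) {
		toy_charge_finish(&mem);	/* error path: undo the charge */
		return NULL;
	}
	toy_charge_move(&map->memory, &mem);	/* success: charge follows the map */
	return map;
}

int main(void)
{
	struct toy_map *map = toy_map_alloc(4096);

	printf("charged bytes: %llu\n", map ? map->memory.bytes : 0ULL);
	free(map);
	return 0;
}
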
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 217cec4e22c6..ef9cfbfc82a9 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c | |||
@@ -4955,8 +4955,6 @@ static void css_release_work_fn(struct work_struct *work) | |||
4955 | if (cgrp->kn) | 4955 | if (cgrp->kn) |
4956 | RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv, | 4956 | RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv, |
4957 | NULL); | 4957 | NULL); |
4958 | |||
4959 | cgroup_bpf_put(cgrp); | ||
4960 | } | 4958 | } |
4961 | 4959 | ||
4962 | mutex_unlock(&cgroup_mutex); | 4960 | mutex_unlock(&cgroup_mutex); |
@@ -5482,6 +5480,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
5482 | 5480 | ||
5483 | cgroup1_check_for_release(parent); | 5481 | cgroup1_check_for_release(parent); |
5484 | 5482 | ||
5483 | cgroup_bpf_offline(cgrp); | ||
5484 | |||
5485 | /* put the base reference */ | 5485 | /* put the base reference */ |
5486 | percpu_ref_kill(&cgrp->self.refcnt); | 5486 | percpu_ref_kill(&cgrp->self.refcnt); |
5487 | 5487 | ||
@@ -6221,6 +6221,7 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) | |||
6221 | * Don't use cgroup_get_live(). | 6221 | * Don't use cgroup_get_live(). |
6222 | */ | 6222 | */ |
6223 | cgroup_get(sock_cgroup_ptr(skcd)); | 6223 | cgroup_get(sock_cgroup_ptr(skcd)); |
6224 | cgroup_bpf_get(sock_cgroup_ptr(skcd)); | ||
6224 | return; | 6225 | return; |
6225 | } | 6226 | } |
6226 | 6227 | ||
@@ -6232,6 +6233,7 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) | |||
6232 | cset = task_css_set(current); | 6233 | cset = task_css_set(current); |
6233 | if (likely(cgroup_tryget(cset->dfl_cgrp))) { | 6234 | if (likely(cgroup_tryget(cset->dfl_cgrp))) { |
6234 | skcd->val = (unsigned long)cset->dfl_cgrp; | 6235 | skcd->val = (unsigned long)cset->dfl_cgrp; |
6236 | cgroup_bpf_get(cset->dfl_cgrp); | ||
6235 | break; | 6237 | break; |
6236 | } | 6238 | } |
6237 | cpu_relax(); | 6239 | cpu_relax(); |
@@ -6242,7 +6244,10 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) | |||
6242 | 6244 | ||
6243 | void cgroup_sk_free(struct sock_cgroup_data *skcd) | 6245 | void cgroup_sk_free(struct sock_cgroup_data *skcd) |
6244 | { | 6246 | { |
6245 | cgroup_put(sock_cgroup_ptr(skcd)); | 6247 | struct cgroup *cgrp = sock_cgroup_ptr(skcd); |
6248 | |||
6249 | cgroup_bpf_put(cgrp); | ||
6250 | cgroup_put(cgrp); | ||
6246 | } | 6251 | } |
6247 | 6252 | ||
6248 | #endif /* CONFIG_SOCK_CGROUP_DATA */ | 6253 | #endif /* CONFIG_SOCK_CGROUP_DATA */ |
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f92d6ad5e080..3994a231eb92 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c | |||
@@ -19,6 +19,9 @@ | |||
19 | #include "trace_probe.h" | 19 | #include "trace_probe.h" |
20 | #include "trace.h" | 20 | #include "trace.h" |
21 | 21 | ||
22 | #define bpf_event_rcu_dereference(p) \ | ||
23 | rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex)) | ||
24 | |||
22 | #ifdef CONFIG_MODULES | 25 | #ifdef CONFIG_MODULES |
23 | struct bpf_trace_module { | 26 | struct bpf_trace_module { |
24 | struct module *module; | 27 | struct module *module; |
@@ -567,6 +570,69 @@ static const struct bpf_func_proto bpf_probe_read_str_proto = { | |||
567 | .arg3_type = ARG_ANYTHING, | 570 | .arg3_type = ARG_ANYTHING, |
568 | }; | 571 | }; |
569 | 572 | ||
573 | struct send_signal_irq_work { | ||
574 | struct irq_work irq_work; | ||
575 | struct task_struct *task; | ||
576 | u32 sig; | ||
577 | }; | ||
578 | |||
579 | static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work); | ||
580 | |||
581 | static void do_bpf_send_signal(struct irq_work *entry) | ||
582 | { | ||
583 | struct send_signal_irq_work *work; | ||
584 | |||
585 | work = container_of(entry, struct send_signal_irq_work, irq_work); | ||
586 | group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, PIDTYPE_TGID); | ||
587 | } | ||
588 | |||
589 | BPF_CALL_1(bpf_send_signal, u32, sig) | ||
590 | { | ||
591 | struct send_signal_irq_work *work = NULL; | ||
592 | |||
593 | /* Similar to bpf_probe_write_user, the task needs to be | ||
594 | * in a sound condition and kernel memory access must be | ||
595 | * permitted in order to send a signal to the current | ||
596 | * task. | ||
597 | */ | ||
598 | if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING))) | ||
599 | return -EPERM; | ||
600 | if (unlikely(uaccess_kernel())) | ||
601 | return -EPERM; | ||
602 | if (unlikely(!nmi_uaccess_okay())) | ||
603 | return -EPERM; | ||
604 | |||
605 | if (in_nmi()) { | ||
606 | /* Do an early check on signal validity. Otherwise, | ||
607 | * the error is lost in deferred irq_work. | ||
608 | */ | ||
609 | if (unlikely(!valid_signal(sig))) | ||
610 | return -EINVAL; | ||
611 | |||
612 | work = this_cpu_ptr(&send_signal_work); | ||
613 | if (work->irq_work.flags & IRQ_WORK_BUSY) | ||
614 | return -EBUSY; | ||
615 | |||
616 | /* Add the current task, which is the target of the signal, | ||
617 | * to the irq_work. The current task may change by the time the | ||
618 | * queued irq works get executed. | ||
619 | */ | ||
620 | work->task = current; | ||
621 | work->sig = sig; | ||
622 | irq_work_queue(&work->irq_work); | ||
623 | return 0; | ||
624 | } | ||
625 | |||
626 | return group_send_sig_info(sig, SEND_SIG_PRIV, current, PIDTYPE_TGID); | ||
627 | } | ||
628 | |||
629 | static const struct bpf_func_proto bpf_send_signal_proto = { | ||
630 | .func = bpf_send_signal, | ||
631 | .gpl_only = false, | ||
632 | .ret_type = RET_INTEGER, | ||
633 | .arg1_type = ARG_ANYTHING, | ||
634 | }; | ||
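
A hedged usage sketch of the new helper from a tracing program, written in the helper-declaration style the selftests of this era use; it assumes a uapi linux/bpf.h that already defines BPF_FUNC_send_signal, and the probed function and section name are arbitrary examples. Something like "clang -O2 -target bpf -c" would build it.

#include <linux/bpf.h>

#define SEC(name) __attribute__((section(name), used))

/* Old-style helper declaration (what bpf_helpers.h of this era does). */
static int (*bpf_send_signal)(unsigned int sig) =
	(void *) BPF_FUNC_send_signal;

struct pt_regs;

SEC("kprobe/do_sys_open")
int send_sig_on_open(struct pt_regs *ctx)
{
	/* SIGUSR1 == 10 on x86-64.  Returns 0 on success or a negative errno,
	 * e.g. -EBUSY when the per-cpu irq_work slot is still in flight.
	 */
	bpf_send_signal(10);
	return 0;
}

char _license[] SEC("license") = "GPL";
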
635 | |||
570 | static const struct bpf_func_proto * | 636 | static const struct bpf_func_proto * |
571 | tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | 637 | tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) |
572 | { | 638 | { |
@@ -617,6 +683,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | |||
617 | case BPF_FUNC_get_current_cgroup_id: | 683 | case BPF_FUNC_get_current_cgroup_id: |
618 | return &bpf_get_current_cgroup_id_proto; | 684 | return &bpf_get_current_cgroup_id_proto; |
619 | #endif | 685 | #endif |
686 | case BPF_FUNC_send_signal: | ||
687 | return &bpf_send_signal_proto; | ||
620 | default: | 688 | default: |
621 | return NULL; | 689 | return NULL; |
622 | } | 690 | } |
@@ -1034,7 +1102,7 @@ static DEFINE_MUTEX(bpf_event_mutex); | |||
1034 | int perf_event_attach_bpf_prog(struct perf_event *event, | 1102 | int perf_event_attach_bpf_prog(struct perf_event *event, |
1035 | struct bpf_prog *prog) | 1103 | struct bpf_prog *prog) |
1036 | { | 1104 | { |
1037 | struct bpf_prog_array __rcu *old_array; | 1105 | struct bpf_prog_array *old_array; |
1038 | struct bpf_prog_array *new_array; | 1106 | struct bpf_prog_array *new_array; |
1039 | int ret = -EEXIST; | 1107 | int ret = -EEXIST; |
1040 | 1108 | ||
@@ -1052,7 +1120,7 @@ int perf_event_attach_bpf_prog(struct perf_event *event, | |||
1052 | if (event->prog) | 1120 | if (event->prog) |
1053 | goto unlock; | 1121 | goto unlock; |
1054 | 1122 | ||
1055 | old_array = event->tp_event->prog_array; | 1123 | old_array = bpf_event_rcu_dereference(event->tp_event->prog_array); |
1056 | if (old_array && | 1124 | if (old_array && |
1057 | bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) { | 1125 | bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) { |
1058 | ret = -E2BIG; | 1126 | ret = -E2BIG; |
@@ -1075,7 +1143,7 @@ unlock: | |||
1075 | 1143 | ||
1076 | void perf_event_detach_bpf_prog(struct perf_event *event) | 1144 | void perf_event_detach_bpf_prog(struct perf_event *event) |
1077 | { | 1145 | { |
1078 | struct bpf_prog_array __rcu *old_array; | 1146 | struct bpf_prog_array *old_array; |
1079 | struct bpf_prog_array *new_array; | 1147 | struct bpf_prog_array *new_array; |
1080 | int ret; | 1148 | int ret; |
1081 | 1149 | ||
@@ -1084,7 +1152,7 @@ void perf_event_detach_bpf_prog(struct perf_event *event) | |||
1084 | if (!event->prog) | 1152 | if (!event->prog) |
1085 | goto unlock; | 1153 | goto unlock; |
1086 | 1154 | ||
1087 | old_array = event->tp_event->prog_array; | 1155 | old_array = bpf_event_rcu_dereference(event->tp_event->prog_array); |
1088 | ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array); | 1156 | ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array); |
1089 | if (ret == -ENOENT) | 1157 | if (ret == -ENOENT) |
1090 | goto unlock; | 1158 | goto unlock; |
@@ -1106,6 +1174,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info) | |||
1106 | { | 1174 | { |
1107 | struct perf_event_query_bpf __user *uquery = info; | 1175 | struct perf_event_query_bpf __user *uquery = info; |
1108 | struct perf_event_query_bpf query = {}; | 1176 | struct perf_event_query_bpf query = {}; |
1177 | struct bpf_prog_array *progs; | ||
1109 | u32 *ids, prog_cnt, ids_len; | 1178 | u32 *ids, prog_cnt, ids_len; |
1110 | int ret; | 1179 | int ret; |
1111 | 1180 | ||
@@ -1130,10 +1199,8 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info) | |||
1130 | */ | 1199 | */ |
1131 | 1200 | ||
1132 | mutex_lock(&bpf_event_mutex); | 1201 | mutex_lock(&bpf_event_mutex); |
1133 | ret = bpf_prog_array_copy_info(event->tp_event->prog_array, | 1202 | progs = bpf_event_rcu_dereference(event->tp_event->prog_array); |
1134 | ids, | 1203 | ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt); |
1135 | ids_len, | ||
1136 | &prog_cnt); | ||
1137 | mutex_unlock(&bpf_event_mutex); | 1204 | mutex_unlock(&bpf_event_mutex); |
1138 | 1205 | ||
1139 | if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) || | 1206 | if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) || |
@@ -1343,5 +1410,18 @@ static int __init bpf_event_init(void) | |||
1343 | return 0; | 1410 | return 0; |
1344 | } | 1411 | } |
1345 | 1412 | ||
1413 | static int __init send_signal_irq_work_init(void) | ||
1414 | { | ||
1415 | int cpu; | ||
1416 | struct send_signal_irq_work *work; | ||
1417 | |||
1418 | for_each_possible_cpu(cpu) { | ||
1419 | work = per_cpu_ptr(&send_signal_work, cpu); | ||
1420 | init_irq_work(&work->irq_work, do_bpf_send_signal); | ||
1421 | } | ||
1422 | return 0; | ||
1423 | } | ||
1424 | |||
1346 | fs_initcall(bpf_event_init); | 1425 | fs_initcall(bpf_event_init); |
1426 | subsys_initcall(send_signal_irq_work_init); | ||
1347 | #endif /* CONFIG_MODULES */ | 1427 | #endif /* CONFIG_MODULES */ |