author    Alexei Starovoitov <ast@kernel.org>    2018-01-07 20:33:02 -0500
committer Daniel Borkmann <daniel@iogearbox.net> 2018-01-08 18:53:49 -0500
commit    b2157399cc9898260d6031c5bfe45fe137c1fbe7
tree      42bfbdc815713fa37f3874c4f46e0f892b08e1f8
parent    2b36047e7889b7efee22c11e17f035f721855731
bpf: prevent out-of-bounds speculation
Under speculation, CPUs may mis-predict branches in bounds checks. Thus,
memory accesses under a bounds check may be speculated even if the bounds
check fails, providing a primitive for building a side channel.

To avoid leaking kernel data, round up array-based maps and mask the index
after the bounds check, so that a speculated load with an out-of-bounds
index will load either a valid value from the array or zero from the
padded area.

Unconditionally mask the index for all array types, even when max_entries
is not rounded to a power of 2 for the root user. When a map is created by
an unprivileged user, generate a sequence of bpf insns that includes an
AND operation, to make sure that the JITed code includes the same
'index & index_mask' operation.

If a prog_array map is created by an unprivileged user, replace

  bpf_tail_call(ctx, map, index);

with

  if (index >= max_entries) {
      index &= map->index_mask;
      bpf_tail_call(ctx, map, index);
  }

(along with the roundup to a power of 2) to prevent out-of-bounds
speculation. There is a secondary, redundant 'if (index >= max_entries)'
in the interpreter and in all JITs, but it can be optimized out later if
necessary.

Other array-like maps (cpumap, devmap, sockmap, perf_event_array,
cgroup_array) cannot be used by unprivileged users, so no changes are
needed there.

This fixes the bpf side of "Variant 1: bounds check bypass (CVE-2017-5753)"
on all architectures, with and without JIT.

v2->v3:
Daniel noticed that the attack can potentially be crafted via syscall
commands without loading a program, so add masking to those paths as well.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
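The core idea of the fix, as a minimal stand-alone C sketch (the names
masked_array, pow2_roundup, masked_array_alloc and masked_lookup are
illustrative only and are not kernel identifiers; the kernel itself uses
roundup_pow_of_two() and struct bpf_array):

  #include <stdint.h>
  #include <stdlib.h>

  /* user-space stand-in for the kernel's roundup_pow_of_two() */
  static uint32_t pow2_roundup(uint32_t n)
  {
          uint32_t p = 1;

          while (p < n)
                  p <<= 1;
          return p;
  }

  struct masked_array {
          uint32_t max_entries;   /* user-visible bound, e.g. 5  */
          uint32_t index_mask;    /* pow2_roundup(5) - 1 == 7    */
          uint64_t *value;        /* index_mask + 1 zeroed slots */
  };

  static struct masked_array *masked_array_alloc(uint32_t max_entries)
  {
          struct masked_array *a = malloc(sizeof(*a));

          if (!a)
                  return NULL;
          a->max_entries = max_entries;
          a->index_mask = pow2_roundup(max_entries) - 1;
          /* pad the allocation to a power of 2 and zero the tail */
          a->value = calloc((uint64_t)a->index_mask + 1, sizeof(uint64_t));
          if (!a->value) {
                  free(a);
                  return NULL;
          }
          return a;
  }

  static uint64_t *masked_lookup(struct masked_array *a, uint32_t index)
  {
          if (index >= a->max_entries)
                  return NULL;    /* architectural bounds check */
          /*
           * The AND also constrains the speculative path: even if the
           * branch above is mis-predicted, the load can only reach one
           * of the index_mask + 1 slots of the padded allocation.
           */
          return &a->value[index & a->index_mask];
  }

With max_entries = 5 the backing store is padded to 8 slots, so a
mis-speculated index of, say, 1000 masks to 1000 & 7 == 0 and can only
reveal a value the program was allowed to read anyway, never adjacent
kernel memory.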
Diffstat (limited to 'kernel/bpf/arraymap.c')
 kernel/bpf/arraymap.c | 47 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 11 deletions(-)
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 7c25426d3cf5..aaa319848e7d 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -53,9 +53,10 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 {
        bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
        int numa_node = bpf_map_attr_numa_node(attr);
+       u32 elem_size, index_mask, max_entries;
+       bool unpriv = !capable(CAP_SYS_ADMIN);
        struct bpf_array *array;
        u64 array_size;
-       u32 elem_size;
 
        /* check sanity of attributes */
        if (attr->max_entries == 0 || attr->key_size != 4 ||
@@ -72,11 +73,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 
        elem_size = round_up(attr->value_size, 8);
 
+       max_entries = attr->max_entries;
+       index_mask = roundup_pow_of_two(max_entries) - 1;
+
+       if (unpriv)
+               /* round up array size to nearest power of 2,
+                * since cpu will speculate within index_mask limits
+                */
+               max_entries = index_mask + 1;
+
        array_size = sizeof(*array);
        if (percpu)
-               array_size += (u64) attr->max_entries * sizeof(void *);
+               array_size += (u64) max_entries * sizeof(void *);
        else
-               array_size += (u64) attr->max_entries * elem_size;
+               array_size += (u64) max_entries * elem_size;
 
        /* make sure there is no u32 overflow later in round_up() */
        if (array_size >= U32_MAX - PAGE_SIZE)
@@ -86,6 +96,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
        array = bpf_map_area_alloc(array_size, numa_node);
        if (!array)
                return ERR_PTR(-ENOMEM);
+       array->index_mask = index_mask;
+       array->map.unpriv_array = unpriv;
 
        /* copy mandatory map attributes */
        array->map.map_type = attr->map_type;
@@ -121,12 +133,13 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
        if (unlikely(index >= array->map.max_entries))
                return NULL;
 
-       return array->value + array->elem_size * index;
+       return array->value + array->elem_size * (index & array->index_mask);
 }
 
 /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
 static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
+       struct bpf_array *array = container_of(map, struct bpf_array, map);
        struct bpf_insn *insn = insn_buf;
        u32 elem_size = round_up(map->value_size, 8);
        const int ret = BPF_REG_0;
@@ -135,7 +148,12 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 
        *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
        *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
-       *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+       if (map->unpriv_array) {
+               *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
+               *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
+       } else {
+               *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+       }
 
        if (is_power_of_2(elem_size)) {
                *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
@@ -157,7 +175,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
        if (unlikely(index >= array->map.max_entries))
                return NULL;
 
-       return this_cpu_ptr(array->pptrs[index]);
+       return this_cpu_ptr(array->pptrs[index & array->index_mask]);
 }
 
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
@@ -177,7 +195,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
         */
        size = round_up(map->value_size, 8);
        rcu_read_lock();
-       pptr = array->pptrs[index];
+       pptr = array->pptrs[index & array->index_mask];
        for_each_possible_cpu(cpu) {
                bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
                off += size;
@@ -225,10 +243,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
                return -EEXIST;
 
        if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
-               memcpy(this_cpu_ptr(array->pptrs[index]),
+               memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
                       value, map->value_size);
        else
-               memcpy(array->value + array->elem_size * index,
+               memcpy(array->value +
+                      array->elem_size * (index & array->index_mask),
                       value, map->value_size);
        return 0;
 }
@@ -262,7 +281,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
         */
        size = round_up(map->value_size, 8);
        rcu_read_lock();
-       pptr = array->pptrs[index];
+       pptr = array->pptrs[index & array->index_mask];
        for_each_possible_cpu(cpu) {
                bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
                off += size;
@@ -613,6 +632,7 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
 static u32 array_of_map_gen_lookup(struct bpf_map *map,
                                   struct bpf_insn *insn_buf)
 {
+       struct bpf_array *array = container_of(map, struct bpf_array, map);
        u32 elem_size = round_up(map->value_size, 8);
        struct bpf_insn *insn = insn_buf;
        const int ret = BPF_REG_0;
@@ -621,7 +641,12 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map,
 
        *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
        *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
-       *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+       if (map->unpriv_array) {
+               *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
+               *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
+       } else {
+               *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+       }
        if (is_power_of_2(elem_size))
                *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
        else
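For reference, the instruction sequence that array_map_gen_lookup() emits
for an unprivileged map computes roughly the following C (a sketch only;
the function below is a paraphrase for readability, not code from the
patch):

  #include <stdint.h>
  #include <stddef.h>

  /* what the emitted BPF instructions do on each lookup */
  static void *unpriv_lookup_equiv(char *value, uint32_t max_entries,
                                   uint32_t index_mask, uint32_t elem_size,
                                   uint32_t index)
  {
          if (index >= max_entries)   /* BPF_JMP_IMM(BPF_JGE, ..., 4)  */
                  return NULL;        /* branch skips to the NULL path */
          index &= index_mask;        /* BPF_ALU32_IMM(BPF_AND, ...)   */
          /* LSH when elem_size is a power of 2, MUL otherwise, then ADD */
          return value + (uint64_t)index * elem_size;
  }

The jump offset grows from 3 to 4 (and from 5 to 6 in
array_of_map_gen_lookup()) because, when the bounds check fails, the taken
branch now has one extra instruction, the BPF_AND, to skip over.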