| author | Thomas Gleixner <tglx@linutronix.de> | 2015-01-12 04:51:13 -0500 |
|---|---|---|
| committer | Thomas Gleixner <tglx@linutronix.de> | 2015-01-12 04:51:13 -0500 |
| commit | 2f5eaf66e580f64032b365a00157b6b58c266b37 (patch) | |
| tree | 7852017c864f0eb3833782e2a017952bd8531458 /kernel/bpf | |
| parent | c291ee622165cb2c8d4e7af63fffd499354a23be (diff) | |
| parent | 91d1179212161f220938198b742c328ad38fd0a3 (diff) | |
Merge tag 'irqchip-urgent-3.19' of git://git.infradead.org/users/jcooper/linux into irq/urgent
irqchip urgent fixes for v3.19 from Jason Cooper
- mtk-sysirq: Fix error handling
- hip04: Fix cpu map for 16bit value
- gic-v3-its: Clear a warning regarding decimal constants
- omap-intc: Fix legacy DMA regression
- atmel-aic-common: Retain priority when changing type
Diffstat (limited to 'kernel/bpf')
| -rw-r--r-- | kernel/bpf/Makefile | 2 |
| -rw-r--r-- | kernel/bpf/arraymap.c | 156 |
| -rw-r--r-- | kernel/bpf/hashtab.c | 367 |
| -rw-r--r-- | kernel/bpf/helpers.c | 89 |
| -rw-r--r-- | kernel/bpf/syscall.c | 6 |
| -rw-r--r-- | kernel/bpf/test_stub.c | 56 |
| -rw-r--r-- | kernel/bpf/verifier.c | 171 |
7 files changed, 750 insertions, 97 deletions
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 0daf7f6ae7df..a5ae60f0b0a2 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,5 +1,5 @@
| 1 | obj-y := core.o | 1 | obj-y := core.o |
| 2 | obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o | 2 | obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o hashtab.o arraymap.o helpers.o |
| 3 | ifdef CONFIG_TEST_BPF | 3 | ifdef CONFIG_TEST_BPF |
| 4 | obj-$(CONFIG_BPF_SYSCALL) += test_stub.o | 4 | obj-$(CONFIG_BPF_SYSCALL) += test_stub.o |
| 5 | endif | 5 | endif |
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
new file mode 100644
index 000000000000..9eb4d8a7cd87
--- /dev/null
+++ b/kernel/bpf/arraymap.c
@@ -0,0 +1,156 @@
| 1 | /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com | ||
| 2 | * | ||
| 3 | * This program is free software; you can redistribute it and/or | ||
| 4 | * modify it under the terms of version 2 of the GNU General Public | ||
| 5 | * License as published by the Free Software Foundation. | ||
| 6 | * | ||
| 7 | * This program is distributed in the hope that it will be useful, but | ||
| 8 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 10 | * General Public License for more details. | ||
| 11 | */ | ||
| 12 | #include <linux/bpf.h> | ||
| 13 | #include <linux/err.h> | ||
| 14 | #include <linux/vmalloc.h> | ||
| 15 | #include <linux/slab.h> | ||
| 16 | #include <linux/mm.h> | ||
| 17 | |||
| 18 | struct bpf_array { | ||
| 19 | struct bpf_map map; | ||
| 20 | u32 elem_size; | ||
| 21 | char value[0] __aligned(8); | ||
| 22 | }; | ||
| 23 | |||
| 24 | /* Called from syscall */ | ||
| 25 | static struct bpf_map *array_map_alloc(union bpf_attr *attr) | ||
| 26 | { | ||
| 27 | struct bpf_array *array; | ||
| 28 | u32 elem_size, array_size; | ||
| 29 | |||
| 30 | /* check sanity of attributes */ | ||
| 31 | if (attr->max_entries == 0 || attr->key_size != 4 || | ||
| 32 | attr->value_size == 0) | ||
| 33 | return ERR_PTR(-EINVAL); | ||
| 34 | |||
| 35 | elem_size = round_up(attr->value_size, 8); | ||
| 36 | |||
| 37 | /* check round_up into zero and u32 overflow */ | ||
| 38 | if (elem_size == 0 || | ||
| 39 | attr->max_entries > (U32_MAX - sizeof(*array)) / elem_size) | ||
| 40 | return ERR_PTR(-ENOMEM); | ||
| 41 | |||
| 42 | array_size = sizeof(*array) + attr->max_entries * elem_size; | ||
| 43 | |||
| 44 | /* allocate all map elements and zero-initialize them */ | ||
| 45 | array = kzalloc(array_size, GFP_USER | __GFP_NOWARN); | ||
| 46 | if (!array) { | ||
| 47 | array = vzalloc(array_size); | ||
| 48 | if (!array) | ||
| 49 | return ERR_PTR(-ENOMEM); | ||
| 50 | } | ||
| 51 | |||
| 52 | /* copy mandatory map attributes */ | ||
| 53 | array->map.key_size = attr->key_size; | ||
| 54 | array->map.value_size = attr->value_size; | ||
| 55 | array->map.max_entries = attr->max_entries; | ||
| 56 | |||
| 57 | array->elem_size = elem_size; | ||
| 58 | |||
| 59 | return &array->map; | ||
| 60 | } | ||
| 61 | |||
| 62 | /* Called from syscall or from eBPF program */ | ||
| 63 | static void *array_map_lookup_elem(struct bpf_map *map, void *key) | ||
| 64 | { | ||
| 65 | struct bpf_array *array = container_of(map, struct bpf_array, map); | ||
| 66 | u32 index = *(u32 *)key; | ||
| 67 | |||
| 68 | if (index >= array->map.max_entries) | ||
| 69 | return NULL; | ||
| 70 | |||
| 71 | return array->value + array->elem_size * index; | ||
| 72 | } | ||
| 73 | |||
| 74 | /* Called from syscall */ | ||
| 75 | static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) | ||
| 76 | { | ||
| 77 | struct bpf_array *array = container_of(map, struct bpf_array, map); | ||
| 78 | u32 index = *(u32 *)key; | ||
| 79 | u32 *next = (u32 *)next_key; | ||
| 80 | |||
| 81 | if (index >= array->map.max_entries) { | ||
| 82 | *next = 0; | ||
| 83 | return 0; | ||
| 84 | } | ||
| 85 | |||
| 86 | if (index == array->map.max_entries - 1) | ||
| 87 | return -ENOENT; | ||
| 88 | |||
| 89 | *next = index + 1; | ||
| 90 | return 0; | ||
| 91 | } | ||
| 92 | |||
| 93 | /* Called from syscall or from eBPF program */ | ||
| 94 | static int array_map_update_elem(struct bpf_map *map, void *key, void *value, | ||
| 95 | u64 map_flags) | ||
| 96 | { | ||
| 97 | struct bpf_array *array = container_of(map, struct bpf_array, map); | ||
| 98 | u32 index = *(u32 *)key; | ||
| 99 | |||
| 100 | if (map_flags > BPF_EXIST) | ||
| 101 | /* unknown flags */ | ||
| 102 | return -EINVAL; | ||
| 103 | |||
| 104 | if (index >= array->map.max_entries) | ||
| 105 | /* all elements were pre-allocated, cannot insert a new one */ | ||
| 106 | return -E2BIG; | ||
| 107 | |||
| 108 | if (map_flags == BPF_NOEXIST) | ||
| 109 | /* all elements already exist */ | ||
| 110 | return -EEXIST; | ||
| 111 | |||
| 112 | memcpy(array->value + array->elem_size * index, value, array->elem_size); | ||
| 113 | return 0; | ||
| 114 | } | ||
| 115 | |||
| 116 | /* Called from syscall or from eBPF program */ | ||
| 117 | static int array_map_delete_elem(struct bpf_map *map, void *key) | ||
| 118 | { | ||
| 119 | return -EINVAL; | ||
| 120 | } | ||
| 121 | |||
| 122 | /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ | ||
| 123 | static void array_map_free(struct bpf_map *map) | ||
| 124 | { | ||
| 125 | struct bpf_array *array = container_of(map, struct bpf_array, map); | ||
| 126 | |||
| 127 | /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, | ||
| 128 | * so the programs (can be more than one that used this map) were | ||
| 129 | * disconnected from events. Wait for outstanding programs to complete | ||
| 130 | * and free the array | ||
| 131 | */ | ||
| 132 | synchronize_rcu(); | ||
| 133 | |||
| 134 | kvfree(array); | ||
| 135 | } | ||
| 136 | |||
| 137 | static struct bpf_map_ops array_ops = { | ||
| 138 | .map_alloc = array_map_alloc, | ||
| 139 | .map_free = array_map_free, | ||
| 140 | .map_get_next_key = array_map_get_next_key, | ||
| 141 | .map_lookup_elem = array_map_lookup_elem, | ||
| 142 | .map_update_elem = array_map_update_elem, | ||
| 143 | .map_delete_elem = array_map_delete_elem, | ||
| 144 | }; | ||
| 145 | |||
| 146 | static struct bpf_map_type_list tl = { | ||
| 147 | .ops = &array_ops, | ||
| 148 | .type = BPF_MAP_TYPE_ARRAY, | ||
| 149 | }; | ||
| 150 | |||
| 151 | static int __init register_array_map(void) | ||
| 152 | { | ||
| 153 | bpf_register_map_type(&tl); | ||
| 154 | return 0; | ||
| 155 | } | ||
| 156 | late_initcall(register_array_map); | ||
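The array map above is driven entirely through the bpf(2) syscall. Below is a minimal user-space sketch of creating, updating and reading back an element; the bpf() wrapper and error handling are illustrative assumptions (and assume __NR_bpf plus the exported uapi header are available), while the commands, attribute fields and flag values come from this patch series.

```c
/* Hedged sketch: exercising BPF_MAP_TYPE_ARRAY from user space.
 * The bpf() wrapper below is not part of this patch.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

int main(void)
{
	union bpf_attr attr;
	long long value = 42;
	__u32 key = 3;
	int map_fd;

	/* array maps require key_size == 4; all elements are pre-allocated
	 * and zero-initialized by array_map_alloc()
	 */
	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_ARRAY;
	attr.key_size    = sizeof(__u32);
	attr.value_size  = sizeof(long long);
	attr.max_entries = 16;
	map_fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
	if (map_fd < 0)
		return 1;

	/* overwrite element 3; BPF_NOEXIST would fail with -EEXIST because
	 * every array element already exists
	 */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key    = (unsigned long) &key;
	attr.value  = (unsigned long) &value;
	attr.flags  = BPF_ANY;
	if (bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)))
		return 1;

	/* read it back through the same attribute layout */
	value = 0;
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key    = (unsigned long) &key;
	attr.value  = (unsigned long) &value;
	if (bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)))
		return 1;

	printf("key %u -> %lld\n", key, value);
	close(map_fd);
	return 0;
}
```

Deleting an array element is deliberately unsupported, as array_map_delete_elem() above simply returns -EINVAL.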
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
new file mode 100644
index 000000000000..b3ba43674310
--- /dev/null
+++ b/kernel/bpf/hashtab.c
@@ -0,0 +1,367 @@
| 1 | /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com | ||
| 2 | * | ||
| 3 | * This program is free software; you can redistribute it and/or | ||
| 4 | * modify it under the terms of version 2 of the GNU General Public | ||
| 5 | * License as published by the Free Software Foundation. | ||
| 6 | * | ||
| 7 | * This program is distributed in the hope that it will be useful, but | ||
| 8 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 10 | * General Public License for more details. | ||
| 11 | */ | ||
| 12 | #include <linux/bpf.h> | ||
| 13 | #include <linux/jhash.h> | ||
| 14 | #include <linux/filter.h> | ||
| 15 | #include <linux/vmalloc.h> | ||
| 16 | |||
| 17 | struct bpf_htab { | ||
| 18 | struct bpf_map map; | ||
| 19 | struct hlist_head *buckets; | ||
| 20 | spinlock_t lock; | ||
| 21 | u32 count; /* number of elements in this hashtable */ | ||
| 22 | u32 n_buckets; /* number of hash buckets */ | ||
| 23 | u32 elem_size; /* size of each element in bytes */ | ||
| 24 | }; | ||
| 25 | |||
| 26 | /* each htab element is struct htab_elem + key + value */ | ||
| 27 | struct htab_elem { | ||
| 28 | struct hlist_node hash_node; | ||
| 29 | struct rcu_head rcu; | ||
| 30 | u32 hash; | ||
| 31 | char key[0] __aligned(8); | ||
| 32 | }; | ||
| 33 | |||
| 34 | /* Called from syscall */ | ||
| 35 | static struct bpf_map *htab_map_alloc(union bpf_attr *attr) | ||
| 36 | { | ||
| 37 | struct bpf_htab *htab; | ||
| 38 | int err, i; | ||
| 39 | |||
| 40 | htab = kzalloc(sizeof(*htab), GFP_USER); | ||
| 41 | if (!htab) | ||
| 42 | return ERR_PTR(-ENOMEM); | ||
| 43 | |||
| 44 | /* mandatory map attributes */ | ||
| 45 | htab->map.key_size = attr->key_size; | ||
| 46 | htab->map.value_size = attr->value_size; | ||
| 47 | htab->map.max_entries = attr->max_entries; | ||
| 48 | |||
| 49 | /* check sanity of attributes. | ||
| 50 | * value_size == 0 may be allowed in the future to use map as a set | ||
| 51 | */ | ||
| 52 | err = -EINVAL; | ||
| 53 | if (htab->map.max_entries == 0 || htab->map.key_size == 0 || | ||
| 54 | htab->map.value_size == 0) | ||
| 55 | goto free_htab; | ||
| 56 | |||
| 57 | /* hash table size must be power of 2 */ | ||
| 58 | htab->n_buckets = roundup_pow_of_two(htab->map.max_entries); | ||
| 59 | |||
| 60 | err = -E2BIG; | ||
| 61 | if (htab->map.key_size > MAX_BPF_STACK) | ||
| 62 | /* eBPF programs initialize keys on stack, so they cannot be | ||
| 63 | * larger than max stack size | ||
| 64 | */ | ||
| 65 | goto free_htab; | ||
| 66 | |||
| 67 | err = -ENOMEM; | ||
| 68 | /* prevent zero size kmalloc and check for u32 overflow */ | ||
| 69 | if (htab->n_buckets == 0 || | ||
| 70 | htab->n_buckets > U32_MAX / sizeof(struct hlist_head)) | ||
| 71 | goto free_htab; | ||
| 72 | |||
| 73 | htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct hlist_head), | ||
| 74 | GFP_USER | __GFP_NOWARN); | ||
| 75 | |||
| 76 | if (!htab->buckets) { | ||
| 77 | htab->buckets = vmalloc(htab->n_buckets * sizeof(struct hlist_head)); | ||
| 78 | if (!htab->buckets) | ||
| 79 | goto free_htab; | ||
| 80 | } | ||
| 81 | |||
| 82 | for (i = 0; i < htab->n_buckets; i++) | ||
| 83 | INIT_HLIST_HEAD(&htab->buckets[i]); | ||
| 84 | |||
| 85 | spin_lock_init(&htab->lock); | ||
| 86 | htab->count = 0; | ||
| 87 | |||
| 88 | htab->elem_size = sizeof(struct htab_elem) + | ||
| 89 | round_up(htab->map.key_size, 8) + | ||
| 90 | htab->map.value_size; | ||
| 91 | return &htab->map; | ||
| 92 | |||
| 93 | free_htab: | ||
| 94 | kfree(htab); | ||
| 95 | return ERR_PTR(err); | ||
| 96 | } | ||
| 97 | |||
| 98 | static inline u32 htab_map_hash(const void *key, u32 key_len) | ||
| 99 | { | ||
| 100 | return jhash(key, key_len, 0); | ||
| 101 | } | ||
| 102 | |||
| 103 | static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash) | ||
| 104 | { | ||
| 105 | return &htab->buckets[hash & (htab->n_buckets - 1)]; | ||
| 106 | } | ||
| 107 | |||
| 108 | static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash, | ||
| 109 | void *key, u32 key_size) | ||
| 110 | { | ||
| 111 | struct htab_elem *l; | ||
| 112 | |||
| 113 | hlist_for_each_entry_rcu(l, head, hash_node) | ||
| 114 | if (l->hash == hash && !memcmp(&l->key, key, key_size)) | ||
| 115 | return l; | ||
| 116 | |||
| 117 | return NULL; | ||
| 118 | } | ||
| 119 | |||
| 120 | /* Called from syscall or from eBPF program */ | ||
| 121 | static void *htab_map_lookup_elem(struct bpf_map *map, void *key) | ||
| 122 | { | ||
| 123 | struct bpf_htab *htab = container_of(map, struct bpf_htab, map); | ||
| 124 | struct hlist_head *head; | ||
| 125 | struct htab_elem *l; | ||
| 126 | u32 hash, key_size; | ||
| 127 | |||
| 128 | /* Must be called with rcu_read_lock. */ | ||
| 129 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
| 130 | |||
| 131 | key_size = map->key_size; | ||
| 132 | |||
| 133 | hash = htab_map_hash(key, key_size); | ||
| 134 | |||
| 135 | head = select_bucket(htab, hash); | ||
| 136 | |||
| 137 | l = lookup_elem_raw(head, hash, key, key_size); | ||
| 138 | |||
| 139 | if (l) | ||
| 140 | return l->key + round_up(map->key_size, 8); | ||
| 141 | |||
| 142 | return NULL; | ||
| 143 | } | ||
| 144 | |||
| 145 | /* Called from syscall */ | ||
| 146 | static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) | ||
| 147 | { | ||
| 148 | struct bpf_htab *htab = container_of(map, struct bpf_htab, map); | ||
| 149 | struct hlist_head *head; | ||
| 150 | struct htab_elem *l, *next_l; | ||
| 151 | u32 hash, key_size; | ||
| 152 | int i; | ||
| 153 | |||
| 154 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
| 155 | |||
| 156 | key_size = map->key_size; | ||
| 157 | |||
| 158 | hash = htab_map_hash(key, key_size); | ||
| 159 | |||
| 160 | head = select_bucket(htab, hash); | ||
| 161 | |||
| 162 | /* lookup the key */ | ||
| 163 | l = lookup_elem_raw(head, hash, key, key_size); | ||
| 164 | |||
| 165 | if (!l) { | ||
| 166 | i = 0; | ||
| 167 | goto find_first_elem; | ||
| 168 | } | ||
| 169 | |||
| 170 | /* key was found, get next key in the same bucket */ | ||
| 171 | next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)), | ||
| 172 | struct htab_elem, hash_node); | ||
| 173 | |||
| 174 | if (next_l) { | ||
| 175 | /* if next elem in this hash list is non-zero, just return it */ | ||
| 176 | memcpy(next_key, next_l->key, key_size); | ||
| 177 | return 0; | ||
| 178 | } | ||
| 179 | |||
| 180 | /* no more elements in this hash list, go to the next bucket */ | ||
| 181 | i = hash & (htab->n_buckets - 1); | ||
| 182 | i++; | ||
| 183 | |||
| 184 | find_first_elem: | ||
| 185 | /* iterate over buckets */ | ||
| 186 | for (; i < htab->n_buckets; i++) { | ||
| 187 | head = select_bucket(htab, i); | ||
| 188 | |||
| 189 | /* pick first element in the bucket */ | ||
| 190 | next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)), | ||
| 191 | struct htab_elem, hash_node); | ||
| 192 | if (next_l) { | ||
| 193 | /* if it's not empty, just return it */ | ||
| 194 | memcpy(next_key, next_l->key, key_size); | ||
| 195 | return 0; | ||
| 196 | } | ||
| 197 | } | ||
| 198 | |||
| 199 | /* iterated over all buckets and all elements */ | ||
| 200 | return -ENOENT; | ||
| 201 | } | ||
| 202 | |||
| 203 | /* Called from syscall or from eBPF program */ | ||
| 204 | static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, | ||
| 205 | u64 map_flags) | ||
| 206 | { | ||
| 207 | struct bpf_htab *htab = container_of(map, struct bpf_htab, map); | ||
| 208 | struct htab_elem *l_new, *l_old; | ||
| 209 | struct hlist_head *head; | ||
| 210 | unsigned long flags; | ||
| 211 | u32 key_size; | ||
| 212 | int ret; | ||
| 213 | |||
| 214 | if (map_flags > BPF_EXIST) | ||
| 215 | /* unknown flags */ | ||
| 216 | return -EINVAL; | ||
| 217 | |||
| 218 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
| 219 | |||
| 220 | /* allocate new element outside of lock */ | ||
| 221 | l_new = kmalloc(htab->elem_size, GFP_ATOMIC); | ||
| 222 | if (!l_new) | ||
| 223 | return -ENOMEM; | ||
| 224 | |||
| 225 | key_size = map->key_size; | ||
| 226 | |||
| 227 | memcpy(l_new->key, key, key_size); | ||
| 228 | memcpy(l_new->key + round_up(key_size, 8), value, map->value_size); | ||
| 229 | |||
| 230 | l_new->hash = htab_map_hash(l_new->key, key_size); | ||
| 231 | |||
| 232 | /* bpf_map_update_elem() can be called in_irq() */ | ||
| 233 | spin_lock_irqsave(&htab->lock, flags); | ||
| 234 | |||
| 235 | head = select_bucket(htab, l_new->hash); | ||
| 236 | |||
| 237 | l_old = lookup_elem_raw(head, l_new->hash, key, key_size); | ||
| 238 | |||
| 239 | if (!l_old && unlikely(htab->count >= map->max_entries)) { | ||
| 240 | /* if elem with this 'key' doesn't exist and we've reached | ||
| 241 | * max_entries limit, fail insertion of new elem | ||
| 242 | */ | ||
| 243 | ret = -E2BIG; | ||
| 244 | goto err; | ||
| 245 | } | ||
| 246 | |||
| 247 | if (l_old && map_flags == BPF_NOEXIST) { | ||
| 248 | /* elem already exists */ | ||
| 249 | ret = -EEXIST; | ||
| 250 | goto err; | ||
| 251 | } | ||
| 252 | |||
| 253 | if (!l_old && map_flags == BPF_EXIST) { | ||
| 254 | /* elem doesn't exist, cannot update it */ | ||
| 255 | ret = -ENOENT; | ||
| 256 | goto err; | ||
| 257 | } | ||
| 258 | |||
| 259 | /* add new element to the head of the list, so that concurrent | ||
| 260 | * search will find it before old elem | ||
| 261 | */ | ||
| 262 | hlist_add_head_rcu(&l_new->hash_node, head); | ||
| 263 | if (l_old) { | ||
| 264 | hlist_del_rcu(&l_old->hash_node); | ||
| 265 | kfree_rcu(l_old, rcu); | ||
| 266 | } else { | ||
| 267 | htab->count++; | ||
| 268 | } | ||
| 269 | spin_unlock_irqrestore(&htab->lock, flags); | ||
| 270 | |||
| 271 | return 0; | ||
| 272 | err: | ||
| 273 | spin_unlock_irqrestore(&htab->lock, flags); | ||
| 274 | kfree(l_new); | ||
| 275 | return ret; | ||
| 276 | } | ||
| 277 | |||
| 278 | /* Called from syscall or from eBPF program */ | ||
| 279 | static int htab_map_delete_elem(struct bpf_map *map, void *key) | ||
| 280 | { | ||
| 281 | struct bpf_htab *htab = container_of(map, struct bpf_htab, map); | ||
| 282 | struct hlist_head *head; | ||
| 283 | struct htab_elem *l; | ||
| 284 | unsigned long flags; | ||
| 285 | u32 hash, key_size; | ||
| 286 | int ret = -ENOENT; | ||
| 287 | |||
| 288 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
| 289 | |||
| 290 | key_size = map->key_size; | ||
| 291 | |||
| 292 | hash = htab_map_hash(key, key_size); | ||
| 293 | |||
| 294 | spin_lock_irqsave(&htab->lock, flags); | ||
| 295 | |||
| 296 | head = select_bucket(htab, hash); | ||
| 297 | |||
| 298 | l = lookup_elem_raw(head, hash, key, key_size); | ||
| 299 | |||
| 300 | if (l) { | ||
| 301 | hlist_del_rcu(&l->hash_node); | ||
| 302 | htab->count--; | ||
| 303 | kfree_rcu(l, rcu); | ||
| 304 | ret = 0; | ||
| 305 | } | ||
| 306 | |||
| 307 | spin_unlock_irqrestore(&htab->lock, flags); | ||
| 308 | return ret; | ||
| 309 | } | ||
| 310 | |||
| 311 | static void delete_all_elements(struct bpf_htab *htab) | ||
| 312 | { | ||
| 313 | int i; | ||
| 314 | |||
| 315 | for (i = 0; i < htab->n_buckets; i++) { | ||
| 316 | struct hlist_head *head = select_bucket(htab, i); | ||
| 317 | struct hlist_node *n; | ||
| 318 | struct htab_elem *l; | ||
| 319 | |||
| 320 | hlist_for_each_entry_safe(l, n, head, hash_node) { | ||
| 321 | hlist_del_rcu(&l->hash_node); | ||
| 322 | htab->count--; | ||
| 323 | kfree(l); | ||
| 324 | } | ||
| 325 | } | ||
| 326 | } | ||
| 327 | |||
| 328 | /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ | ||
| 329 | static void htab_map_free(struct bpf_map *map) | ||
| 330 | { | ||
| 331 | struct bpf_htab *htab = container_of(map, struct bpf_htab, map); | ||
| 332 | |||
| 333 | /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, | ||
| 334 | * so the programs (can be more than one that used this map) were | ||
| 335 | * disconnected from events. Wait for outstanding critical sections in | ||
| 336 | * these programs to complete | ||
| 337 | */ | ||
| 338 | synchronize_rcu(); | ||
| 339 | |||
| 340 | /* some of kfree_rcu() callbacks for elements of this map may not have | ||
| 341 | * executed. It's ok. Proceed to free residual elements and map itself | ||
| 342 | */ | ||
| 343 | delete_all_elements(htab); | ||
| 344 | kvfree(htab->buckets); | ||
| 345 | kfree(htab); | ||
| 346 | } | ||
| 347 | |||
| 348 | static struct bpf_map_ops htab_ops = { | ||
| 349 | .map_alloc = htab_map_alloc, | ||
| 350 | .map_free = htab_map_free, | ||
| 351 | .map_get_next_key = htab_map_get_next_key, | ||
| 352 | .map_lookup_elem = htab_map_lookup_elem, | ||
| 353 | .map_update_elem = htab_map_update_elem, | ||
| 354 | .map_delete_elem = htab_map_delete_elem, | ||
| 355 | }; | ||
| 356 | |||
| 357 | static struct bpf_map_type_list tl = { | ||
| 358 | .ops = &htab_ops, | ||
| 359 | .type = BPF_MAP_TYPE_HASH, | ||
| 360 | }; | ||
| 361 | |||
| 362 | static int __init register_htab_map(void) | ||
| 363 | { | ||
| 364 | bpf_register_map_type(&tl); | ||
| 365 | return 0; | ||
| 366 | } | ||
| 367 | late_initcall(register_htab_map); | ||
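The hash map is the first user of the update flags introduced in this series. A short sketch of the flag semantics implemented by htab_map_update_elem(), using the same kind of bpf() wrapper as in the array example above (the map_update() helper is illustrative, not part of the patch):

```c
/* Hedged sketch: update-flag semantics against a BPF_MAP_TYPE_HASH fd. */
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

/* returns 0 on success, -errno on failure */
static int map_update(int map_fd, const void *key, const void *value, __u64 flags)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key    = (unsigned long) key;
	attr.value  = (unsigned long) value;
	attr.flags  = flags;

	return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) ? -errno : 0;
}

/* Typical usage:
 *   map_update(fd, &k, &v, BPF_NOEXIST);  // create only; -EEXIST if k present
 *   map_update(fd, &k, &v, BPF_EXIST);    // update only; -ENOENT if k absent
 *   map_update(fd, &k, &v, BPF_ANY);      // create or update
 * Creating a new key once htab->count has reached max_entries fails with -E2BIG.
 */
```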
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
new file mode 100644
index 000000000000..9e3414d85459
--- /dev/null
+++ b/kernel/bpf/helpers.c
@@ -0,0 +1,89 @@
| 1 | /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com | ||
| 2 | * | ||
| 3 | * This program is free software; you can redistribute it and/or | ||
| 4 | * modify it under the terms of version 2 of the GNU General Public | ||
| 5 | * License as published by the Free Software Foundation. | ||
| 6 | * | ||
| 7 | * This program is distributed in the hope that it will be useful, but | ||
| 8 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 10 | * General Public License for more details. | ||
| 11 | */ | ||
| 12 | #include <linux/bpf.h> | ||
| 13 | #include <linux/rcupdate.h> | ||
| 14 | |||
| 15 | /* If kernel subsystem is allowing eBPF programs to call this function, | ||
| 16 | * inside its own verifier_ops->get_func_proto() callback it should return | ||
| 17 | * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments | ||
| 18 | * | ||
| 19 | * Different map implementations will rely on rcu in map methods | ||
| 20 | * lookup/update/delete, therefore eBPF programs must run under rcu lock | ||
| 21 | * if program is allowed to access maps, so check rcu_read_lock_held in | ||
| 22 | * all three functions. | ||
| 23 | */ | ||
| 24 | static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) | ||
| 25 | { | ||
| 26 | /* verifier checked that R1 contains a valid pointer to bpf_map | ||
| 27 | * and R2 points to a program stack and map->key_size bytes were | ||
| 28 | * initialized | ||
| 29 | */ | ||
| 30 | struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; | ||
| 31 | void *key = (void *) (unsigned long) r2; | ||
| 32 | void *value; | ||
| 33 | |||
| 34 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
| 35 | |||
| 36 | value = map->ops->map_lookup_elem(map, key); | ||
| 37 | |||
| 38 | /* lookup() returns either pointer to element value or NULL | ||
| 39 | * which is the meaning of PTR_TO_MAP_VALUE_OR_NULL type | ||
| 40 | */ | ||
| 41 | return (unsigned long) value; | ||
| 42 | } | ||
| 43 | |||
| 44 | struct bpf_func_proto bpf_map_lookup_elem_proto = { | ||
| 45 | .func = bpf_map_lookup_elem, | ||
| 46 | .gpl_only = false, | ||
| 47 | .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, | ||
| 48 | .arg1_type = ARG_CONST_MAP_PTR, | ||
| 49 | .arg2_type = ARG_PTR_TO_MAP_KEY, | ||
| 50 | }; | ||
| 51 | |||
| 52 | static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) | ||
| 53 | { | ||
| 54 | struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; | ||
| 55 | void *key = (void *) (unsigned long) r2; | ||
| 56 | void *value = (void *) (unsigned long) r3; | ||
| 57 | |||
| 58 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
| 59 | |||
| 60 | return map->ops->map_update_elem(map, key, value, r4); | ||
| 61 | } | ||
| 62 | |||
| 63 | struct bpf_func_proto bpf_map_update_elem_proto = { | ||
| 64 | .func = bpf_map_update_elem, | ||
| 65 | .gpl_only = false, | ||
| 66 | .ret_type = RET_INTEGER, | ||
| 67 | .arg1_type = ARG_CONST_MAP_PTR, | ||
| 68 | .arg2_type = ARG_PTR_TO_MAP_KEY, | ||
| 69 | .arg3_type = ARG_PTR_TO_MAP_VALUE, | ||
| 70 | .arg4_type = ARG_ANYTHING, | ||
| 71 | }; | ||
| 72 | |||
| 73 | static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) | ||
| 74 | { | ||
| 75 | struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; | ||
| 76 | void *key = (void *) (unsigned long) r2; | ||
| 77 | |||
| 78 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
| 79 | |||
| 80 | return map->ops->map_delete_elem(map, key); | ||
| 81 | } | ||
| 82 | |||
| 83 | struct bpf_func_proto bpf_map_delete_elem_proto = { | ||
| 84 | .func = bpf_map_delete_elem, | ||
| 85 | .gpl_only = false, | ||
| 86 | .ret_type = RET_INTEGER, | ||
| 87 | .arg1_type = ARG_CONST_MAP_PTR, | ||
| 88 | .arg2_type = ARG_PTR_TO_MAP_KEY, | ||
| 89 | }; | ||
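These protos are what an eBPF program reaches through the BPF_FUNC_map_* IDs. Below is a hedged restricted-C sketch of the program side in the style of the BPF samples; the helper function-pointer idiom, the bpf_map_def layout and the ELF section names are loader/LLVM conventions assumed here, while the helper IDs and flag values come from the uapi header.

```c
/* Hedged sketch of an eBPF program (restricted C for the LLVM BPF backend)
 * using the map helpers. The helper function pointers and the "maps"
 * section layout follow the samples' conventions and are assumptions;
 * BPF_FUNC_* and BPF_ANY come from uapi/linux/bpf.h.
 */
#include <uapi/linux/bpf.h>

static void *(*bpf_map_lookup_elem)(void *map, const void *key) =
	(void *) BPF_FUNC_map_lookup_elem;
static int (*bpf_map_update_elem)(void *map, const void *key,
				  const void *value, unsigned long long flags) =
	(void *) BPF_FUNC_map_update_elem;

struct bpf_map_def {
	unsigned int type;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;
};

struct bpf_map_def hit_map __attribute__((section("maps"), used)) = {
	.type		= BPF_MAP_TYPE_HASH,
	.key_size	= sizeof(unsigned int),
	.value_size	= sizeof(long long),
	.max_entries	= 1024,
};

__attribute__((section("socket_filter"), used))
int count_hits(void *ctx)
{
	unsigned int key = 0;
	long long init = 1, *value;

	/* the kernel runs the program under rcu_read_lock(), which is what
	 * the WARN_ON_ONCE(!rcu_read_lock_held()) checks above rely on
	 */
	value = bpf_map_lookup_elem(&hit_map, &key);
	if (value)
		(*value)++;
	else
		bpf_map_update_elem(&hit_map, &key, &init, BPF_ANY);

	return 0;
}
```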
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ba61c8c16032..088ac0b1b106 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -169,7 +169,7 @@ static int map_lookup_elem(union bpf_attr *attr)
| 169 | if (copy_from_user(key, ukey, map->key_size) != 0) | 169 | if (copy_from_user(key, ukey, map->key_size) != 0) |
| 170 | goto free_key; | 170 | goto free_key; |
| 171 | 171 | ||
| 172 | err = -ESRCH; | 172 | err = -ENOENT; |
| 173 | rcu_read_lock(); | 173 | rcu_read_lock(); |
| 174 | value = map->ops->map_lookup_elem(map, key); | 174 | value = map->ops->map_lookup_elem(map, key); |
| 175 | if (!value) | 175 | if (!value) |
@@ -190,7 +190,7 @@ err_put:
| 190 | return err; | 190 | return err; |
| 191 | } | 191 | } |
| 192 | 192 | ||
| 193 | #define BPF_MAP_UPDATE_ELEM_LAST_FIELD value | 193 | #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags |
| 194 | 194 | ||
| 195 | static int map_update_elem(union bpf_attr *attr) | 195 | static int map_update_elem(union bpf_attr *attr) |
| 196 | { | 196 | { |
@@ -231,7 +231,7 @@ static int map_update_elem(union bpf_attr *attr)
| 231 | * therefore all map accessors rely on this fact, so do the same here | 231 | * therefore all map accessors rely on this fact, so do the same here |
| 232 | */ | 232 | */ |
| 233 | rcu_read_lock(); | 233 | rcu_read_lock(); |
| 234 | err = map->ops->map_update_elem(map, key, value); | 234 | err = map->ops->map_update_elem(map, key, value, attr->flags); |
| 235 | rcu_read_unlock(); | 235 | rcu_read_unlock(); |
| 236 | 236 | ||
| 237 | free_value: | 237 | free_value: |
diff --git a/kernel/bpf/test_stub.c b/kernel/bpf/test_stub.c
index fcaddff4003e..0ceae1e6e8b5 100644
--- a/kernel/bpf/test_stub.c
+++ b/kernel/bpf/test_stub.c
@@ -18,26 +18,18 @@ struct bpf_context {
| 18 | u64 arg2; | 18 | u64 arg2; |
| 19 | }; | 19 | }; |
| 20 | 20 | ||
| 21 | static u64 test_func(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) | ||
| 22 | { | ||
| 23 | return 0; | ||
| 24 | } | ||
| 25 | |||
| 26 | static struct bpf_func_proto test_funcs[] = { | ||
| 27 | [BPF_FUNC_unspec] = { | ||
| 28 | .func = test_func, | ||
| 29 | .gpl_only = true, | ||
| 30 | .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, | ||
| 31 | .arg1_type = ARG_CONST_MAP_PTR, | ||
| 32 | .arg2_type = ARG_PTR_TO_MAP_KEY, | ||
| 33 | }, | ||
| 34 | }; | ||
| 35 | |||
| 36 | static const struct bpf_func_proto *test_func_proto(enum bpf_func_id func_id) | 21 | static const struct bpf_func_proto *test_func_proto(enum bpf_func_id func_id) |
| 37 | { | 22 | { |
| 38 | if (func_id < 0 || func_id >= ARRAY_SIZE(test_funcs)) | 23 | switch (func_id) { |
| 24 | case BPF_FUNC_map_lookup_elem: | ||
| 25 | return &bpf_map_lookup_elem_proto; | ||
| 26 | case BPF_FUNC_map_update_elem: | ||
| 27 | return &bpf_map_update_elem_proto; | ||
| 28 | case BPF_FUNC_map_delete_elem: | ||
| 29 | return &bpf_map_delete_elem_proto; | ||
| 30 | default: | ||
| 39 | return NULL; | 31 | return NULL; |
| 40 | return &test_funcs[func_id]; | 32 | } |
| 41 | } | 33 | } |
| 42 | 34 | ||
| 43 | static const struct bpf_context_access { | 35 | static const struct bpf_context_access { |
@@ -78,38 +70,8 @@ static struct bpf_prog_type_list tl_prog = {
| 78 | .type = BPF_PROG_TYPE_UNSPEC, | 70 | .type = BPF_PROG_TYPE_UNSPEC, |
| 79 | }; | 71 | }; |
| 80 | 72 | ||
| 81 | static struct bpf_map *test_map_alloc(union bpf_attr *attr) | ||
| 82 | { | ||
| 83 | struct bpf_map *map; | ||
| 84 | |||
| 85 | map = kzalloc(sizeof(*map), GFP_USER); | ||
| 86 | if (!map) | ||
| 87 | return ERR_PTR(-ENOMEM); | ||
| 88 | |||
| 89 | map->key_size = attr->key_size; | ||
| 90 | map->value_size = attr->value_size; | ||
| 91 | map->max_entries = attr->max_entries; | ||
| 92 | return map; | ||
| 93 | } | ||
| 94 | |||
| 95 | static void test_map_free(struct bpf_map *map) | ||
| 96 | { | ||
| 97 | kfree(map); | ||
| 98 | } | ||
| 99 | |||
| 100 | static struct bpf_map_ops test_map_ops = { | ||
| 101 | .map_alloc = test_map_alloc, | ||
| 102 | .map_free = test_map_free, | ||
| 103 | }; | ||
| 104 | |||
| 105 | static struct bpf_map_type_list tl_map = { | ||
| 106 | .ops = &test_map_ops, | ||
| 107 | .type = BPF_MAP_TYPE_UNSPEC, | ||
| 108 | }; | ||
| 109 | |||
| 110 | static int __init register_test_ops(void) | 73 | static int __init register_test_ops(void) |
| 111 | { | 74 | { |
| 112 | bpf_register_map_type(&tl_map); | ||
| 113 | bpf_register_prog_type(&tl_prog); | 75 | bpf_register_prog_type(&tl_prog); |
| 114 | return 0; | 76 | return 0; |
| 115 | } | 77 | } |
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9f81818f2941..a28e09c7825d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -153,22 +153,19 @@ struct reg_state {
| 153 | 153 | ||
| 154 | enum bpf_stack_slot_type { | 154 | enum bpf_stack_slot_type { |
| 155 | STACK_INVALID, /* nothing was stored in this stack slot */ | 155 | STACK_INVALID, /* nothing was stored in this stack slot */ |
| 156 | STACK_SPILL, /* 1st byte of register spilled into stack */ | 156 | STACK_SPILL, /* register spilled into stack */ |
| 157 | STACK_SPILL_PART, /* other 7 bytes of register spill */ | ||
| 158 | STACK_MISC /* BPF program wrote some data into this slot */ | 157 | STACK_MISC /* BPF program wrote some data into this slot */ |
| 159 | }; | 158 | }; |
| 160 | 159 | ||
| 161 | struct bpf_stack_slot { | 160 | #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ |
| 162 | enum bpf_stack_slot_type stype; | ||
| 163 | struct reg_state reg_st; | ||
| 164 | }; | ||
| 165 | 161 | ||
| 166 | /* state of the program: | 162 | /* state of the program: |
| 167 | * type of all registers and stack info | 163 | * type of all registers and stack info |
| 168 | */ | 164 | */ |
| 169 | struct verifier_state { | 165 | struct verifier_state { |
| 170 | struct reg_state regs[MAX_BPF_REG]; | 166 | struct reg_state regs[MAX_BPF_REG]; |
| 171 | struct bpf_stack_slot stack[MAX_BPF_STACK]; | 167 | u8 stack_slot_type[MAX_BPF_STACK]; |
| 168 | struct reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; | ||
| 172 | }; | 169 | }; |
| 173 | 170 | ||
| 174 | /* linked list of verifier states used to prune search */ | 171 | /* linked list of verifier states used to prune search */ |
@@ -259,10 +256,10 @@ static void print_verifier_state(struct verifier_env *env)
| 259 | env->cur_state.regs[i].map_ptr->key_size, | 256 | env->cur_state.regs[i].map_ptr->key_size, |
| 260 | env->cur_state.regs[i].map_ptr->value_size); | 257 | env->cur_state.regs[i].map_ptr->value_size); |
| 261 | } | 258 | } |
| 262 | for (i = 0; i < MAX_BPF_STACK; i++) { | 259 | for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { |
| 263 | if (env->cur_state.stack[i].stype == STACK_SPILL) | 260 | if (env->cur_state.stack_slot_type[i] == STACK_SPILL) |
| 264 | verbose(" fp%d=%s", -MAX_BPF_STACK + i, | 261 | verbose(" fp%d=%s", -MAX_BPF_STACK + i, |
| 265 | reg_type_str[env->cur_state.stack[i].reg_st.type]); | 262 | reg_type_str[env->cur_state.spilled_regs[i / BPF_REG_SIZE].type]); |
| 266 | } | 263 | } |
| 267 | verbose("\n"); | 264 | verbose("\n"); |
| 268 | } | 265 | } |
@@ -539,8 +536,10 @@ static int bpf_size_to_bytes(int bpf_size)
| 539 | static int check_stack_write(struct verifier_state *state, int off, int size, | 536 | static int check_stack_write(struct verifier_state *state, int off, int size, |
| 540 | int value_regno) | 537 | int value_regno) |
| 541 | { | 538 | { |
| 542 | struct bpf_stack_slot *slot; | ||
| 543 | int i; | 539 | int i; |
| 540 | /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, | ||
| 541 | * so it's aligned access and [off, off + size) are within stack limits | ||
| 542 | */ | ||
| 544 | 543 | ||
| 545 | if (value_regno >= 0 && | 544 | if (value_regno >= 0 && |
| 546 | (state->regs[value_regno].type == PTR_TO_MAP_VALUE || | 545 | (state->regs[value_regno].type == PTR_TO_MAP_VALUE || |
@@ -548,30 +547,24 @@ static int check_stack_write(struct verifier_state *state, int off, int size,
| 548 | state->regs[value_regno].type == PTR_TO_CTX)) { | 547 | state->regs[value_regno].type == PTR_TO_CTX)) { |
| 549 | 548 | ||
| 550 | /* register containing pointer is being spilled into stack */ | 549 | /* register containing pointer is being spilled into stack */ |
| 551 | if (size != 8) { | 550 | if (size != BPF_REG_SIZE) { |
| 552 | verbose("invalid size of register spill\n"); | 551 | verbose("invalid size of register spill\n"); |
| 553 | return -EACCES; | 552 | return -EACCES; |
| 554 | } | 553 | } |
| 555 | 554 | ||
| 556 | slot = &state->stack[MAX_BPF_STACK + off]; | ||
| 557 | slot->stype = STACK_SPILL; | ||
| 558 | /* save register state */ | 555 | /* save register state */ |
| 559 | slot->reg_st = state->regs[value_regno]; | 556 | state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = |
| 560 | for (i = 1; i < 8; i++) { | 557 | state->regs[value_regno]; |
| 561 | slot = &state->stack[MAX_BPF_STACK + off + i]; | ||
| 562 | slot->stype = STACK_SPILL_PART; | ||
| 563 | slot->reg_st.type = UNKNOWN_VALUE; | ||
| 564 | slot->reg_st.map_ptr = NULL; | ||
| 565 | } | ||
| 566 | } else { | ||
| 567 | 558 | ||
| 559 | for (i = 0; i < BPF_REG_SIZE; i++) | ||
| 560 | state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL; | ||
| 561 | } else { | ||
| 568 | /* regular write of data into stack */ | 562 | /* regular write of data into stack */ |
| 569 | for (i = 0; i < size; i++) { | 563 | state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = |
| 570 | slot = &state->stack[MAX_BPF_STACK + off + i]; | 564 | (struct reg_state) {}; |
| 571 | slot->stype = STACK_MISC; | 565 | |
| 572 | slot->reg_st.type = UNKNOWN_VALUE; | 566 | for (i = 0; i < size; i++) |
| 573 | slot->reg_st.map_ptr = NULL; | 567 | state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC; |
| 574 | } | ||
| 575 | } | 568 | } |
| 576 | return 0; | 569 | return 0; |
| 577 | } | 570 | } |
| @@ -579,19 +572,18 @@ static int check_stack_write(struct verifier_state *state, int off, int size, | |||
| 579 | static int check_stack_read(struct verifier_state *state, int off, int size, | 572 | static int check_stack_read(struct verifier_state *state, int off, int size, |
| 580 | int value_regno) | 573 | int value_regno) |
| 581 | { | 574 | { |
| 575 | u8 *slot_type; | ||
| 582 | int i; | 576 | int i; |
| 583 | struct bpf_stack_slot *slot; | ||
| 584 | 577 | ||
| 585 | slot = &state->stack[MAX_BPF_STACK + off]; | 578 | slot_type = &state->stack_slot_type[MAX_BPF_STACK + off]; |
| 586 | 579 | ||
| 587 | if (slot->stype == STACK_SPILL) { | 580 | if (slot_type[0] == STACK_SPILL) { |
| 588 | if (size != 8) { | 581 | if (size != BPF_REG_SIZE) { |
| 589 | verbose("invalid size of register spill\n"); | 582 | verbose("invalid size of register spill\n"); |
| 590 | return -EACCES; | 583 | return -EACCES; |
| 591 | } | 584 | } |
| 592 | for (i = 1; i < 8; i++) { | 585 | for (i = 1; i < BPF_REG_SIZE; i++) { |
| 593 | if (state->stack[MAX_BPF_STACK + off + i].stype != | 586 | if (slot_type[i] != STACK_SPILL) { |
| 594 | STACK_SPILL_PART) { | ||
| 595 | verbose("corrupted spill memory\n"); | 587 | verbose("corrupted spill memory\n"); |
| 596 | return -EACCES; | 588 | return -EACCES; |
| 597 | } | 589 | } |
@@ -599,12 +591,12 @@ static int check_stack_read(struct verifier_state *state, int off, int size,
| 599 | 591 | ||
| 600 | if (value_regno >= 0) | 592 | if (value_regno >= 0) |
| 601 | /* restore register state from stack */ | 593 | /* restore register state from stack */ |
| 602 | state->regs[value_regno] = slot->reg_st; | 594 | state->regs[value_regno] = |
| 595 | state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE]; | ||
| 603 | return 0; | 596 | return 0; |
| 604 | } else { | 597 | } else { |
| 605 | for (i = 0; i < size; i++) { | 598 | for (i = 0; i < size; i++) { |
| 606 | if (state->stack[MAX_BPF_STACK + off + i].stype != | 599 | if (slot_type[i] != STACK_MISC) { |
| 607 | STACK_MISC) { | ||
| 608 | verbose("invalid read from stack off %d+%d size %d\n", | 600 | verbose("invalid read from stack off %d+%d size %d\n", |
| 609 | off, i, size); | 601 | off, i, size); |
| 610 | return -EACCES; | 602 | return -EACCES; |
@@ -747,7 +739,7 @@ static int check_stack_boundary(struct verifier_env *env,
| 747 | } | 739 | } |
| 748 | 740 | ||
| 749 | for (i = 0; i < access_size; i++) { | 741 | for (i = 0; i < access_size; i++) { |
| 750 | if (state->stack[MAX_BPF_STACK + off + i].stype != STACK_MISC) { | 742 | if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) { |
| 751 | verbose("invalid indirect read from stack off %d+%d size %d\n", | 743 | verbose("invalid indirect read from stack off %d+%d size %d\n", |
| 752 | off, i, access_size); | 744 | off, i, access_size); |
| 753 | return -EACCES; | 745 | return -EACCES; |
@@ -1180,6 +1172,70 @@ static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
| 1180 | return 0; | 1172 | return 0; |
| 1181 | } | 1173 | } |
| 1182 | 1174 | ||
| 1175 | /* verify safety of LD_ABS|LD_IND instructions: | ||
| 1176 | * - they can only appear in the programs where ctx == skb | ||
| 1177 | * - since they are wrappers of function calls, they scratch R1-R5 registers, | ||
| 1178 | * preserve R6-R9, and store return value into R0 | ||
| 1179 | * | ||
| 1180 | * Implicit input: | ||
| 1181 | * ctx == skb == R6 == CTX | ||
| 1182 | * | ||
| 1183 | * Explicit input: | ||
| 1184 | * SRC == any register | ||
| 1185 | * IMM == 32-bit immediate | ||
| 1186 | * | ||
| 1187 | * Output: | ||
| 1188 | * R0 - 8/16/32-bit skb data converted to cpu endianness | ||
| 1189 | */ | ||
| 1190 | static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn) | ||
| 1191 | { | ||
| 1192 | struct reg_state *regs = env->cur_state.regs; | ||
| 1193 | u8 mode = BPF_MODE(insn->code); | ||
| 1194 | struct reg_state *reg; | ||
| 1195 | int i, err; | ||
| 1196 | |||
| 1197 | if (env->prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) { | ||
| 1198 | verbose("BPF_LD_ABS|IND instructions are only allowed in socket filters\n"); | ||
| 1199 | return -EINVAL; | ||
| 1200 | } | ||
| 1201 | |||
| 1202 | if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || | ||
| 1203 | (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) { | ||
| 1204 | verbose("BPF_LD_ABS uses reserved fields\n"); | ||
| 1205 | return -EINVAL; | ||
| 1206 | } | ||
| 1207 | |||
| 1208 | /* check whether implicit source operand (register R6) is readable */ | ||
| 1209 | err = check_reg_arg(regs, BPF_REG_6, SRC_OP); | ||
| 1210 | if (err) | ||
| 1211 | return err; | ||
| 1212 | |||
| 1213 | if (regs[BPF_REG_6].type != PTR_TO_CTX) { | ||
| 1214 | verbose("at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); | ||
| 1215 | return -EINVAL; | ||
| 1216 | } | ||
| 1217 | |||
| 1218 | if (mode == BPF_IND) { | ||
| 1219 | /* check explicit source operand */ | ||
| 1220 | err = check_reg_arg(regs, insn->src_reg, SRC_OP); | ||
| 1221 | if (err) | ||
| 1222 | return err; | ||
| 1223 | } | ||
| 1224 | |||
| 1225 | /* reset caller saved regs to unreadable */ | ||
| 1226 | for (i = 0; i < CALLER_SAVED_REGS; i++) { | ||
| 1227 | reg = regs + caller_saved[i]; | ||
| 1228 | reg->type = NOT_INIT; | ||
| 1229 | reg->imm = 0; | ||
| 1230 | } | ||
| 1231 | |||
| 1232 | /* mark destination R0 register as readable, since it contains | ||
| 1233 | * the value fetched from the packet | ||
| 1234 | */ | ||
| 1235 | regs[BPF_REG_0].type = UNKNOWN_VALUE; | ||
| 1236 | return 0; | ||
| 1237 | } | ||
| 1238 | |||
| 1183 | /* non-recursive DFS pseudo code | 1239 | /* non-recursive DFS pseudo code |
| 1184 | * 1 procedure DFS-iterative(G,v): | 1240 | * 1 procedure DFS-iterative(G,v): |
| 1185 | * 2 label v as discovered | 1241 | * 2 label v as discovered |
@@ -1417,12 +1473,33 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
| 1417 | } | 1473 | } |
| 1418 | 1474 | ||
| 1419 | for (i = 0; i < MAX_BPF_STACK; i++) { | 1475 | for (i = 0; i < MAX_BPF_STACK; i++) { |
| 1420 | if (memcmp(&old->stack[i], &cur->stack[i], | 1476 | if (old->stack_slot_type[i] == STACK_INVALID) |
| 1421 | sizeof(old->stack[0])) != 0) { | 1477 | continue; |
| 1422 | if (old->stack[i].stype == STACK_INVALID) | 1478 | if (old->stack_slot_type[i] != cur->stack_slot_type[i]) |
| 1423 | continue; | 1479 | /* Ex: old explored (safe) state has STACK_SPILL in |
| 1480 | * this stack slot, but current has has STACK_MISC -> | ||
| 1481 | * this verifier states are not equivalent, | ||
| 1482 | * return false to continue verification of this path | ||
| 1483 | */ | ||
| 1424 | return false; | 1484 | return false; |
| 1425 | } | 1485 | if (i % BPF_REG_SIZE) |
| 1486 | continue; | ||
| 1487 | if (memcmp(&old->spilled_regs[i / BPF_REG_SIZE], | ||
| 1488 | &cur->spilled_regs[i / BPF_REG_SIZE], | ||
| 1489 | sizeof(old->spilled_regs[0]))) | ||
| 1490 | /* when explored and current stack slot types are | ||
| 1491 | * the same, check that stored pointer types | ||
| 1492 | * are the same as well. | ||
| 1493 | * Ex: explored safe path could have stored | ||
| 1494 | * (struct reg_state) {.type = PTR_TO_STACK, .imm = -8} | ||
| 1495 | * but current path has stored: | ||
| 1496 | * (struct reg_state) {.type = PTR_TO_STACK, .imm = -16} | ||
| 1497 | * such verifier states are not equivalent. | ||
| 1498 | * return false to continue verification of this path | ||
| 1499 | */ | ||
| 1500 | return false; | ||
| 1501 | else | ||
| 1502 | continue; | ||
| 1426 | } | 1503 | } |
| 1427 | return true; | 1504 | return true; |
| 1428 | } | 1505 | } |
@@ -1664,8 +1741,10 @@ process_bpf_exit:
| 1664 | u8 mode = BPF_MODE(insn->code); | 1741 | u8 mode = BPF_MODE(insn->code); |
| 1665 | 1742 | ||
| 1666 | if (mode == BPF_ABS || mode == BPF_IND) { | 1743 | if (mode == BPF_ABS || mode == BPF_IND) { |
| 1667 | verbose("LD_ABS is not supported yet\n"); | 1744 | err = check_ld_abs(env, insn); |
| 1668 | return -EINVAL; | 1745 | if (err) |
| 1746 | return err; | ||
| 1747 | |||
| 1669 | } else if (mode == BPF_IMM) { | 1748 | } else if (mode == BPF_IMM) { |
| 1670 | err = check_ld_imm(env, insn); | 1749 | err = check_ld_imm(env, insn); |
| 1671 | if (err) | 1750 | if (err) |
