author    Thomas Gleixner <tglx@linutronix.de>  2015-01-12 04:51:13 -0500
committer Thomas Gleixner <tglx@linutronix.de>  2015-01-12 04:51:13 -0500
commit    2f5eaf66e580f64032b365a00157b6b58c266b37 (patch)
tree      7852017c864f0eb3833782e2a017952bd8531458 /kernel/bpf
parent    c291ee622165cb2c8d4e7af63fffd499354a23be (diff)
parent    91d1179212161f220938198b742c328ad38fd0a3 (diff)
Merge tag 'irqchip-urgent-3.19' of git://git.infradead.org/users/jcooper/linux into irq/urgent
irqchip urgent fixes for v3.19 from Jason Cooper:

 - mtk-sysirq: Fix error handling
 - hip04: Fix cpu map for 16bit value
 - gic-v3-its: Clear a warning regarding decimal constants
 - omap-intc: Fix legacy DMA regression
 - atmel-aic-common: Retain priority when changing type
Diffstat (limited to 'kernel/bpf')
-rw-r--r--  kernel/bpf/Makefile    |   2
-rw-r--r--  kernel/bpf/arraymap.c  | 156
-rw-r--r--  kernel/bpf/hashtab.c   | 367
-rw-r--r--  kernel/bpf/helpers.c   |  89
-rw-r--r--  kernel/bpf/syscall.c   |   6
-rw-r--r--  kernel/bpf/test_stub.c |  56
-rw-r--r--  kernel/bpf/verifier.c  | 171
7 files changed, 750 insertions, 97 deletions
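
The interesting part for kernel/bpf is the arrival of the first two real map types (hash and array) plus a new 'flags' argument threaded through map updates (see the syscall.c and helpers.c hunks below). For orientation, here is a minimal userspace sketch (not part of this commit; it assumes the union bpf_attr layout and bpf(2) syscall number of this kernel series) of creating an array map:

/* Hypothetical userspace sketch, assuming the union bpf_attr layout
 * introduced by this kernel series.
 */
#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int bpf_map_create_array(unsigned int max_entries)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_ARRAY;
	attr.key_size    = 4;		/* array maps require 4-byte keys */
	attr.value_size  = 8;
	attr.max_entries = max_entries;

	/* returns a map fd, or -1 with errno set */
	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}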
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 0daf7f6ae7df..a5ae60f0b0a2 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,5 +1,5 @@
 obj-y := core.o
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o hashtab.o arraymap.o helpers.o
 ifdef CONFIG_TEST_BPF
 obj-$(CONFIG_BPF_SYSCALL) += test_stub.o
 endif
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
new file mode 100644
index 000000000000..9eb4d8a7cd87
--- /dev/null
+++ b/kernel/bpf/arraymap.c
@@ -0,0 +1,156 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/bpf.h>
+#include <linux/err.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+struct bpf_array {
+	struct bpf_map map;
+	u32 elem_size;
+	char value[0] __aligned(8);
+};
+
+/* Called from syscall */
+static struct bpf_map *array_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_array *array;
+	u32 elem_size, array_size;
+
+	/* check sanity of attributes */
+	if (attr->max_entries == 0 || attr->key_size != 4 ||
+	    attr->value_size == 0)
+		return ERR_PTR(-EINVAL);
+
+	elem_size = round_up(attr->value_size, 8);
+
+	/* check round_up into zero and u32 overflow */
+	if (elem_size == 0 ||
+	    attr->max_entries > (U32_MAX - sizeof(*array)) / elem_size)
+		return ERR_PTR(-ENOMEM);
+
+	array_size = sizeof(*array) + attr->max_entries * elem_size;
+
+	/* allocate all map elements and zero-initialize them */
+	array = kzalloc(array_size, GFP_USER | __GFP_NOWARN);
+	if (!array) {
+		array = vzalloc(array_size);
+		if (!array)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	/* copy mandatory map attributes */
+	array->map.key_size = attr->key_size;
+	array->map.value_size = attr->value_size;
+	array->map.max_entries = attr->max_entries;
+
+	array->elem_size = elem_size;
+
+	return &array->map;
+}
+
+/* Called from syscall or from eBPF program */
+static void *array_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	u32 index = *(u32 *)key;
+
+	if (index >= array->map.max_entries)
+		return NULL;
+
+	return array->value + array->elem_size * index;
+}
+
+/* Called from syscall */
+static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	u32 index = *(u32 *)key;
+	u32 *next = (u32 *)next_key;
+
+	if (index >= array->map.max_entries) {
+		*next = 0;
+		return 0;
+	}
+
+	if (index == array->map.max_entries - 1)
+		return -ENOENT;
+
+	*next = index + 1;
+	return 0;
+}
+
+/* Called from syscall or from eBPF program */
+static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
+				 u64 map_flags)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	u32 index = *(u32 *)key;
+
+	if (map_flags > BPF_EXIST)
+		/* unknown flags */
+		return -EINVAL;
+
+	if (index >= array->map.max_entries)
+		/* all elements were pre-allocated, cannot insert a new one */
+		return -E2BIG;
+
+	if (map_flags == BPF_NOEXIST)
+		/* all elements already exist */
+		return -EEXIST;
+
+	memcpy(array->value + array->elem_size * index, value, array->elem_size);
+	return 0;
+}
+
+/* Called from syscall or from eBPF program */
+static int array_map_delete_elem(struct bpf_map *map, void *key)
+{
+	return -EINVAL;
+}
+
+/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
+static void array_map_free(struct bpf_map *map)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+
+	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
+	 * so the programs (can be more than one that used this map) were
+	 * disconnected from events. Wait for outstanding programs to complete
+	 * and free the array
+	 */
+	synchronize_rcu();
+
+	kvfree(array);
+}
+
+static struct bpf_map_ops array_ops = {
+	.map_alloc = array_map_alloc,
+	.map_free = array_map_free,
+	.map_get_next_key = array_map_get_next_key,
+	.map_lookup_elem = array_map_lookup_elem,
+	.map_update_elem = array_map_update_elem,
+	.map_delete_elem = array_map_delete_elem,
+};
+
+static struct bpf_map_type_list tl = {
+	.ops = &array_ops,
+	.type = BPF_MAP_TYPE_ARRAY,
+};
+
+static int __init register_array_map(void)
+{
+	bpf_register_map_type(&tl);
+	return 0;
+}
+late_initcall(register_array_map);
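
Since every array slot exists from creation time, the update flags behave differently here than for a hash map: BPF_NOEXIST can never succeed, an out-of-range index fails with E2BIG rather than growing the map, and deletes are rejected outright. A hedged userspace sketch of those semantics, reusing the headers from the earlier snippet (map_fd is assumed to be an array map fd):

union bpf_attr attr;
__u32 key = 0;
__u64 val = 42;

memset(&attr, 0, sizeof(attr));
attr.map_fd = map_fd;			/* assumed: fd from BPF_MAP_CREATE */
attr.key    = (__u64)(unsigned long)&key;
attr.value  = (__u64)(unsigned long)&val;

attr.flags = BPF_NOEXIST;		/* fails with EEXIST: the slot is pre-allocated */
syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));

attr.flags = BPF_ANY;			/* succeeds: overwrites the slot in place */
syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));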
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
new file mode 100644
index 000000000000..b3ba43674310
--- /dev/null
+++ b/kernel/bpf/hashtab.c
@@ -0,0 +1,367 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/bpf.h>
+#include <linux/jhash.h>
+#include <linux/filter.h>
+#include <linux/vmalloc.h>
+
+struct bpf_htab {
+	struct bpf_map map;
+	struct hlist_head *buckets;
+	spinlock_t lock;
+	u32 count;	/* number of elements in this hashtable */
+	u32 n_buckets;	/* number of hash buckets */
+	u32 elem_size;	/* size of each element in bytes */
+};
+
+/* each htab element is struct htab_elem + key + value */
+struct htab_elem {
+	struct hlist_node hash_node;
+	struct rcu_head rcu;
+	u32 hash;
+	char key[0] __aligned(8);
+};
+
+/* Called from syscall */
+static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_htab *htab;
+	int err, i;
+
+	htab = kzalloc(sizeof(*htab), GFP_USER);
+	if (!htab)
+		return ERR_PTR(-ENOMEM);
+
+	/* mandatory map attributes */
+	htab->map.key_size = attr->key_size;
+	htab->map.value_size = attr->value_size;
+	htab->map.max_entries = attr->max_entries;
+
+	/* check sanity of attributes.
+	 * value_size == 0 may be allowed in the future to use map as a set
+	 */
+	err = -EINVAL;
+	if (htab->map.max_entries == 0 || htab->map.key_size == 0 ||
+	    htab->map.value_size == 0)
+		goto free_htab;
+
+	/* hash table size must be power of 2 */
+	htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);
+
+	err = -E2BIG;
+	if (htab->map.key_size > MAX_BPF_STACK)
+		/* eBPF programs initialize keys on stack, so they cannot be
+		 * larger than max stack size
+		 */
+		goto free_htab;
+
+	err = -ENOMEM;
+	/* prevent zero size kmalloc and check for u32 overflow */
+	if (htab->n_buckets == 0 ||
+	    htab->n_buckets > U32_MAX / sizeof(struct hlist_head))
+		goto free_htab;
+
+	htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct hlist_head),
+				      GFP_USER | __GFP_NOWARN);
+
+	if (!htab->buckets) {
+		htab->buckets = vmalloc(htab->n_buckets * sizeof(struct hlist_head));
+		if (!htab->buckets)
+			goto free_htab;
+	}
+
+	for (i = 0; i < htab->n_buckets; i++)
+		INIT_HLIST_HEAD(&htab->buckets[i]);
+
+	spin_lock_init(&htab->lock);
+	htab->count = 0;
+
+	htab->elem_size = sizeof(struct htab_elem) +
+			  round_up(htab->map.key_size, 8) +
+			  htab->map.value_size;
+	return &htab->map;
+
+free_htab:
+	kfree(htab);
+	return ERR_PTR(err);
+}
+
+static inline u32 htab_map_hash(const void *key, u32 key_len)
+{
+	return jhash(key, key_len, 0);
+}
+
+static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
+{
+	return &htab->buckets[hash & (htab->n_buckets - 1)];
+}
+
+static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
+					 void *key, u32 key_size)
+{
+	struct htab_elem *l;
+
+	hlist_for_each_entry_rcu(l, head, hash_node)
+		if (l->hash == hash && !memcmp(&l->key, key, key_size))
+			return l;
+
+	return NULL;
+}
+
+/* Called from syscall or from eBPF program */
+static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct hlist_head *head;
+	struct htab_elem *l;
+	u32 hash, key_size;
+
+	/* Must be called with rcu_read_lock. */
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	key_size = map->key_size;
+
+	hash = htab_map_hash(key, key_size);
+
+	head = select_bucket(htab, hash);
+
+	l = lookup_elem_raw(head, hash, key, key_size);
+
+	if (l)
+		return l->key + round_up(map->key_size, 8);
+
+	return NULL;
+}
+
+/* Called from syscall */
+static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct hlist_head *head;
+	struct htab_elem *l, *next_l;
+	u32 hash, key_size;
+	int i;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	key_size = map->key_size;
+
+	hash = htab_map_hash(key, key_size);
+
+	head = select_bucket(htab, hash);
+
+	/* lookup the key */
+	l = lookup_elem_raw(head, hash, key, key_size);
+
+	if (!l) {
+		i = 0;
+		goto find_first_elem;
+	}
+
+	/* key was found, get next key in the same bucket */
+	next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
+				  struct htab_elem, hash_node);
+
+	if (next_l) {
+		/* if next elem in this hash list is non-zero, just return it */
+		memcpy(next_key, next_l->key, key_size);
+		return 0;
+	}
+
+	/* no more elements in this hash list, go to the next bucket */
+	i = hash & (htab->n_buckets - 1);
+	i++;
+
+find_first_elem:
+	/* iterate over buckets */
+	for (; i < htab->n_buckets; i++) {
+		head = select_bucket(htab, i);
+
+		/* pick first element in the bucket */
+		next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
+					  struct htab_elem, hash_node);
+		if (next_l) {
+			/* if it's not empty, just return it */
+			memcpy(next_key, next_l->key, key_size);
+			return 0;
+		}
+	}
+
+	/* iterated over all buckets and all elements */
+	return -ENOENT;
+}
+
+/* Called from syscall or from eBPF program */
+static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
+				u64 map_flags)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct htab_elem *l_new, *l_old;
+	struct hlist_head *head;
+	unsigned long flags;
+	u32 key_size;
+	int ret;
+
+	if (map_flags > BPF_EXIST)
+		/* unknown flags */
+		return -EINVAL;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	/* allocate new element outside of lock */
+	l_new = kmalloc(htab->elem_size, GFP_ATOMIC);
+	if (!l_new)
+		return -ENOMEM;
+
+	key_size = map->key_size;
+
+	memcpy(l_new->key, key, key_size);
+	memcpy(l_new->key + round_up(key_size, 8), value, map->value_size);
+
+	l_new->hash = htab_map_hash(l_new->key, key_size);
+
+	/* bpf_map_update_elem() can be called in_irq() */
+	spin_lock_irqsave(&htab->lock, flags);
+
+	head = select_bucket(htab, l_new->hash);
+
+	l_old = lookup_elem_raw(head, l_new->hash, key, key_size);
+
+	if (!l_old && unlikely(htab->count >= map->max_entries)) {
+		/* if elem with this 'key' doesn't exist and we've reached
+		 * max_entries limit, fail insertion of new elem
+		 */
+		ret = -E2BIG;
+		goto err;
+	}
+
+	if (l_old && map_flags == BPF_NOEXIST) {
+		/* elem already exists */
+		ret = -EEXIST;
+		goto err;
+	}
+
+	if (!l_old && map_flags == BPF_EXIST) {
+		/* elem doesn't exist, cannot update it */
+		ret = -ENOENT;
+		goto err;
+	}
+
+	/* add new element to the head of the list, so that concurrent
+	 * search will find it before old elem
+	 */
+	hlist_add_head_rcu(&l_new->hash_node, head);
+	if (l_old) {
+		hlist_del_rcu(&l_old->hash_node);
+		kfree_rcu(l_old, rcu);
+	} else {
+		htab->count++;
+	}
+	spin_unlock_irqrestore(&htab->lock, flags);
+
+	return 0;
+err:
+	spin_unlock_irqrestore(&htab->lock, flags);
+	kfree(l_new);
+	return ret;
+}
+
+/* Called from syscall or from eBPF program */
+static int htab_map_delete_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct hlist_head *head;
+	struct htab_elem *l;
+	unsigned long flags;
+	u32 hash, key_size;
+	int ret = -ENOENT;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	key_size = map->key_size;
+
+	hash = htab_map_hash(key, key_size);
+
+	spin_lock_irqsave(&htab->lock, flags);
+
+	head = select_bucket(htab, hash);
+
+	l = lookup_elem_raw(head, hash, key, key_size);
+
+	if (l) {
+		hlist_del_rcu(&l->hash_node);
+		htab->count--;
+		kfree_rcu(l, rcu);
+		ret = 0;
+	}
+
+	spin_unlock_irqrestore(&htab->lock, flags);
+	return ret;
+}
+
+static void delete_all_elements(struct bpf_htab *htab)
+{
+	int i;
+
+	for (i = 0; i < htab->n_buckets; i++) {
+		struct hlist_head *head = select_bucket(htab, i);
+		struct hlist_node *n;
+		struct htab_elem *l;
+
+		hlist_for_each_entry_safe(l, n, head, hash_node) {
+			hlist_del_rcu(&l->hash_node);
+			htab->count--;
+			kfree(l);
+		}
+	}
+}
+
+/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
+static void htab_map_free(struct bpf_map *map)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+
+	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
+	 * so the programs (can be more than one that used this map) were
+	 * disconnected from events. Wait for outstanding critical sections in
+	 * these programs to complete
+	 */
+	synchronize_rcu();
+
+	/* some of kfree_rcu() callbacks for elements of this map may not have
+	 * executed. It's ok. Proceed to free residual elements and map itself
+	 */
+	delete_all_elements(htab);
+	kvfree(htab->buckets);
+	kfree(htab);
+}
+
+static struct bpf_map_ops htab_ops = {
+	.map_alloc = htab_map_alloc,
+	.map_free = htab_map_free,
+	.map_get_next_key = htab_map_get_next_key,
+	.map_lookup_elem = htab_map_lookup_elem,
+	.map_update_elem = htab_map_update_elem,
+	.map_delete_elem = htab_map_delete_elem,
+};
+
+static struct bpf_map_type_list tl = {
+	.ops = &htab_ops,
+	.type = BPF_MAP_TYPE_HASH,
+};
+
+static int __init register_htab_map(void)
+{
+	bpf_register_map_type(&tl);
+	return 0;
+}
+late_initcall(register_htab_map);
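
The get_next_key contract implemented above is what lets userspace enumerate a hash map without knowing its contents: a key that is absent restarts the walk from the first bucket, and -ENOENT marks the end of the table. A sketch of such a walk, under the same assumptions as the earlier snippets (map_fd is assumed to be a hash map with 8-byte keys):

__u64 key = 0, next_key;	/* key width must match the map definition */
union bpf_attr attr;

memset(&attr, 0, sizeof(attr));
attr.map_fd   = map_fd;
attr.key      = (__u64)(unsigned long)&key;
attr.next_key = (__u64)(unsigned long)&next_key;

/* starting from a key that is not in the map yields the first element;
 * each successful call advances bucket by bucket until -ENOENT
 */
while (syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)) == 0) {
	/* ... use next_key, e.g. follow up with BPF_MAP_LOOKUP_ELEM ... */
	key = next_key;
}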
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
new file mode 100644
index 000000000000..9e3414d85459
--- /dev/null
+++ b/kernel/bpf/helpers.c
@@ -0,0 +1,89 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/bpf.h>
+#include <linux/rcupdate.h>
+
+/* If kernel subsystem is allowing eBPF programs to call this function,
+ * inside its own verifier_ops->get_func_proto() callback it should return
+ * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments
+ *
+ * Different map implementations will rely on rcu in map methods
+ * lookup/update/delete, therefore eBPF programs must run under rcu lock
+ * if program is allowed to access maps, so check rcu_read_lock_held in
+ * all three functions.
+ */
+static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	/* verifier checked that R1 contains a valid pointer to bpf_map
+	 * and R2 points to a program stack and map->key_size bytes were
+	 * initialized
+	 */
+	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
+	void *key = (void *) (unsigned long) r2;
+	void *value;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	value = map->ops->map_lookup_elem(map, key);
+
+	/* lookup() returns either pointer to element value or NULL
+	 * which is the meaning of PTR_TO_MAP_VALUE_OR_NULL type
+	 */
+	return (unsigned long) value;
+}
+
+struct bpf_func_proto bpf_map_lookup_elem_proto = {
+	.func = bpf_map_lookup_elem,
+	.gpl_only = false,
+	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg1_type = ARG_CONST_MAP_PTR,
+	.arg2_type = ARG_PTR_TO_MAP_KEY,
+};
+
+static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
+	void *key = (void *) (unsigned long) r2;
+	void *value = (void *) (unsigned long) r3;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	return map->ops->map_update_elem(map, key, value, r4);
+}
+
+struct bpf_func_proto bpf_map_update_elem_proto = {
+	.func = bpf_map_update_elem,
+	.gpl_only = false,
+	.ret_type = RET_INTEGER,
+	.arg1_type = ARG_CONST_MAP_PTR,
+	.arg2_type = ARG_PTR_TO_MAP_KEY,
+	.arg3_type = ARG_PTR_TO_MAP_VALUE,
+	.arg4_type = ARG_ANYTHING,
+};
+
+static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
+	void *key = (void *) (unsigned long) r2;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	return map->ops->map_delete_elem(map, key);
+}
+
+struct bpf_func_proto bpf_map_delete_elem_proto = {
+	.func = bpf_map_delete_elem,
+	.gpl_only = false,
+	.ret_type = RET_INTEGER,
+	.arg1_type = ARG_CONST_MAP_PTR,
+	.arg2_type = ARG_PTR_TO_MAP_KEY,
+};
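
The calling convention these protos encode (R1 = map pointer, R2 = pointer to a stack-built key, value pointer or NULL back in R0) is easiest to see in raw instruction form. A hedged sketch using the BPF_* instruction macros of this era (linux/filter.h plus the samples/bpf helpers); BPF_LD_MAP_FD emits the pseudo load that the verifier later rewrites into an in-kernel map pointer, and map_fd is assumed to come from BPF_MAP_CREATE:

/* Sketch only: build a 4-byte key at fp-4, call the lookup helper,
 * and write to the value if the element exists.
 */
struct bpf_insn prog[] = {
	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),	/* *(u32 *)(fp - 4) = 0 (the key) */
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),	/* r2 = fp */
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),	/* r2 = fp - 4 = &key */
	BPF_LD_MAP_FD(BPF_REG_1, map_fd),	/* r1 = map (pseudo ld_imm64) */
	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),	/* verifier demands the NULL check */
	BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),	/* *(u64 *)value = 1 */
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_EXIT_INSN(),
};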
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ba61c8c16032..088ac0b1b106 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -169,7 +169,7 @@ static int map_lookup_elem(union bpf_attr *attr)
 	if (copy_from_user(key, ukey, map->key_size) != 0)
 		goto free_key;
 
-	err = -ESRCH;
+	err = -ENOENT;
 	rcu_read_lock();
 	value = map->ops->map_lookup_elem(map, key);
 	if (!value)
@@ -190,7 +190,7 @@ err_put:
 	return err;
 }
 
-#define BPF_MAP_UPDATE_ELEM_LAST_FIELD value
+#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
 
 static int map_update_elem(union bpf_attr *attr)
 {
@@ -231,7 +231,7 @@ static int map_update_elem(union bpf_attr *attr)
 	 * therefore all map accessors rely on this fact, so do the same here
 	 */
 	rcu_read_lock();
-	err = map->ops->map_update_elem(map, key, value);
+	err = map->ops->map_update_elem(map, key, value, attr->flags);
 	rcu_read_unlock();
 
 free_value:
diff --git a/kernel/bpf/test_stub.c b/kernel/bpf/test_stub.c
index fcaddff4003e..0ceae1e6e8b5 100644
--- a/kernel/bpf/test_stub.c
+++ b/kernel/bpf/test_stub.c
@@ -18,26 +18,18 @@ struct bpf_context {
 	u64 arg2;
 };
 
-static u64 test_func(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
-{
-	return 0;
-}
-
-static struct bpf_func_proto test_funcs[] = {
-	[BPF_FUNC_unspec] = {
-		.func = test_func,
-		.gpl_only = true,
-		.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
-		.arg1_type = ARG_CONST_MAP_PTR,
-		.arg2_type = ARG_PTR_TO_MAP_KEY,
-	},
-};
-
 static const struct bpf_func_proto *test_func_proto(enum bpf_func_id func_id)
 {
-	if (func_id < 0 || func_id >= ARRAY_SIZE(test_funcs))
+	switch (func_id) {
+	case BPF_FUNC_map_lookup_elem:
+		return &bpf_map_lookup_elem_proto;
+	case BPF_FUNC_map_update_elem:
+		return &bpf_map_update_elem_proto;
+	case BPF_FUNC_map_delete_elem:
+		return &bpf_map_delete_elem_proto;
+	default:
 		return NULL;
-	return &test_funcs[func_id];
+	}
 }
 
 static const struct bpf_context_access {
@@ -78,38 +70,8 @@ static struct bpf_prog_type_list tl_prog = {
 	.type = BPF_PROG_TYPE_UNSPEC,
 };
 
-static struct bpf_map *test_map_alloc(union bpf_attr *attr)
-{
-	struct bpf_map *map;
-
-	map = kzalloc(sizeof(*map), GFP_USER);
-	if (!map)
-		return ERR_PTR(-ENOMEM);
-
-	map->key_size = attr->key_size;
-	map->value_size = attr->value_size;
-	map->max_entries = attr->max_entries;
-	return map;
-}
-
-static void test_map_free(struct bpf_map *map)
-{
-	kfree(map);
-}
-
-static struct bpf_map_ops test_map_ops = {
-	.map_alloc = test_map_alloc,
-	.map_free = test_map_free,
-};
-
-static struct bpf_map_type_list tl_map = {
-	.ops = &test_map_ops,
-	.type = BPF_MAP_TYPE_UNSPEC,
-};
-
 static int __init register_test_ops(void)
 {
-	bpf_register_map_type(&tl_map);
 	bpf_register_prog_type(&tl_prog);
 	return 0;
 }
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9f81818f2941..a28e09c7825d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -153,22 +153,19 @@ struct reg_state {
 
 enum bpf_stack_slot_type {
 	STACK_INVALID, /* nothing was stored in this stack slot */
-	STACK_SPILL, /* 1st byte of register spilled into stack */
-	STACK_SPILL_PART, /* other 7 bytes of register spill */
+	STACK_SPILL, /* register spilled into stack */
 	STACK_MISC /* BPF program wrote some data into this slot */
 };
 
-struct bpf_stack_slot {
-	enum bpf_stack_slot_type stype;
-	struct reg_state reg_st;
-};
+#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
 
 /* state of the program:
  * type of all registers and stack info
  */
 struct verifier_state {
 	struct reg_state regs[MAX_BPF_REG];
-	struct bpf_stack_slot stack[MAX_BPF_STACK];
+	u8 stack_slot_type[MAX_BPF_STACK];
+	struct reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
 };
 
 /* linked list of verifier states used to prune search */
@@ -259,10 +256,10 @@ static void print_verifier_state(struct verifier_env *env)
 			env->cur_state.regs[i].map_ptr->key_size,
 			env->cur_state.regs[i].map_ptr->value_size);
 	}
-	for (i = 0; i < MAX_BPF_STACK; i++) {
-		if (env->cur_state.stack[i].stype == STACK_SPILL)
+	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
+		if (env->cur_state.stack_slot_type[i] == STACK_SPILL)
 			verbose(" fp%d=%s", -MAX_BPF_STACK + i,
-				reg_type_str[env->cur_state.stack[i].reg_st.type]);
+				reg_type_str[env->cur_state.spilled_regs[i / BPF_REG_SIZE].type]);
 	}
 	verbose("\n");
 }
@@ -539,8 +536,10 @@ static int bpf_size_to_bytes(int bpf_size)
 static int check_stack_write(struct verifier_state *state, int off, int size,
 			     int value_regno)
 {
-	struct bpf_stack_slot *slot;
 	int i;
+	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
+	 * so it's aligned access and [off, off + size) are within stack limits
+	 */
 
 	if (value_regno >= 0 &&
 	    (state->regs[value_regno].type == PTR_TO_MAP_VALUE ||
@@ -548,30 +547,24 @@ static int check_stack_write(struct verifier_state *state, int off, int size,
 	     state->regs[value_regno].type == PTR_TO_CTX)) {
 
 		/* register containing pointer is being spilled into stack */
-		if (size != 8) {
+		if (size != BPF_REG_SIZE) {
 			verbose("invalid size of register spill\n");
 			return -EACCES;
 		}
 
-		slot = &state->stack[MAX_BPF_STACK + off];
-		slot->stype = STACK_SPILL;
 		/* save register state */
-		slot->reg_st = state->regs[value_regno];
-		for (i = 1; i < 8; i++) {
-			slot = &state->stack[MAX_BPF_STACK + off + i];
-			slot->stype = STACK_SPILL_PART;
-			slot->reg_st.type = UNKNOWN_VALUE;
-			slot->reg_st.map_ptr = NULL;
-		}
-	} else {
+		state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
+			state->regs[value_regno];
 
+		for (i = 0; i < BPF_REG_SIZE; i++)
+			state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL;
+	} else {
 		/* regular write of data into stack */
-		for (i = 0; i < size; i++) {
-			slot = &state->stack[MAX_BPF_STACK + off + i];
-			slot->stype = STACK_MISC;
-			slot->reg_st.type = UNKNOWN_VALUE;
-			slot->reg_st.map_ptr = NULL;
-		}
+		state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
+			(struct reg_state) {};
+
+		for (i = 0; i < size; i++)
+			state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC;
 	}
 	return 0;
 }
@@ -579,19 +572,18 @@ static int check_stack_write(struct verifier_state *state, int off, int size,
 static int check_stack_read(struct verifier_state *state, int off, int size,
 			    int value_regno)
 {
+	u8 *slot_type;
 	int i;
-	struct bpf_stack_slot *slot;
 
-	slot = &state->stack[MAX_BPF_STACK + off];
+	slot_type = &state->stack_slot_type[MAX_BPF_STACK + off];
 
-	if (slot->stype == STACK_SPILL) {
-		if (size != 8) {
+	if (slot_type[0] == STACK_SPILL) {
+		if (size != BPF_REG_SIZE) {
 			verbose("invalid size of register spill\n");
 			return -EACCES;
 		}
-		for (i = 1; i < 8; i++) {
-			if (state->stack[MAX_BPF_STACK + off + i].stype !=
-			    STACK_SPILL_PART) {
+		for (i = 1; i < BPF_REG_SIZE; i++) {
+			if (slot_type[i] != STACK_SPILL) {
 				verbose("corrupted spill memory\n");
 				return -EACCES;
 			}
@@ -599,12 +591,12 @@ static int check_stack_read(struct verifier_state *state, int off, int size,
 
 		if (value_regno >= 0)
 			/* restore register state from stack */
-			state->regs[value_regno] = slot->reg_st;
+			state->regs[value_regno] =
+				state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE];
 		return 0;
 	} else {
 		for (i = 0; i < size; i++) {
-			if (state->stack[MAX_BPF_STACK + off + i].stype !=
-			    STACK_MISC) {
+			if (slot_type[i] != STACK_MISC) {
 				verbose("invalid read from stack off %d+%d size %d\n",
 					off, i, size);
 				return -EACCES;
@@ -747,7 +739,7 @@ static int check_stack_boundary(struct verifier_env *env,
 	}
 
 	for (i = 0; i < access_size; i++) {
-		if (state->stack[MAX_BPF_STACK + off + i].stype != STACK_MISC) {
+		if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) {
 			verbose("invalid indirect read from stack off %d+%d size %d\n",
 				off, i, access_size);
 			return -EACCES;
@@ -1180,6 +1172,70 @@ static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
 	return 0;
 }
 
+/* verify safety of LD_ABS|LD_IND instructions:
+ * - they can only appear in the programs where ctx == skb
+ * - since they are wrappers of function calls, they scratch R1-R5 registers,
+ *   preserve R6-R9, and store return value into R0
+ *
+ * Implicit input:
+ *   ctx == skb == R6 == CTX
+ *
+ * Explicit input:
+ *   SRC == any register
+ *   IMM == 32-bit immediate
+ *
+ * Output:
+ *   R0 - 8/16/32-bit skb data converted to cpu endianness
+ */
+static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn)
+{
+	struct reg_state *regs = env->cur_state.regs;
+	u8 mode = BPF_MODE(insn->code);
+	struct reg_state *reg;
+	int i, err;
+
+	if (env->prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) {
+		verbose("BPF_LD_ABS|IND instructions are only allowed in socket filters\n");
+		return -EINVAL;
+	}
+
+	if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
+	    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
+		verbose("BPF_LD_ABS uses reserved fields\n");
+		return -EINVAL;
+	}
+
+	/* check whether implicit source operand (register R6) is readable */
+	err = check_reg_arg(regs, BPF_REG_6, SRC_OP);
+	if (err)
+		return err;
+
+	if (regs[BPF_REG_6].type != PTR_TO_CTX) {
+		verbose("at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
+		return -EINVAL;
+	}
+
+	if (mode == BPF_IND) {
+		/* check explicit source operand */
+		err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+		if (err)
+			return err;
+	}
+
+	/* reset caller saved regs to unreadable */
+	for (i = 0; i < CALLER_SAVED_REGS; i++) {
+		reg = regs + caller_saved[i];
+		reg->type = NOT_INIT;
+		reg->imm = 0;
+	}
+
+	/* mark destination R0 register as readable, since it contains
+	 * the value fetched from the packet
+	 */
+	regs[BPF_REG_0].type = UNKNOWN_VALUE;
+	return 0;
+}
+
 /* non-recursive DFS pseudo code
  * 1 procedure DFS-iterative(G,v):
  * 2 label v as discovered
@@ -1417,12 +1473,33 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
 	}
 
 	for (i = 0; i < MAX_BPF_STACK; i++) {
-		if (memcmp(&old->stack[i], &cur->stack[i],
-			   sizeof(old->stack[0])) != 0) {
-			if (old->stack[i].stype == STACK_INVALID)
-				continue;
+		if (old->stack_slot_type[i] == STACK_INVALID)
+			continue;
+		if (old->stack_slot_type[i] != cur->stack_slot_type[i])
+			/* Ex: old explored (safe) state has STACK_SPILL in
+			 * this stack slot, but current has STACK_MISC ->
+			 * these verifier states are not equivalent,
+			 * return false to continue verification of this path
+			 */
 			return false;
-		}
+		if (i % BPF_REG_SIZE)
+			continue;
+		if (memcmp(&old->spilled_regs[i / BPF_REG_SIZE],
+			   &cur->spilled_regs[i / BPF_REG_SIZE],
+			   sizeof(old->spilled_regs[0])))
+			/* when explored and current stack slot types are
+			 * the same, check that stored pointer types
+			 * are the same as well.
+			 * Ex: explored safe path could have stored
+			 * (struct reg_state) {.type = PTR_TO_STACK, .imm = -8}
+			 * but current path has stored:
+			 * (struct reg_state) {.type = PTR_TO_STACK, .imm = -16}
+			 * such verifier states are not equivalent.
+			 * return false to continue verification of this path
+			 */
+			return false;
+		else
+			continue;
 	}
 	return true;
 }
@@ -1664,8 +1741,10 @@ process_bpf_exit:
 			u8 mode = BPF_MODE(insn->code);
 
 			if (mode == BPF_ABS || mode == BPF_IND) {
-				verbose("LD_ABS is not supported yet\n");
-				return -EINVAL;
+				err = check_ld_abs(env, insn);
+				if (err)
+					return err;
+
 			} else if (mode == BPF_IMM) {
 				err = check_ld_imm(env, insn);
 				if (err)
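
For reference, roughly the smallest socket-filter fragment that satisfies the new check_ld_abs() looks like this (a sketch using the same instruction macros as the earlier snippet, loaded as BPF_PROG_TYPE_SOCKET_FILTER): R6 must hold the context before the access, R1-R5 are scratched by it, and the fetched byte lands in R0:

struct bpf_insn prog[] = {
	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),	/* r6 = ctx (skb), the implicit input */
	BPF_LD_ABS(BPF_B, 14),			/* r0 = *(u8 *)(skb->data + 14) */
	BPF_EXIT_INSN(),			/* return r0 */
};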