Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/Makefile          |    2
-rw-r--r--  kernel/bpf/bpf_lru_list.c    |   20
-rw-r--r--  kernel/bpf/core.c            |    9
-rw-r--r--  kernel/bpf/helpers.c         |    4
-rw-r--r--  kernel/bpf/inode.c           |   17
-rw-r--r--  kernel/bpf/lpm_trie.c        |  521
-rw-r--r--  kernel/bpf/syscall.c         |   19
-rw-r--r--  kernel/bpf/verifier.c        |  294
-rw-r--r--  kernel/trace/bpf_trace.c     |   76
-rw-r--r--  kernel/trace/trace_output.c  |   18
10 files changed, 855 insertions(+), 125 deletions(-)
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 1276474ac3cd..e1ce4f4fd7fd 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,7 +1,7 @@
 obj-y := core.o
 
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o
-obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o
+obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o
 ifeq ($(CONFIG_PERF_EVENTS),y)
 obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
 endif
diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
index 89b7ef41c86b..f62d1d56f41d 100644
--- a/kernel/bpf/bpf_lru_list.c
+++ b/kernel/bpf/bpf_lru_list.c
@@ -213,11 +213,10 @@ __bpf_lru_list_shrink_inactive(struct bpf_lru *lru,
 				       enum bpf_lru_list_type tgt_free_type)
 {
 	struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE];
-	struct bpf_lru_node *node, *tmp_node, *first_node;
+	struct bpf_lru_node *node, *tmp_node;
 	unsigned int nshrinked = 0;
 	unsigned int i = 0;
 
-	first_node = list_first_entry(inactive, struct bpf_lru_node, list);
 	list_for_each_entry_safe_reverse(node, tmp_node, inactive, list) {
 		if (bpf_lru_node_is_ref(node)) {
 			__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE);
@@ -361,7 +360,8 @@ static void __local_list_add_pending(struct bpf_lru *lru,
 	list_add(&node->list, local_pending_list(loc_l));
 }
 
-struct bpf_lru_node *__local_list_pop_free(struct bpf_lru_locallist *loc_l)
+static struct bpf_lru_node *
+__local_list_pop_free(struct bpf_lru_locallist *loc_l)
 {
 	struct bpf_lru_node *node;
 
@@ -374,8 +374,8 @@ struct bpf_lru_node *__local_list_pop_free(struct bpf_lru_locallist *loc_l)
 	return node;
 }
 
-struct bpf_lru_node *__local_list_pop_pending(struct bpf_lru *lru,
-					      struct bpf_lru_locallist *loc_l)
+static struct bpf_lru_node *
+__local_list_pop_pending(struct bpf_lru *lru, struct bpf_lru_locallist *loc_l)
 {
 	struct bpf_lru_node *node;
 	bool force = false;
@@ -558,8 +558,9 @@ void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node)
 	bpf_common_lru_push_free(lru, node);
 }
 
-void bpf_common_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset,
-			     u32 elem_size, u32 nr_elems)
+static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf,
+				    u32 node_offset, u32 elem_size,
+				    u32 nr_elems)
 {
 	struct bpf_lru_list *l = &lru->common_lru.lru_list;
 	u32 i;
@@ -575,8 +576,9 @@ void bpf_common_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset,
 	}
 }
 
-void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset,
-			     u32 elem_size, u32 nr_elems)
+static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf,
+				    u32 node_offset, u32 elem_size,
+				    u32 nr_elems)
 {
 	u32 i, pcpu_entries;
 	int cpu;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 503d4211988a..fddd76b1b627 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1173,3 +1173,12 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
 {
 	return -EFAULT;
 }
+
+/* All definitions of tracepoints related to BPF. */
+#define CREATE_TRACE_POINTS
+#include <linux/bpf_trace.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_get_type);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_put_rcu);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 045cbe673356..3d24e238221e 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -176,6 +176,6 @@ const struct bpf_func_proto bpf_get_current_comm_proto = {
 	.func		= bpf_get_current_comm,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_RAW_STACK,
-	.arg2_type	= ARG_CONST_STACK_SIZE,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE,
 };
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 0b030c9126d3..fddcae801724 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -21,6 +21,7 @@
 #include <linux/parser.h>
 #include <linux/filter.h>
 #include <linux/bpf.h>
+#include <linux/bpf_trace.h>
 
 enum bpf_type {
 	BPF_TYPE_UNSPEC	= 0,
@@ -281,6 +282,13 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname)
 	ret = bpf_obj_do_pin(pname, raw, type);
 	if (ret != 0)
 		bpf_any_put(raw, type);
+	if ((trace_bpf_obj_pin_prog_enabled() ||
+	     trace_bpf_obj_pin_map_enabled()) && !ret) {
+		if (type == BPF_TYPE_PROG)
+			trace_bpf_obj_pin_prog(raw, ufd, pname);
+		if (type == BPF_TYPE_MAP)
+			trace_bpf_obj_pin_map(raw, ufd, pname);
+	}
 out:
 	putname(pname);
 	return ret;
@@ -342,8 +350,15 @@ int bpf_obj_get_user(const char __user *pathname)
 	else
 		goto out;
 
-	if (ret < 0)
+	if (ret < 0) {
 		bpf_any_put(raw, type);
+	} else if (trace_bpf_obj_get_prog_enabled() ||
+		   trace_bpf_obj_get_map_enabled()) {
+		if (type == BPF_TYPE_PROG)
+			trace_bpf_obj_get_prog(raw, ret, pname);
+		if (type == BPF_TYPE_MAP)
+			trace_bpf_obj_get_map(raw, ret, pname);
+	}
 out:
 	putname(pname);
 	return ret;
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
new file mode 100644
index 000000000000..e0f6a0bd279b
--- /dev/null
+++ b/kernel/bpf/lpm_trie.c
@@ -0,0 +1,521 @@
+/*
+ * Longest prefix match list implementation
+ *
+ * Copyright (c) 2016,2017 Daniel Mack
+ * Copyright (c) 2016 David Herrmann
+ *
+ * This file is subject to the terms and conditions of version 2 of the GNU
+ * General Public License. See the file COPYING in the main directory of the
+ * Linux distribution for more details.
+ */
+
+#include <linux/bpf.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <net/ipv6.h>
+
+/* Intermediate node */
+#define LPM_TREE_NODE_FLAG_IM BIT(0)
+
+struct lpm_trie_node;
+
+struct lpm_trie_node {
+	struct rcu_head rcu;
+	struct lpm_trie_node __rcu *child[2];
+	u32 prefixlen;
+	u32 flags;
+	u8 data[0];
+};
+
+struct lpm_trie {
+	struct bpf_map map;
+	struct lpm_trie_node __rcu *root;
+	size_t n_entries;
+	size_t max_prefixlen;
+	size_t data_size;
+	raw_spinlock_t lock;
+};
+
+/* This trie implements a longest prefix match algorithm that can be used to
+ * match IP addresses to a stored set of ranges.
+ *
+ * Data stored in @data of struct bpf_lpm_trie_key and struct lpm_trie_node is
+ * interpreted as big endian, so data[0] stores the most significant byte.
+ *
+ * Match ranges are internally stored in instances of struct lpm_trie_node
+ * which each contain their prefix length as well as two pointers that may
+ * lead to more nodes containing more specific matches. Each node also stores
+ * a value that is defined by and returned to userspace via the update_elem
+ * and lookup functions.
+ *
+ * For instance, let's start with a trie that was created with a prefix length
+ * of 32, so it can be used for IPv4 addresses, and one single element that
+ * matches 192.168.0.0/16. The data array would hence contain
+ * [0xc0, 0xa8, 0x00, 0x00] in big-endian notation. This documentation will
+ * stick to IP-address notation for readability though.
+ *
+ * As the trie is empty initially, the new node (1) will be placed as root
+ * node, denoted as (R) in the example below. As there are no other nodes,
+ * both child pointers are %NULL.
+ *
+ *              +----------------+
+ *              |       (1)  (R) |
+ *              | 192.168.0.0/16 |
+ *              |    value: 1    |
+ *              |   [0]    [1]   |
+ *              +----------------+
+ *
+ * Next, let's add a new node (2) matching 192.168.0.0/24. As there is already
+ * a node with the same data and a smaller prefix (ie, a less specific one),
+ * node (2) will become a child of (1). The child index depends on the next
+ * bit that is outside of what (1) matches, and that bit is 0, so (2) will be
+ * child[0] of (1):
+ *
+ *              +----------------+
+ *              |       (1)  (R) |
+ *              | 192.168.0.0/16 |
+ *              |    value: 1    |
+ *              |   [0]    [1]   |
+ *              +----------------+
+ *                   |
+ *    +----------------+
+ *    |       (2)      |
+ *    | 192.168.0.0/24 |
+ *    |    value: 2    |
+ *    |   [0]    [1]   |
+ *    +----------------+
+ *
+ * The child[1] slot of (1) could be filled with another node which has bit #17
+ * (the next bit after the ones that (1) matches on) set to 1. For instance,
+ * 192.168.128.0/24:
+ *
+ *              +----------------+
+ *              |       (1)  (R) |
+ *              | 192.168.0.0/16 |
+ *              |    value: 1    |
+ *              |   [0]    [1]   |
+ *              +----------------+
+ *                   |      |
+ *    +----------------+  +------------------+
+ *    |       (2)      |  |        (3)       |
+ *    | 192.168.0.0/24 |  | 192.168.128.0/24 |
+ *    |    value: 2    |  |     value: 3     |
+ *    |   [0]    [1]   |  |   [0]     [1]    |
+ *    +----------------+  +------------------+
+ *
+ * Let's add another node (4) to the game for 192.168.1.0/24. In order to place
+ * it, node (1) is looked at first, and because of the semantics laid out
+ * above (bit #17 is 0), (4) would normally be attached to (1) as child[0].
+ * However, that slot is already allocated, so a new node is needed in between.
+ * That node does not have a value attached to it and it will never be
+ * returned to users as result of a lookup. It is only there to differentiate
+ * the traversal further. It will get a prefix as wide as necessary to
+ * distinguish its two children:
+ *
+ *              +----------------+
+ *              |       (1)  (R) |
+ *              | 192.168.0.0/16 |
+ *              |    value: 1    |
+ *              |   [0]    [1]   |
+ *              +----------------+
+ *                   |      |
+ *    +----------------+  +------------------+
+ *    |       (4)  (I) |  |        (3)       |
+ *    | 192.168.0.0/23 |  | 192.168.128.0/24 |
+ *    |   value: ---   |  |     value: 3     |
+ *    |   [0]    [1]   |  |   [0]     [1]    |
+ *    +----------------+  +------------------+
+ *         |      |
+ *  +----------------+  +----------------+
+ *  |       (2)      |  |       (5)      |
+ *  | 192.168.0.0/24 |  | 192.168.1.0/24 |
+ *  |    value: 2    |  |    value: 5    |
+ *  |   [0]    [1]   |  |   [0]    [1]   |
+ *  +----------------+  +----------------+
+ *
+ * 192.168.1.1/32 would be a child of (5) etc.
+ *
+ * An intermediate node will be turned into a 'real' node on demand. In the
+ * example above, (4) would be re-used if 192.168.0.0/23 is added to the trie.
+ *
+ * A fully populated trie would have a height of 32 nodes, as the trie was
+ * created with a prefix length of 32.
+ *
+ * The lookup starts at the root node. If the current node matches and if there
+ * is a child that can be used to become more specific, the trie is traversed
+ * downwards. The last node in the traversal that is a non-intermediate one is
+ * returned.
+ */
+
+static inline int extract_bit(const u8 *data, size_t index)
+{
+	return !!(data[index / 8] & (1 << (7 - (index % 8))));
+}
+
+/**
+ * longest_prefix_match() - determine the longest prefix
+ * @trie:	The trie to get internal sizes from
+ * @node:	The node to operate on
+ * @key:	The key to compare to @node
+ *
+ * Determine the longest prefix of @node that matches the bits in @key.
+ */
+static size_t longest_prefix_match(const struct lpm_trie *trie,
+				   const struct lpm_trie_node *node,
+				   const struct bpf_lpm_trie_key *key)
+{
+	size_t prefixlen = 0;
+	size_t i;
+
+	for (i = 0; i < trie->data_size; i++) {
+		size_t b;
+
+		b = 8 - fls(node->data[i] ^ key->data[i]);
+		prefixlen += b;
+
+		if (prefixlen >= node->prefixlen || prefixlen >= key->prefixlen)
+			return min(node->prefixlen, key->prefixlen);
+
+		if (b < 8)
+			break;
+	}
+
+	return prefixlen;
+}
+
+/* Called from syscall or from eBPF program */
+static void *trie_lookup_elem(struct bpf_map *map, void *_key)
+{
+	struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+	struct lpm_trie_node *node, *found = NULL;
+	struct bpf_lpm_trie_key *key = _key;
+
+	/* Start walking the trie from the root node ... */
+
+	for (node = rcu_dereference(trie->root); node;) {
+		unsigned int next_bit;
+		size_t matchlen;
+
+		/* Determine the longest prefix of @node that matches @key.
+		 * If it's the maximum possible prefix for this trie, we have
+		 * an exact match and can return it directly.
+		 */
+		matchlen = longest_prefix_match(trie, node, key);
+		if (matchlen == trie->max_prefixlen) {
+			found = node;
+			break;
+		}
+
+		/* If the number of bits that match is smaller than the prefix
+		 * length of @node, bail out and return the node we have seen
+		 * last in the traversal (ie, the parent).
+		 */
+		if (matchlen < node->prefixlen)
+			break;
+
+		/* Consider this node as return candidate unless it is an
+		 * artificially added intermediate one.
+		 */
+		if (!(node->flags & LPM_TREE_NODE_FLAG_IM))
+			found = node;
+
+		/* If the node match is fully satisfied, let's see if we can
+		 * become more specific. Determine the next bit in the key and
+		 * traverse down.
+		 */
+		next_bit = extract_bit(key->data, node->prefixlen);
+		node = rcu_dereference(node->child[next_bit]);
+	}
+
+	if (!found)
+		return NULL;
+
+	return found->data + trie->data_size;
+}
+
+static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie,
+						 const void *value)
+{
+	struct lpm_trie_node *node;
+	size_t size = sizeof(struct lpm_trie_node) + trie->data_size;
+
+	if (value)
+		size += trie->map.value_size;
+
+	node = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN);
+	if (!node)
+		return NULL;
+
+	node->flags = 0;
+
+	if (value)
+		memcpy(node->data + trie->data_size, value,
+		       trie->map.value_size);
+
+	return node;
+}
+
+/* Called from syscall or from eBPF program */
+static int trie_update_elem(struct bpf_map *map,
+			    void *_key, void *value, u64 flags)
+{
+	struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+	struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL;
+	struct lpm_trie_node __rcu **slot;
+	struct bpf_lpm_trie_key *key = _key;
+	unsigned long irq_flags;
+	unsigned int next_bit;
+	size_t matchlen = 0;
+	int ret = 0;
+
+	if (unlikely(flags > BPF_EXIST))
+		return -EINVAL;
+
+	if (key->prefixlen > trie->max_prefixlen)
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&trie->lock, irq_flags);
+
+	/* Allocate and fill a new node */
+
+	if (trie->n_entries == trie->map.max_entries) {
+		ret = -ENOSPC;
+		goto out;
+	}
+
+	new_node = lpm_trie_node_alloc(trie, value);
+	if (!new_node) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	trie->n_entries++;
+
+	new_node->prefixlen = key->prefixlen;
+	RCU_INIT_POINTER(new_node->child[0], NULL);
+	RCU_INIT_POINTER(new_node->child[1], NULL);
+	memcpy(new_node->data, key->data, trie->data_size);
+
+	/* Now find a slot to attach the new node. To do that, walk the tree
+	 * from the root and match as many bits as possible for each node until
+	 * we either find an empty slot or a slot that needs to be replaced by
+	 * an intermediate node.
+	 */
+	slot = &trie->root;
+
+	while ((node = rcu_dereference_protected(*slot,
+					lockdep_is_held(&trie->lock)))) {
+		matchlen = longest_prefix_match(trie, node, key);
+
+		if (node->prefixlen != matchlen ||
+		    node->prefixlen == key->prefixlen ||
+		    node->prefixlen == trie->max_prefixlen)
+			break;
+
+		next_bit = extract_bit(key->data, node->prefixlen);
+		slot = &node->child[next_bit];
+	}
+
+	/* If the slot is empty (a free child pointer or an empty root),
+	 * simply assign the @new_node to that slot and be done.
+	 */
+	if (!node) {
+		rcu_assign_pointer(*slot, new_node);
+		goto out;
+	}
+
+	/* If the slot we picked already exists, replace it with @new_node
+	 * which already has the correct data array set.
+	 */
+	if (node->prefixlen == matchlen) {
+		new_node->child[0] = node->child[0];
+		new_node->child[1] = node->child[1];
+
+		if (!(node->flags & LPM_TREE_NODE_FLAG_IM))
+			trie->n_entries--;
+
+		rcu_assign_pointer(*slot, new_node);
+		kfree_rcu(node, rcu);
+
+		goto out;
+	}
+
+	/* If the new node matches the prefix completely, it must be inserted
+	 * as an ancestor. Simply insert it between @node and *@slot.
+	 */
+	if (matchlen == key->prefixlen) {
+		next_bit = extract_bit(node->data, matchlen);
+		rcu_assign_pointer(new_node->child[next_bit], node);
+		rcu_assign_pointer(*slot, new_node);
+		goto out;
+	}
+
+	im_node = lpm_trie_node_alloc(trie, NULL);
+	if (!im_node) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	im_node->prefixlen = matchlen;
+	im_node->flags |= LPM_TREE_NODE_FLAG_IM;
+	memcpy(im_node->data, node->data, trie->data_size);
+
+	/* Now determine which child to install in which slot */
+	if (extract_bit(key->data, matchlen)) {
+		rcu_assign_pointer(im_node->child[0], node);
+		rcu_assign_pointer(im_node->child[1], new_node);
+	} else {
+		rcu_assign_pointer(im_node->child[0], new_node);
+		rcu_assign_pointer(im_node->child[1], node);
+	}
+
+	/* Finally, assign the intermediate node to the determined spot */
+	rcu_assign_pointer(*slot, im_node);
+
+out:
+	if (ret) {
+		if (new_node)
+			trie->n_entries--;
+
+		kfree(new_node);
+		kfree(im_node);
+	}
+
+	raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
+
+	return ret;
+}
+
+static int trie_delete_elem(struct bpf_map *map, void *key)
+{
+	/* TODO */
+	return -ENOSYS;
+}
+
+#define LPM_DATA_SIZE_MAX	256
+#define LPM_DATA_SIZE_MIN	1
+
+#define LPM_VAL_SIZE_MAX	(KMALLOC_MAX_SIZE - LPM_DATA_SIZE_MAX - \
+				 sizeof(struct lpm_trie_node))
+#define LPM_VAL_SIZE_MIN	1
+
+#define LPM_KEY_SIZE(X)		(sizeof(struct bpf_lpm_trie_key) + (X))
+#define LPM_KEY_SIZE_MAX	LPM_KEY_SIZE(LPM_DATA_SIZE_MAX)
+#define LPM_KEY_SIZE_MIN	LPM_KEY_SIZE(LPM_DATA_SIZE_MIN)
+
+static struct bpf_map *trie_alloc(union bpf_attr *attr)
+{
+	struct lpm_trie *trie;
+	u64 cost = sizeof(*trie), cost_per_node;
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return ERR_PTR(-EPERM);
+
+	/* check sanity of attributes */
+	if (attr->max_entries == 0 ||
+	    attr->map_flags != BPF_F_NO_PREALLOC ||
+	    attr->key_size < LPM_KEY_SIZE_MIN ||
+	    attr->key_size > LPM_KEY_SIZE_MAX ||
+	    attr->value_size < LPM_VAL_SIZE_MIN ||
+	    attr->value_size > LPM_VAL_SIZE_MAX)
+		return ERR_PTR(-EINVAL);
+
+	trie = kzalloc(sizeof(*trie), GFP_USER | __GFP_NOWARN);
+	if (!trie)
+		return ERR_PTR(-ENOMEM);
+
+	/* copy mandatory map attributes */
+	trie->map.map_type = attr->map_type;
+	trie->map.key_size = attr->key_size;
+	trie->map.value_size = attr->value_size;
+	trie->map.max_entries = attr->max_entries;
+	trie->data_size = attr->key_size -
+			  offsetof(struct bpf_lpm_trie_key, data);
+	trie->max_prefixlen = trie->data_size * 8;
+
+	cost_per_node = sizeof(struct lpm_trie_node) +
+			attr->value_size + trie->data_size;
+	cost += (u64) attr->max_entries * cost_per_node;
+	if (cost >= U32_MAX - PAGE_SIZE) {
+		ret = -E2BIG;
+		goto out_err;
+	}
+
+	trie->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+	ret = bpf_map_precharge_memlock(trie->map.pages);
+	if (ret)
+		goto out_err;
+
+	raw_spin_lock_init(&trie->lock);
+
+	return &trie->map;
+out_err:
+	kfree(trie);
+	return ERR_PTR(ret);
+}
+
+static void trie_free(struct bpf_map *map)
+{
+	struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+	struct lpm_trie_node __rcu **slot;
+	struct lpm_trie_node *node;
+
+	raw_spin_lock(&trie->lock);
+
+	/* Always start at the root and walk down to a node that has no
+	 * children. Then free that node, nullify its reference in the parent
+	 * and start over.
+	 */
+
+	for (;;) {
+		slot = &trie->root;
+
+		for (;;) {
+			node = rcu_dereference_protected(*slot,
+					lockdep_is_held(&trie->lock));
+			if (!node)
+				goto unlock;
+
+			if (rcu_access_pointer(node->child[0])) {
+				slot = &node->child[0];
+				continue;
+			}
+
+			if (rcu_access_pointer(node->child[1])) {
+				slot = &node->child[1];
+				continue;
+			}
+
+			kfree(node);
+			RCU_INIT_POINTER(*slot, NULL);
+			break;
+		}
+	}
+
+unlock:
+	raw_spin_unlock(&trie->lock);
+}
+
+static const struct bpf_map_ops trie_ops = {
+	.map_alloc = trie_alloc,
+	.map_free = trie_free,
+	.map_lookup_elem = trie_lookup_elem,
+	.map_update_elem = trie_update_elem,
+	.map_delete_elem = trie_delete_elem,
+};
+
+static struct bpf_map_type_list trie_type __read_mostly = {
+	.ops = &trie_ops,
+	.type = BPF_MAP_TYPE_LPM_TRIE,
+};
+
+static int __init register_trie_map(void)
+{
+	bpf_register_map_type(&trie_type);
+	return 0;
+}
+late_initcall(register_trie_map);
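As a quick orientation for readers, here is a minimal userspace sketch (illustrative only, not part of the patch) that exercises the new map type through the raw bpf(2) syscall. It assumes the companion uapi change that adds BPF_MAP_TYPE_LPM_TRIE and struct bpf_lpm_trie_key to <linux/bpf.h>, and it mirrors the 192.168.0.0/16 example from the comment block above: a full-length lookup of 192.168.1.1 returns the value stored for the /16 prefix. Note that trie_alloc() insists on map_flags == BPF_F_NO_PREALLOC, so that flag is not optional here.

#include <linux/bpf.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

/* mirrors struct bpf_lpm_trie_key with 4 data bytes for IPv4 */
struct lpm_key_v4 {
	__u32 prefixlen;
	__u8  data[4];
};

int main(void)
{
	union bpf_attr attr;
	struct lpm_key_v4 key;
	__u32 value;
	int map_fd;

	/* create the trie: key = 4-byte prefixlen + 4 address bytes */
	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_LPM_TRIE;
	attr.key_size    = sizeof(key);
	attr.value_size  = sizeof(value);
	attr.max_entries = 16;
	attr.map_flags   = BPF_F_NO_PREALLOC;	/* required by trie_alloc() */
	map_fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
	if (map_fd < 0)
		return 1;

	/* insert 192.168.0.0/16 -> value 1 */
	key.prefixlen = 16;
	key.data[0] = 192; key.data[1] = 168; key.data[2] = 0; key.data[3] = 0;
	value = 1;
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key    = (__u64)(unsigned long)&key;
	attr.value  = (__u64)(unsigned long)&value;
	attr.flags  = BPF_ANY;
	sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));

	/* longest-prefix lookup of 192.168.1.1 hits the /16 entry */
	key.prefixlen = 32;
	key.data[0] = 192; key.data[1] = 168; key.data[2] = 1; key.data[3] = 1;
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key    = (__u64)(unsigned long)&key;
	attr.value  = (__u64)(unsigned long)&value;
	if (sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) == 0)
		printf("matched, value %u\n", value);

	return 0;
}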
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index bbb016adbaeb..f74ca17af64a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -10,6 +10,7 @@
  * General Public License for more details.
  */
 #include <linux/bpf.h>
+#include <linux/bpf_trace.h>
 #include <linux/syscalls.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
@@ -241,6 +242,7 @@ static int map_create(union bpf_attr *attr)
 		/* failed to allocate fd */
 		goto free_map;
 
+	trace_bpf_map_create(map, err);
 	return err;
 
 free_map:
@@ -365,6 +367,7 @@ static int map_lookup_elem(union bpf_attr *attr)
 	if (copy_to_user(uvalue, value, value_size) != 0)
 		goto free_value;
 
+	trace_bpf_map_lookup_elem(map, ufd, key, value);
 	err = 0;
 
 free_value:
@@ -447,6 +450,8 @@ static int map_update_elem(union bpf_attr *attr)
 	__this_cpu_dec(bpf_prog_active);
 	preempt_enable();
 
+	if (!err)
+		trace_bpf_map_update_elem(map, ufd, key, value);
 free_value:
 	kfree(value);
 free_key:
@@ -492,6 +497,8 @@ static int map_delete_elem(union bpf_attr *attr)
 	__this_cpu_dec(bpf_prog_active);
 	preempt_enable();
 
+	if (!err)
+		trace_bpf_map_delete_elem(map, ufd, key);
 free_key:
 	kfree(key);
 err_put:
@@ -544,6 +551,7 @@ static int map_get_next_key(union bpf_attr *attr)
 	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
 		goto free_next_key;
 
+	trace_bpf_map_next_key(map, ufd, key, next_key);
 	err = 0;
 
 free_next_key:
@@ -697,8 +705,10 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
 
 void bpf_prog_put(struct bpf_prog *prog)
 {
-	if (atomic_dec_and_test(&prog->aux->refcnt))
+	if (atomic_dec_and_test(&prog->aux->refcnt)) {
+		trace_bpf_prog_put_rcu(prog);
 		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
+	}
 }
 EXPORT_SYMBOL_GPL(bpf_prog_put);
 
@@ -807,7 +817,11 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
 
 struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
 {
-	return __bpf_prog_get(ufd, &type);
+	struct bpf_prog *prog = __bpf_prog_get(ufd, &type);
+
+	if (!IS_ERR(prog))
+		trace_bpf_prog_get_type(prog);
+	return prog;
 }
 EXPORT_SYMBOL_GPL(bpf_prog_get_type);
 
@@ -889,6 +903,7 @@ static int bpf_prog_load(union bpf_attr *attr)
 		/* failed to allocate fd */
 		goto free_used_maps;
 
+	trace_bpf_prog_load(prog, err);
 	return err;
 
 free_used_maps:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index cdc43b899f28..d2bded2b250c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -481,6 +481,13 @@ static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno)
 	regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
 }
 
+static void mark_reg_unknown_value_and_range(struct bpf_reg_state *regs,
+					     u32 regno)
+{
+	mark_reg_unknown_value(regs, regno);
+	reset_reg_range_values(regs, regno);
+}
+
 enum reg_arg_type {
 	SRC_OP, /* register is used as source operand */
 	DST_OP, /* register is used as destination operand */
@@ -532,6 +539,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	switch (type) {
 	case PTR_TO_MAP_VALUE:
 	case PTR_TO_MAP_VALUE_OR_NULL:
+	case PTR_TO_MAP_VALUE_ADJ:
 	case PTR_TO_STACK:
 	case PTR_TO_CTX:
 	case PTR_TO_PACKET:
@@ -616,7 +624,8 @@ static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
 	}
 	if (value_regno >= 0)
 		/* have read misc data from the stack */
-		mark_reg_unknown_value(state->regs, value_regno);
+		mark_reg_unknown_value_and_range(state->regs,
+						 value_regno);
 	return 0;
 	}
 }
@@ -627,7 +636,7 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
 {
 	struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
 
-	if (off < 0 || off + size > map->value_size) {
+	if (off < 0 || size <= 0 || off + size > map->value_size) {
 		verbose("invalid access to map value, value_size=%d off=%d size=%d\n",
 			map->value_size, off, size);
 		return -EACCES;
@@ -635,6 +644,51 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
 	return 0;
 }
 
+/* check read/write into an adjusted map element */
+static int check_map_access_adj(struct bpf_verifier_env *env, u32 regno,
+				int off, int size)
+{
+	struct bpf_verifier_state *state = &env->cur_state;
+	struct bpf_reg_state *reg = &state->regs[regno];
+	int err;
+
+	/* We adjusted the register to this map value, so we
+	 * need to change off and size to min_value and max_value
+	 * respectively to make sure our theoretical access will be
+	 * safe.
+	 */
+	if (log_level)
+		print_verifier_state(state);
+	env->varlen_map_value_access = true;
+	/* The minimum value is only important with signed
+	 * comparisons where we can't assume the floor of a
+	 * value is 0.  If we are using signed variables for our
+	 * index'es we need to make sure that whatever we use
+	 * will have a set floor within our range.
+	 */
+	if (reg->min_value < 0) {
+		verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
+			regno);
+		return -EACCES;
+	}
+	err = check_map_access(env, regno, reg->min_value + off, size);
+	if (err) {
+		verbose("R%d min value is outside of the array range\n",
+			regno);
+		return err;
+	}
+
+	/* If we haven't set a max value then we need to bail
+	 * since we can't be sure we won't do bad things.
+	 */
+	if (reg->max_value == BPF_REGISTER_MAX_RANGE) {
+		verbose("R%d unbounded memory access, make sure to bounds check any array access into a map\n",
+			regno);
+		return -EACCES;
+	}
+	return check_map_access(env, regno, reg->max_value + off, size);
+}
+
 #define MAX_PACKET_OFF 0xffff
 
 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
@@ -647,6 +701,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
 		/* dst_input() and dst_output() can't write for now */
 		if (t == BPF_WRITE)
 			return false;
+		/* fallthrough */
 	case BPF_PROG_TYPE_SCHED_CLS:
 	case BPF_PROG_TYPE_SCHED_ACT:
 	case BPF_PROG_TYPE_XDP:
@@ -775,47 +830,13 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
 			return -EACCES;
 		}
 
-		/* If we adjusted the register to this map value at all then we
-		 * need to change off and size to min_value and max_value
-		 * respectively to make sure our theoretical access will be
-		 * safe.
-		 */
-		if (reg->type == PTR_TO_MAP_VALUE_ADJ) {
-			if (log_level)
-				print_verifier_state(state);
-			env->varlen_map_value_access = true;
-			/* The minimum value is only important with signed
-			 * comparisons where we can't assume the floor of a
-			 * value is 0.  If we are using signed variables for our
-			 * index'es we need to make sure that whatever we use
-			 * will have a set floor within our range.
-			 */
-			if (reg->min_value < 0) {
-				verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
-					regno);
-				return -EACCES;
-			}
-			err = check_map_access(env, regno, reg->min_value + off,
-					       size);
-			if (err) {
-				verbose("R%d min value is outside of the array range\n",
-					regno);
-				return err;
-			}
-
-			/* If we haven't set a max value then we need to bail
-			 * since we can't be sure we won't do bad things.
-			 */
-			if (reg->max_value == BPF_REGISTER_MAX_RANGE) {
-				verbose("R%d unbounded memory access, make sure to bounds check any array access into a map\n",
-					regno);
-				return -EACCES;
-			}
-			off += reg->max_value;
-		}
-		err = check_map_access(env, regno, off, size);
+		if (reg->type == PTR_TO_MAP_VALUE_ADJ)
+			err = check_map_access_adj(env, regno, off, size);
+		else
+			err = check_map_access(env, regno, off, size);
 		if (!err && t == BPF_READ && value_regno >= 0)
-			mark_reg_unknown_value(state->regs, value_regno);
+			mark_reg_unknown_value_and_range(state->regs,
+							 value_regno);
 
 	} else if (reg->type == PTR_TO_CTX) {
 		enum bpf_reg_type reg_type = UNKNOWN_VALUE;
@@ -827,7 +848,8 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
 		}
 		err = check_ctx_access(env, off, size, t, &reg_type);
 		if (!err && t == BPF_READ && value_regno >= 0) {
-			mark_reg_unknown_value(state->regs, value_regno);
+			mark_reg_unknown_value_and_range(state->regs,
+							 value_regno);
 			/* note that reg.[id|off|range] == 0 */
 			state->regs[value_regno].type = reg_type;
 		}
@@ -860,7 +882,8 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
 		}
 		err = check_packet_access(env, regno, off, size);
 		if (!err && t == BPF_READ && value_regno >= 0)
-			mark_reg_unknown_value(state->regs, value_regno);
+			mark_reg_unknown_value_and_range(state->regs,
+							 value_regno);
 	} else {
 		verbose("R%d invalid mem access '%s'\n",
 			regno, reg_type_str[reg->type]);
@@ -958,6 +981,25 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
 	return 0;
 }
 
+static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
+				   int access_size, bool zero_size_allowed,
+				   struct bpf_call_arg_meta *meta)
+{
+	struct bpf_reg_state *regs = env->cur_state.regs;
+
+	switch (regs[regno].type) {
+	case PTR_TO_PACKET:
+		return check_packet_access(env, regno, 0, access_size);
+	case PTR_TO_MAP_VALUE:
+		return check_map_access(env, regno, 0, access_size);
+	case PTR_TO_MAP_VALUE_ADJ:
+		return check_map_access_adj(env, regno, 0, access_size);
+	default: /* const_imm|ptr_to_stack or invalid ptr */
+		return check_stack_boundary(env, regno, access_size,
+					    zero_size_allowed, meta);
+	}
+}
+
 static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			  enum bpf_arg_type arg_type,
 			  struct bpf_call_arg_meta *meta)
@@ -993,10 +1035,13 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		expected_type = PTR_TO_STACK;
 		if (type != PTR_TO_PACKET && type != expected_type)
 			goto err_type;
-	} else if (arg_type == ARG_CONST_STACK_SIZE ||
-		   arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
+	} else if (arg_type == ARG_CONST_SIZE ||
+		   arg_type == ARG_CONST_SIZE_OR_ZERO) {
 		expected_type = CONST_IMM;
-		if (type != expected_type)
+		/* One exception. Allow UNKNOWN_VALUE registers when the
+		 * boundaries are known and don't cause unsafe memory accesses
+		 */
+		if (type != UNKNOWN_VALUE && type != expected_type)
 			goto err_type;
 	} else if (arg_type == ARG_CONST_MAP_PTR) {
 		expected_type = CONST_PTR_TO_MAP;
@@ -1006,8 +1051,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		expected_type = PTR_TO_CTX;
 		if (type != expected_type)
 			goto err_type;
-	} else if (arg_type == ARG_PTR_TO_STACK ||
-		   arg_type == ARG_PTR_TO_RAW_STACK) {
+	} else if (arg_type == ARG_PTR_TO_MEM ||
+		   arg_type == ARG_PTR_TO_UNINIT_MEM) {
 		expected_type = PTR_TO_STACK;
 		/* One exception here. In case function allows for NULL to be
 		 * passed in as argument, it's a CONST_IMM type. Final test
@@ -1015,9 +1060,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		 */
 		if (type == CONST_IMM && reg->imm == 0)
 			/* final test in check_stack_boundary() */;
-		else if (type != PTR_TO_PACKET && type != expected_type)
+		else if (type != PTR_TO_PACKET && type != PTR_TO_MAP_VALUE &&
+			 type != PTR_TO_MAP_VALUE_ADJ && type != expected_type)
 			goto err_type;
-		meta->raw_mode = arg_type == ARG_PTR_TO_RAW_STACK;
+		meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
 	} else {
 		verbose("unsupported arg_type %d\n", arg_type);
 		return -EFAULT;
@@ -1063,9 +1109,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		err = check_stack_boundary(env, regno,
 					   meta->map_ptr->value_size,
 					   false, NULL);
-	} else if (arg_type == ARG_CONST_STACK_SIZE ||
-		   arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
-		bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO);
+	} else if (arg_type == ARG_CONST_SIZE ||
+		   arg_type == ARG_CONST_SIZE_OR_ZERO) {
+		bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
 
 		/* bpf_xxx(..., buf, len) call will access 'len' bytes
 		 * from stack pointer 'buf'. Check it
@@ -1073,14 +1119,50 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		 */
 		if (regno == 0) {
 			/* kernel subsystem misconfigured verifier */
-			verbose("ARG_CONST_STACK_SIZE cannot be first argument\n");
+			verbose("ARG_CONST_SIZE cannot be first argument\n");
 			return -EACCES;
 		}
-		if (regs[regno - 1].type == PTR_TO_PACKET)
-			err = check_packet_access(env, regno - 1, 0, reg->imm);
-		else
-			err = check_stack_boundary(env, regno - 1, reg->imm,
-						   zero_size_allowed, meta);
+
+		/* If the register is UNKNOWN_VALUE, the access check happens
+		 * using its boundaries. Otherwise, just use its imm
+		 */
+		if (type == UNKNOWN_VALUE) {
+			/* For unprivileged variable accesses, disable raw
+			 * mode so that the program is required to
+			 * initialize all the memory that the helper could
+			 * just partially fill up.
+			 */
+			meta = NULL;
+
+			if (reg->min_value < 0) {
+				verbose("R%d min value is negative, either use unsigned or 'var &= const'\n",
+					regno);
+				return -EACCES;
+			}
+
+			if (reg->min_value == 0) {
+				err = check_helper_mem_access(env, regno - 1, 0,
+							      zero_size_allowed,
+							      meta);
+				if (err)
+					return err;
+			}
+
+			if (reg->max_value == BPF_REGISTER_MAX_RANGE) {
+				verbose("R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
+					regno);
+				return -EACCES;
+			}
+			err = check_helper_mem_access(env, regno - 1,
+						      reg->max_value,
+						      zero_size_allowed, meta);
+			if (err)
+				return err;
+		} else {
+			/* register is CONST_IMM */
+			err = check_helper_mem_access(env, regno - 1, reg->imm,
+						      zero_size_allowed, meta);
+		}
 	}
 
 	return err;
@@ -1154,15 +1236,15 @@ static int check_raw_mode(const struct bpf_func_proto *fn)
 {
 	int count = 0;
 
-	if (fn->arg1_type == ARG_PTR_TO_RAW_STACK)
+	if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
 		count++;
-	if (fn->arg2_type == ARG_PTR_TO_RAW_STACK)
+	if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
 		count++;
-	if (fn->arg3_type == ARG_PTR_TO_RAW_STACK)
+	if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
 		count++;
-	if (fn->arg4_type == ARG_PTR_TO_RAW_STACK)
+	if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
 		count++;
-	if (fn->arg5_type == ARG_PTR_TO_RAW_STACK)
+	if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
 		count++;
 
 	return count > 1 ? -EINVAL : 0;
@@ -1316,7 +1398,7 @@ static int check_packet_ptr_add(struct bpf_verifier_env *env,
 		imm = insn->imm;
 
 add_imm:
-		if (imm <= 0) {
+		if (imm < 0) {
 			verbose("addition of negative constant to packet pointer is not allowed\n");
 			return -EACCES;
 		}
@@ -1485,22 +1567,54 @@ static int evaluate_reg_imm_alu(struct bpf_verifier_env *env,
 	struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
 	struct bpf_reg_state *src_reg = &regs[insn->src_reg];
 	u8 opcode = BPF_OP(insn->code);
+	u64 dst_imm = dst_reg->imm;
 
-	/* dst_reg->type == CONST_IMM here, simulate execution of 'add'/'or'
-	 * insn. Don't care about overflow or negative values, just add them
+	/* dst_reg->type == CONST_IMM here. Simulate execution of insns
+	 * containing ALU ops. Don't care about overflow or negative
+	 * values, just add/sub/... them; registers are in u64.
 	 */
-	if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_K)
-		dst_reg->imm += insn->imm;
-	else if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_X &&
-		 src_reg->type == CONST_IMM)
-		dst_reg->imm += src_reg->imm;
-	else if (opcode == BPF_OR && BPF_SRC(insn->code) == BPF_K)
-		dst_reg->imm |= insn->imm;
-	else if (opcode == BPF_OR && BPF_SRC(insn->code) == BPF_X &&
-		 src_reg->type == CONST_IMM)
-		dst_reg->imm |= src_reg->imm;
-	else
+	if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_K) {
+		dst_imm += insn->imm;
+	} else if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_X &&
+		   src_reg->type == CONST_IMM) {
+		dst_imm += src_reg->imm;
+	} else if (opcode == BPF_SUB && BPF_SRC(insn->code) == BPF_K) {
+		dst_imm -= insn->imm;
+	} else if (opcode == BPF_SUB && BPF_SRC(insn->code) == BPF_X &&
+		   src_reg->type == CONST_IMM) {
+		dst_imm -= src_reg->imm;
+	} else if (opcode == BPF_MUL && BPF_SRC(insn->code) == BPF_K) {
+		dst_imm *= insn->imm;
+	} else if (opcode == BPF_MUL && BPF_SRC(insn->code) == BPF_X &&
+		   src_reg->type == CONST_IMM) {
+		dst_imm *= src_reg->imm;
+	} else if (opcode == BPF_OR && BPF_SRC(insn->code) == BPF_K) {
+		dst_imm |= insn->imm;
+	} else if (opcode == BPF_OR && BPF_SRC(insn->code) == BPF_X &&
+		   src_reg->type == CONST_IMM) {
+		dst_imm |= src_reg->imm;
+	} else if (opcode == BPF_AND && BPF_SRC(insn->code) == BPF_K) {
+		dst_imm &= insn->imm;
+	} else if (opcode == BPF_AND && BPF_SRC(insn->code) == BPF_X &&
+		   src_reg->type == CONST_IMM) {
+		dst_imm &= src_reg->imm;
+	} else if (opcode == BPF_RSH && BPF_SRC(insn->code) == BPF_K) {
+		dst_imm >>= insn->imm;
+	} else if (opcode == BPF_RSH && BPF_SRC(insn->code) == BPF_X &&
+		   src_reg->type == CONST_IMM) {
+		dst_imm >>= src_reg->imm;
+	} else if (opcode == BPF_LSH && BPF_SRC(insn->code) == BPF_K) {
+		dst_imm <<= insn->imm;
+	} else if (opcode == BPF_LSH && BPF_SRC(insn->code) == BPF_X &&
+		   src_reg->type == CONST_IMM) {
+		dst_imm <<= src_reg->imm;
+	} else {
 		mark_reg_unknown_value(regs, insn->dst_reg);
+		goto out;
+	}
+
+	dst_reg->imm = dst_imm;
+out:
 	return 0;
 }
 
@@ -1894,6 +2008,7 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 	case BPF_JGT:
 		/* Unsigned comparison, the minimum value is 0. */
 		false_reg->min_value = 0;
+		/* fallthrough */
 	case BPF_JSGT:
 		/* If this is false then we know the maximum val is val,
 		 * otherwise we know the min val is val+1.
@@ -1904,6 +2019,7 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 	case BPF_JGE:
 		/* Unsigned comparison, the minimum value is 0. */
 		false_reg->min_value = 0;
+		/* fallthrough */
 	case BPF_JSGE:
 		/* If this is false then we know the maximum value is val - 1,
 		 * otherwise we know the mimimum value is val.
@@ -1942,6 +2058,7 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 	case BPF_JGT:
 		/* Unsigned comparison, the minimum value is 0. */
 		true_reg->min_value = 0;
+		/* fallthrough */
 	case BPF_JSGT:
 		/*
 		 * If this is false, then the val is <= the register, if it is
@@ -1953,6 +2070,7 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 	case BPF_JGE:
 		/* Unsigned comparison, the minimum value is 0. */
 		true_reg->min_value = 0;
+		/* fallthrough */
 	case BPF_JSGE:
 		/* If this is false then constant < register, if it is true then
 		 * the register < constant.
@@ -2144,14 +2262,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 		return err;
 
 	if (insn->src_reg == 0) {
-		/* generic move 64-bit immediate into a register,
-		 * only analyzer needs to collect the ld_imm value.
-		 */
 		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
 
-		if (!env->analyzer_ops)
-			return 0;
-
 		regs[insn->dst_reg].type = CONST_IMM;
 		regs[insn->dst_reg].imm = imm;
 		return 0;
@@ -2729,7 +2841,6 @@ static int do_check(struct bpf_verifier_env *env)
 			if (err)
 				return err;
 
-			reset_reg_range_values(regs, insn->dst_reg);
 			if (BPF_SIZE(insn->code) != BPF_W &&
 			    BPF_SIZE(insn->code) != BPF_DW) {
 				insn_idx++;
@@ -3085,10 +3196,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 	insn = env->prog->insnsi + delta;
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
-		if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
+		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
+		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
+		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
 		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
 			type = BPF_READ;
-		else if (insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
+		else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
+			 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
+			 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
 			 insn->code == (BPF_STX | BPF_MEM | BPF_DW))
 			type = BPF_WRITE;
 		else
@@ -3097,8 +3212,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 		if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX)
 			continue;
 
-		cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg,
-					      insn->off, insn_buf, env->prog);
+		cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog);
 		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
 			verbose("bpf verifier is misconfigured\n");
 			return -EINVAL;
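For readers unfamiliar with the verifier change above: the point of check_helper_mem_access()/check_map_access_adj() and the relaxed ARG_CONST_SIZE handling is that a helper's size argument no longer has to be a compile-time constant, as long as the program has bounded it. Below is an illustrative sketch of the pattern this accepts, written in the samples/bpf style (the helper stubs, section names and license tag are the usual loader conventions, not part of this patch). Note two verifier-imposed details reflected in the code: the bounded size must not include zero for plain ARG_CONST_SIZE, and the destination memory has to be initialized because raw mode is disabled for variable sizes.

#include <linux/bpf.h>
#include <linux/ptrace.h>
#include <linux/types.h>

/* samples/bpf-style helper stubs, resolved by the loader at runtime */
static __u32 (*bpf_get_prandom_u32)(void) =
	(void *) BPF_FUNC_get_prandom_u32;
static int (*bpf_probe_read)(void *dst, __u32 size, const void *src) =
	(void *) BPF_FUNC_probe_read;

__attribute__((section("kprobe/example"), used))
int read_variable_len(struct pt_regs *ctx)
{
	char buf[64];
	__u32 len = bpf_get_prandom_u32();	/* UNKNOWN_VALUE register */

	__builtin_memset(buf, 0, sizeof(buf));	/* raw mode is off, so init */
	len &= sizeof(buf) - 1;			/* bounds now known: 0..63 */
	len += 1;				/* exclude zero size: 1..64 */
	bpf_probe_read(buf, len, ctx);		/* variable size accepted */

	return 0;
}

char _license[] __attribute__((section("license"), used)) = "GPL";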
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index fa77311dadb2..424daa4586d1 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -76,8 +76,8 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
 	.func		= bpf_probe_read,
 	.gpl_only	= true,
 	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_RAW_STACK,
-	.arg2_type	= ARG_CONST_STACK_SIZE,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE,
 	.arg3_type	= ARG_ANYTHING,
 };
 
@@ -109,8 +109,8 @@ static const struct bpf_func_proto bpf_probe_write_user_proto = {
 	.gpl_only	= true,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_ANYTHING,
-	.arg2_type	= ARG_PTR_TO_STACK,
-	.arg3_type	= ARG_CONST_STACK_SIZE,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
 };
 
 static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
@@ -213,8 +213,8 @@ static const struct bpf_func_proto bpf_trace_printk_proto = {
 	.func		= bpf_trace_printk,
 	.gpl_only	= true,
 	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_STACK,
-	.arg2_type	= ARG_CONST_STACK_SIZE,
+	.arg1_type	= ARG_PTR_TO_MEM,
+	.arg2_type	= ARG_CONST_SIZE,
 };
 
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
@@ -329,8 +329,8 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
 	.arg1_type	= ARG_PTR_TO_CTX,
 	.arg2_type	= ARG_CONST_MAP_PTR,
 	.arg3_type	= ARG_ANYTHING,
-	.arg4_type	= ARG_PTR_TO_STACK,
-	.arg5_type	= ARG_CONST_STACK_SIZE,
+	.arg4_type	= ARG_PTR_TO_MEM,
+	.arg5_type	= ARG_CONST_SIZE,
 };
 
 static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
@@ -395,6 +395,36 @@ static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
+	   const void *, unsafe_ptr)
+{
+	int ret;
+
+	/*
+	 * The strncpy_from_unsafe() call will likely not fill the entire
+	 * buffer, but that's okay in this circumstance as we're probing
+	 * arbitrary memory anyway similar to bpf_probe_read() and might
+	 * as well probe the stack. Thus, memory is explicitly cleared
+	 * only in error case, so that improper users ignoring return
+	 * code altogether don't copy garbage; otherwise length of string
+	 * is returned that can be used for bpf_perf_event_output() et al.
+	 */
+	ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
+	if (unlikely(ret < 0))
+		memset(dst, 0, size);
+
+	return ret;
+}
+
+static const struct bpf_func_proto bpf_probe_read_str_proto = {
+	.func		= bpf_probe_read_str,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE,
+	.arg3_type	= ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 {
 	switch (func_id) {
@@ -432,6 +462,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 		return &bpf_current_task_under_cgroup_proto;
 	case BPF_FUNC_get_prandom_u32:
 		return &bpf_get_prandom_u32_proto;
+	case BPF_FUNC_probe_read_str:
+		return &bpf_probe_read_str_proto;
 	default:
 		return NULL;
 	}
@@ -459,6 +491,13 @@ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type
 		return false;
 	if (off % size != 0)
 		return false;
+	/*
+	 * Assertion for 32 bit to make sure last 8 byte access
+	 * (BPF_DW) to the last 4 byte member is disallowed.
+	 */
+	if (off + size > sizeof(struct pt_regs))
+		return false;
+
 	return true;
 }
 
@@ -492,8 +531,8 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
 	.arg1_type	= ARG_PTR_TO_CTX,
 	.arg2_type	= ARG_CONST_MAP_PTR,
 	.arg3_type	= ARG_ANYTHING,
-	.arg4_type	= ARG_PTR_TO_STACK,
-	.arg5_type	= ARG_CONST_STACK_SIZE,
+	.arg4_type	= ARG_PTR_TO_MEM,
+	.arg5_type	= ARG_CONST_SIZE,
 };
 
 BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
@@ -540,6 +579,8 @@ static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type
 		return false;
 	if (off % size != 0)
 		return false;
+
+	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
 	return true;
 }
 
@@ -572,28 +613,29 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type
 	return true;
 }
 
-static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, int dst_reg,
-				      int src_reg, int ctx_off,
+static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
+				      const struct bpf_insn *si,
 				      struct bpf_insn *insn_buf,
 				      struct bpf_prog *prog)
 {
 	struct bpf_insn *insn = insn_buf;
 
-	switch (ctx_off) {
+	switch (si->off) {
 	case offsetof(struct bpf_perf_event_data, sample_period):
 		BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64));
 
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
-						      data), dst_reg, src_reg,
+						      data), si->dst_reg, si->src_reg,
 				      offsetof(struct bpf_perf_event_data_kern, data));
-		*insn++ = BPF_LDX_MEM(BPF_DW, dst_reg, dst_reg,
+		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
 				      offsetof(struct perf_sample_data, period));
 		break;
 	default:
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
-						      regs), dst_reg, src_reg,
+						      regs), si->dst_reg, si->src_reg,
 				      offsetof(struct bpf_perf_event_data_kern, regs));
-		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), dst_reg, dst_reg, ctx_off);
+		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
+				      si->off);
 		break;
 	}
 
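A short illustrative sketch of the new bpf_probe_read_str() helper from the hunks above, again in samples/bpf style and not part of the patch (BPF_FUNC_probe_read_str itself is added by the companion uapi change). The source pointer would normally come from a traced function's argument registers; the ctx pointer merely keeps the sketch self-contained:

#include <linux/bpf.h>
#include <linux/ptrace.h>
#include <linux/types.h>

static int (*bpf_probe_read_str)(void *dst, __u32 size,
				 const void *unsafe_ptr) =
	(void *) BPF_FUNC_probe_read_str;

__attribute__((section("kprobe/example"), used))
int copy_str(struct pt_regs *ctx)
{
	char str[64];
	int len;

	/* NUL-terminated copy from unsafe kernel memory; returns the string
	 * length including the trailing NUL on success, a negative error
	 * otherwise (the buffer is zeroed on failure).
	 */
	len = bpf_probe_read_str(str, sizeof(str), ctx);
	if (len < 0)
		return 0;

	/* 'len' can now bound a bpf_perf_event_output() so that only the
	 * string bytes are copied out, as the comment in the patch notes.
	 */
	return 0;
}

char _license[] __attribute__((section("license"), used)) = "GPL";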
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 5d33a7352919..aea6a1218c7d 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -162,15 +162,27 @@ trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
 }
 EXPORT_SYMBOL_GPL(trace_print_bitmask_seq);
 
+/**
+ * trace_print_hex_seq - print buffer as hex sequence
+ * @p: trace seq struct to write to
+ * @buf: The buffer to print
+ * @buf_len: Length of @buf in bytes
+ * @concatenate: Print @buf as single hex string or with spacing
+ *
+ * Prints the passed buffer as a hex sequence either as a whole,
+ * single hex string if @concatenate is true or with spacing after
+ * each byte in case @concatenate is false.
+ */
 const char *
-trace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
+trace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len,
+		    bool concatenate)
 {
 	int i;
 	const char *ret = trace_seq_buffer_ptr(p);
 
 	for (i = 0; i < buf_len; i++)
-		trace_seq_printf(p, "%s%2.2x", i == 0 ? "" : " ", buf[i]);
-
+		trace_seq_printf(p, "%s%2.2x", concatenate || i == 0 ? "" : " ",
+				 buf[i]);
 	trace_seq_putc(p, 0);
 
 	return ret;