Merge branch 'dtc-update' into dt/next

author: Rob Herring <robh@kernel.org> 2018-03-08 10:21:07 -0500
committer: Rob Herring <robh@kernel.org> 2018-03-08 10:21:07 -0500
commit: c679fa6e3aaa5c58fc514b5b88cfa82774b8d390 (patch)
tree: 0c10b339368bd1795152a66a4e245e6f654fb3ec /kernel
parent: bdb7013df910681f84eff27b07791d4c160cb76f (diff)
parent: 4fd98e374fd377ae0458a9dc44aa779cf9631ddd (diff)
19 files changed, 243 insertions, 133 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index b1f66480135b..14750e7c5ee4 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -26,8 +26,10 @@ static void bpf_array_free_percpu(struct bpf_array *array)
 {
        int i;
-        for (i = 0; i < array->map.max_entries; i++)
+        for (i = 0; i < array->map.max_entries; i++) {
                free_percpu(array->pptrs[i]);
+                cond_resched();
+        }
 }
 static int bpf_array_alloc_percpu(struct bpf_array *array)
@@ -43,6 +45,7 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
                        return -ENOMEM;
                }
                array->pptrs[i] = ptr;
+                cond_resched();
        }
        return 0;
@@ -73,11 +76,11 @@ static int array_map_alloc_check(union bpf_attr *attr)
 static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 {
        bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
-        int numa_node = bpf_map_attr_numa_node(attr);
+        int ret, numa_node = bpf_map_attr_numa_node(attr);
        u32 elem_size, index_mask, max_entries;
        bool unpriv = !capable(CAP_SYS_ADMIN);
+        u64 cost, array_size, mask64;
        struct bpf_array *array;
-        u64 array_size, mask64;
        elem_size = round_up(attr->value_size, 8);
@@ -109,8 +112,19 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
                array_size += (u64) max_entries * elem_size;
        /* make sure there is no u32 overflow later in round_up() */
-        if (array_size >= U32_MAX - PAGE_SIZE)
+        cost = array_size;
+        if (cost >= U32_MAX - PAGE_SIZE)
                return ERR_PTR(-ENOMEM);
+        if (percpu) {
+                cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
+                if (cost >= U32_MAX - PAGE_SIZE)
+                        return ERR_PTR(-ENOMEM);
+        }
+        cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+        ret = bpf_map_precharge_memlock(cost);
+        if (ret < 0)
+                return ERR_PTR(ret);
        /* allocate all map elements and zero-initialize them */
        array = bpf_map_area_alloc(array_size, numa_node);
@@ -121,20 +135,13 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
        /* copy mandatory map attributes */
        bpf_map_init_from_attr(&array->map, attr);
+        array->map.pages = cost;
        array->elem_size = elem_size;
-        if (!percpu)
+        if (percpu && bpf_array_alloc_percpu(array)) {
-                goto out;
-        array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();
-        if (array_size >= U32_MAX - PAGE_SIZE ||
-            bpf_array_alloc_percpu(array)) {
                bpf_map_area_free(array);
                return ERR_PTR(-ENOMEM);
        }
-out:
-        array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
        return &array->map;
 }
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 29ca9208dcfa..d315b393abdd 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1590,7 +1590,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
         * so always copy 'cnt' prog_ids to the user.
         * In a rare race the user will see zero prog_ids
         */
-        ids = kcalloc(cnt, sizeof(u32), GFP_USER);
+        ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN);
        if (!ids)
                return -ENOMEM;
        rcu_read_lock();
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index fbfdada6caee..a4bb0b34375a 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -334,7 +334,7 @@ static int cpu_map_kthread_run(void *data)
 static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
                                                       int map_id)
 {
-        gfp_t gfp = GFP_ATOMIC|__GFP_NOWARN;
+        gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
        struct bpf_cpu_map_entry *rcpu;
        int numa, err;
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 7b469d10d0e9..b4b5b81e7251 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -555,7 +555,10 @@ static void trie_free(struct bpf_map *map)
        struct lpm_trie_node __rcu **slot;
        struct lpm_trie_node *node;
-        raw_spin_lock(&trie->lock);
+        /* Wait for outstanding programs to complete
+         * update/lookup/delete/get_next_key before freeing the trie.
+         */
+        synchronize_rcu();
        /* Always start at the root and walk down to a node that has no
         * children. Then free that node, nullify its reference in the parent
@@ -566,10 +569,9 @@ static void trie_free(struct bpf_map *map)
                slot = &trie->root;
                for (;;) {
-                        node = rcu_dereference_protected(*slot,
+                        node = rcu_dereference_protected(*slot, 1);
-                                        lockdep_is_held(&trie->lock));
                        if (!node)
-                                goto unlock;
+                                goto out;
                        if (rcu_access_pointer(node->child[0])) {
                                slot = &node->child[0];
@@ -587,8 +589,8 @@ static void trie_free(struct bpf_map *map)
                }
        }
-unlock:
+out:
-        raw_spin_unlock(&trie->lock);
+        kfree(trie);
 }
 static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 48c33417d13c..a927e89dad6e 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -521,8 +521,8 @@ static struct smap_psock *smap_init_psock(struct sock *sock,
 static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 {
        struct bpf_stab *stab;
-        int err = -EINVAL;
        u64 cost;
+        int err;
        if (!capable(CAP_NET_ADMIN))
                return ERR_PTR(-EPERM);
@@ -547,6 +547,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
        /* make sure page count doesn't overflow */
        cost = (u64) stab->map.max_entries * sizeof(struct sock *);
+        err = -EINVAL;
        if (cost >= U32_MAX - PAGE_SIZE)
                goto free_stab;
diff --git a/kernel/fork.c b/kernel/fork.c
index be8aa5b98666..e5d9d405ae4e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -592,7 +592,7 @@ static void check_mm(struct mm_struct *mm)
 * is dropped: either by a lazy thread or by
 * mmput. Free the page directory and the mm.
 */
-static void __mmdrop(struct mm_struct *mm)
+void __mmdrop(struct mm_struct *mm)
 {
        BUG_ON(mm == &init_mm);
        mm_free_pgd(mm);
@@ -603,18 +603,7 @@ static void __mmdrop(struct mm_struct *mm)
        put_user_ns(mm->user_ns);
        free_mm(mm);
 }
+EXPORT_SYMBOL_GPL(__mmdrop);
-void mmdrop(struct mm_struct *mm)
-{
-        /*
-         * The implicit full barrier implied by atomic_dec_and_test() is
-         * required by the membarrier system call before returning to
-         * user-space, after storing to rq->curr.
-         */
-        if (unlikely(atomic_dec_and_test(&mm->mm_count)))
-                __mmdrop(mm);
-}
-EXPORT_SYMBOL_GPL(mmdrop);
 static void mmdrop_async_fn(struct work_struct *work)
 {
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index e6a9c36470ee..82b8b18ee1eb 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1726,25 +1726,14 @@ static int irq_domain_debug_show(struct seq_file *m, void *p)
        irq_domain_debug_show_one(m, d, 0);
        return 0;
 }
+DEFINE_SHOW_ATTRIBUTE(irq_domain_debug);
-static int irq_domain_debug_open(struct inode *inode, struct file *file)
-{
-        return single_open(file, irq_domain_debug_show, inode->i_private);
-}
-static const struct file_operations dfs_domain_ops = {
-        .open           = irq_domain_debug_open,
-        .read           = seq_read,
-        .llseek         = seq_lseek,
-        .release        = single_release,
-};
 static void debugfs_add_domain_dir(struct irq_domain *d)
 {
        if (!d->name || !domain_dir || d->debugfs_file)
                return;
        d->debugfs_file = debugfs_create_file(d->name, 0444, domain_dir, d,
-                                              &dfs_domain_ops);
+                                              &irq_domain_debug_fops);
 }
 static void debugfs_remove_domain_dir(struct irq_domain *d)
@@ -1760,7 +1749,8 @@ void __init irq_domain_debugfs_init(struct dentry *root)
        if (!domain_dir)
                return;
-        debugfs_create_file("default", 0444, domain_dir, NULL, &dfs_domain_ops);
+        debugfs_create_file("default", 0444, domain_dir, NULL,
+                            &irq_domain_debug_fops);
        mutex_lock(&irq_domain_mutex);
        list_for_each_entry(d, &irq_domain_list, link)
                debugfs_add_domain_dir(d);
diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c
index 5187dfe809ac..4c5770407031 100644
--- a/kernel/irq/matrix.c
+++ b/kernel/irq/matrix.c
@@ -16,6 +16,7 @@ struct cpumap {
        unsigned int            available;
        unsigned int            allocated;
        unsigned int            managed;
+        bool                    initialized;
        bool                    online;
        unsigned long           alloc_map[IRQ_MATRIX_SIZE];
        unsigned long           managed_map[IRQ_MATRIX_SIZE];
@@ -81,9 +82,11 @@ void irq_matrix_online(struct irq_matrix *m)
        BUG_ON(cm->online);
-        bitmap_zero(cm->alloc_map, m->matrix_bits);
+        if (!cm->initialized) {
-        cm->available = m->alloc_size - (cm->managed + m->systembits_inalloc);
+                cm->available = m->alloc_size;
-        cm->allocated = 0;
+                cm->available -= cm->managed + m->systembits_inalloc;
+                cm->initialized = true;
+        }
        m->global_available += cm->available;
        cm->online = true;
        m->online_maps++;
@@ -370,14 +373,16 @@ void irq_matrix_free(struct irq_matrix *m, unsigned int cpu,
        if (WARN_ON_ONCE(bit < m->alloc_start || bit >= m->alloc_end))
                return;
-        if (cm->online) {
+        clear_bit(bit, cm->alloc_map);
-                clear_bit(bit, cm->alloc_map);
+        cm->allocated--;
-                cm->allocated--;
+        if (cm->online)
                m->total_allocated--;
-                if (!managed) {
-                        cm->available++;
+        if (!managed) {
+                cm->available++;
+                if (cm->online)
                        m->global_available++;
-                }
        }
        trace_irq_matrix_free(bit, cpu, m, cm);
 }
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index da2ccf142358..102160ff5c66 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -978,67 +978,90 @@ static int prepare_kprobe(struct kprobe *p)
 }
 /* Caller must lock kprobe_mutex */
-static void arm_kprobe_ftrace(struct kprobe *p)
+static int arm_kprobe_ftrace(struct kprobe *p)
 {
-        int ret;
+        int ret = 0;
        ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
                                   (unsigned long)p->addr, 0, 0);
-        WARN(ret < 0, "Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret);
+        if (ret) {
-        kprobe_ftrace_enabled++;
+                pr_debug("Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret);
-        if (kprobe_ftrace_enabled == 1) {
+                return ret;
+        }
+        if (kprobe_ftrace_enabled == 0) {
                ret = register_ftrace_function(&kprobe_ftrace_ops);
-                WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
+                if (ret) {
+                        pr_debug("Failed to init kprobe-ftrace (%d)\n", ret);
+                        goto err_ftrace;
+                }
        }
+        kprobe_ftrace_enabled++;
+        return ret;
+err_ftrace:
+        /*
+         * Note: Since kprobe_ftrace_ops has IPMODIFY set, and ftrace requires a
+         * non-empty filter_hash for IPMODIFY ops, we're safe from an accidental
+         * empty filter_hash which would undesirably trace all functions.
+         */
+        ftrace_set_filter_ip(&kprobe_ftrace_ops, (unsigned long)p->addr, 1, 0);
+        return ret;
 }
 /* Caller must lock kprobe_mutex */
-static void disarm_kprobe_ftrace(struct kprobe *p)
+static int disarm_kprobe_ftrace(struct kprobe *p)
 {
-        int ret;
+        int ret = 0;
-        kprobe_ftrace_enabled--;
+        if (kprobe_ftrace_enabled == 1) {
-        if (kprobe_ftrace_enabled == 0) {
                ret = unregister_ftrace_function(&kprobe_ftrace_ops);
-                WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
+                if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (%d)\n", ret))
+                        return ret;
        }
+        kprobe_ftrace_enabled--;
        ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
                           (unsigned long)p->addr, 1, 0);
        WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
+        return ret;
 }
 #else   /* !CONFIG_KPROBES_ON_FTRACE */
 #define prepare_kprobe(p)       arch_prepare_kprobe(p)
-#define arm_kprobe_ftrace(p)    do {} while (0)
+#define arm_kprobe_ftrace(p)    (-ENODEV)
-#define disarm_kprobe_ftrace(p) do {} while (0)
+#define disarm_kprobe_ftrace(p) (-ENODEV)
 #endif
 /* Arm a kprobe with text_mutex */
-static void arm_kprobe(struct kprobe *kp)
+static int arm_kprobe(struct kprobe *kp)
 {
-        if (unlikely(kprobe_ftrace(kp))) {
+        if (unlikely(kprobe_ftrace(kp)))
-                arm_kprobe_ftrace(kp);
+                return arm_kprobe_ftrace(kp);
-                return;
-        }
        cpus_read_lock();
        mutex_lock(&text_mutex);
        __arm_kprobe(kp);
        mutex_unlock(&text_mutex);
        cpus_read_unlock();
+        return 0;
 }
 /* Disarm a kprobe with text_mutex */
-static void disarm_kprobe(struct kprobe *kp, bool reopt)
+static int disarm_kprobe(struct kprobe *kp, bool reopt)
 {
-        if (unlikely(kprobe_ftrace(kp))) {
+        if (unlikely(kprobe_ftrace(kp)))
-                disarm_kprobe_ftrace(kp);
+                return disarm_kprobe_ftrace(kp);
-                return;
-        }
        cpus_read_lock();
        mutex_lock(&text_mutex);
        __disarm_kprobe(kp, reopt);
        mutex_unlock(&text_mutex);
        cpus_read_unlock();
+        return 0;
 }
 /*
@@ -1362,9 +1385,15 @@ out:
        if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
                ap->flags &= ~KPROBE_FLAG_DISABLED;
-                if (!kprobes_all_disarmed)
+                if (!kprobes_all_disarmed) {
                        /* Arm the breakpoint again. */
-                        arm_kprobe(ap);
+                        ret = arm_kprobe(ap);
+                        if (ret) {
+                                ap->flags |= KPROBE_FLAG_DISABLED;
+                                list_del_rcu(&p->list);
+                                synchronize_sched();
+                        }
+                }
        }
        return ret;
 }
@@ -1573,8 +1602,14 @@ int register_kprobe(struct kprobe *p)
        hlist_add_head_rcu(&p->hlist,
                       &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
-        if (!kprobes_all_disarmed && !kprobe_disabled(p))
+        if (!kprobes_all_disarmed && !kprobe_disabled(p)) {
-                arm_kprobe(p);
+                ret = arm_kprobe(p);
+                if (ret) {
+                        hlist_del_rcu(&p->hlist);
+                        synchronize_sched();
+                        goto out;
+                }
+        }
        /* Try to optimize kprobe */
        try_to_optimize_kprobe(p);
@@ -1608,11 +1643,12 @@ static int aggr_kprobe_disabled(struct kprobe *ap)
 static struct kprobe *__disable_kprobe(struct kprobe *p)
 {
        struct kprobe *orig_p;
+        int ret;
        /* Get an original kprobe for return */
        orig_p = __get_valid_kprobe(p);
        if (unlikely(orig_p == NULL))
-                return NULL;
+                return ERR_PTR(-EINVAL);
        if (!kprobe_disabled(p)) {
                /* Disable probe if it is a child probe */
@@ -1626,8 +1662,13 @@ static struct kprobe *__disable_kprobe(struct kprobe *p)
                         * should have already been disarmed, so
                         * skip unneed disarming process.
                         */
-                        if (!kprobes_all_disarmed)
+                        if (!kprobes_all_disarmed) {
-                                disarm_kprobe(orig_p, true);
+                                ret = disarm_kprobe(orig_p, true);
+                                if (ret) {
+                                        p->flags &= ~KPROBE_FLAG_DISABLED;
+                                        return ERR_PTR(ret);
+                                }
+                        }
                        orig_p->flags |= KPROBE_FLAG_DISABLED;
                }
        }
@@ -1644,8 +1685,8 @@ static int __unregister_kprobe_top(struct kprobe *p)
        /* Disable kprobe. This will disarm it if needed. */
        ap = __disable_kprobe(p);
-        if (ap == NULL)
+        if (IS_ERR(ap))
-                return -EINVAL;
+                return PTR_ERR(ap);
        if (ap == p)
                /*
@@ -2078,12 +2119,14 @@ static void kill_kprobe(struct kprobe *p)
 int disable_kprobe(struct kprobe *kp)
 {
        int ret = 0;
+        struct kprobe *p;
        mutex_lock(&kprobe_mutex);
        /* Disable this kprobe */
-        if (__disable_kprobe(kp) == NULL)
+        p = __disable_kprobe(kp);
-                ret = -EINVAL;
+        if (IS_ERR(p))
+                ret = PTR_ERR(p);
        mutex_unlock(&kprobe_mutex);
        return ret;
@@ -2116,7 +2159,9 @@ int enable_kprobe(struct kprobe *kp)
        if (!kprobes_all_disarmed && kprobe_disabled(p)) {
                p->flags &= ~KPROBE_FLAG_DISABLED;
-                arm_kprobe(p);
+                ret = arm_kprobe(p);
+                if (ret)
+                        p->flags |= KPROBE_FLAG_DISABLED;
        }
 out:
        mutex_unlock(&kprobe_mutex);
@@ -2407,11 +2452,12 @@ static const struct file_operations debugfs_kprobe_blacklist_ops = {
        .release        = seq_release,
 };
-static void arm_all_kprobes(void)
+static int arm_all_kprobes(void)
 {
        struct hlist_head *head;
        struct kprobe *p;
-        unsigned int i;
+        unsigned int i, total = 0, errors = 0;
+        int err, ret = 0;
        mutex_lock(&kprobe_mutex);
@@ -2428,46 +2474,74 @@ static void arm_all_kprobes(void)
        /* Arming kprobes doesn't optimize kprobe itself */
        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
                head = &kprobe_table[i];
-                hlist_for_each_entry_rcu(p, head, hlist)
+                /* Arm all kprobes on a best-effort basis */
-                        if (!kprobe_disabled(p))
+                hlist_for_each_entry_rcu(p, head, hlist) {
-                                arm_kprobe(p);
+                        if (!kprobe_disabled(p)) {
+                                err = arm_kprobe(p);
+                                if (err)  {
+                                        errors++;
+                                        ret = err;
+                                }
+                                total++;
+                        }
+                }
        }
-        printk(KERN_INFO "Kprobes globally enabled\n");
+        if (errors)
+                pr_warn("Kprobes globally enabled, but failed to arm %d out of %d probes\n",
+                        errors, total);
+        else
+                pr_info("Kprobes globally enabled\n");
 already_enabled:
        mutex_unlock(&kprobe_mutex);
-        return;
+        return ret;
 }
-static void disarm_all_kprobes(void)
+static int disarm_all_kprobes(void)
 {
        struct hlist_head *head;
        struct kprobe *p;
-        unsigned int i;
+        unsigned int i, total = 0, errors = 0;
+        int err, ret = 0;
        mutex_lock(&kprobe_mutex);
        /* If kprobes are already disarmed, just return */
        if (kprobes_all_disarmed) {
                mutex_unlock(&kprobe_mutex);
-                return;
+                return 0;
        }
        kprobes_all_disarmed = true;
-        printk(KERN_INFO "Kprobes globally disabled\n");
        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
                head = &kprobe_table[i];
+                /* Disarm all kprobes on a best-effort basis */
                hlist_for_each_entry_rcu(p, head, hlist) {
-                        if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
+                        if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) {
-                                disarm_kprobe(p, false);
+                                err = disarm_kprobe(p, false);
+                                if (err) {
+                                        errors++;
+                                        ret = err;
+                                }
+                                total++;
+                        }
                }
        }
+        if (errors)
+                pr_warn("Kprobes globally disabled, but failed to disarm %d out of %d probes\n",
+                        errors, total);
+        else
+                pr_info("Kprobes globally disabled\n");
        mutex_unlock(&kprobe_mutex);
        /* Wait for disarming all kprobes by optimizer */
        wait_for_kprobe_optimizer();
+        return ret;
 }
 /*
@@ -2494,6 +2568,7 @@ static ssize_t write_enabled_file_bool(struct file *file,
 {
        char buf[32];
        size_t buf_size;
+        int ret = 0;
        buf_size = min(count, (sizeof(buf)-1));
        if (copy_from_user(buf, user_buf, buf_size))
@@ -2504,17 +2579,20 @@ static ssize_t write_enabled_file_bool(struct file *file,
        case 'y':
        case 'Y':
        case '1':
-                arm_all_kprobes();
+                ret = arm_all_kprobes();
                break;
        case 'n':
        case 'N':
        case '0':
-                disarm_all_kprobes();
+                ret = disarm_all_kprobes();
                break;
        default:
                return -EINVAL;
        }
+        if (ret)
+                return ret;
        return count;
 }
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 38ece035039e..d880296245c5 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -379,6 +379,14 @@ queue:
        tail = encode_tail(smp_processor_id(), idx);
        node += idx;
+        /*
+         * Ensure that we increment the head node->count before initialising
+         * the actual node. If the compiler is kind enough to reorder these
+         * stores, then an IRQ could overwrite our assignments.
+         */
+        barrier();
        node->locked = 0;
        node->next = NULL;
        pv_init_node(node);
@@ -408,14 +416,15 @@ queue:
         */
        if (old & _Q_TAIL_MASK) {
                prev = decode_tail(old);
                /*
-                 * The above xchg_tail() is also a load of @lock which
+                 * We must ensure that the stores to @node are observed before
-                 * generates, through decode_tail(), a pointer.  The address
+                 * the write to prev->next. The address dependency from
-                 * dependency matches the RELEASE of xchg_tail() such that
+                 * xchg_tail is not sufficient to ensure this because the read
-                 * the subsequent access to @prev happens after.
+                 * component of xchg_tail is unordered with respect to the
+                 * initialisation of @node.
                 */
+                smp_store_release(&prev->next, node);
-                WRITE_ONCE(prev->next, node);
                pv_wait_node(node, prev);
                arch_mcs_spin_lock_contended(&node->locked);
diff --git a/kernel/relay.c b/kernel/relay.c
index c3029402f15c..c955b10c973c 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -163,7 +163,7 @@ static struct rchan_buf *relay_create_buf(struct rchan *chan)
 {
        struct rchan_buf *buf;
-        if (chan->n_subbufs > UINT_MAX / sizeof(size_t *))
+        if (chan->n_subbufs > KMALLOC_MAX_SIZE / sizeof(size_t *))
                return NULL;
        buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index bf724c1952ea..e7c535eee0a6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2601,19 +2601,31 @@ static inline void finish_task(struct task_struct *prev)
 #endif
 }
-static inline void finish_lock_switch(struct rq *rq)
+static inline void
+prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
 {
+        /*
+         * The runqueue lock will be released by the next task
+         * (which is an invalid locking op, but in the case of
+         * the scheduler it's an obvious special-case), so we
+         * do an early lockdep release here:
+         */
+        rq_unpin_lock(rq, rf);
+        spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
 #ifdef CONFIG_DEBUG_SPINLOCK
        /* this is a valid case when another task releases the spinlock */
-        rq->lock.owner = current;
+        rq->lock.owner = next;
 #endif
+}
+static inline void finish_lock_switch(struct rq *rq)
+{
        /*
         * If we are tracking spinlock dependencies then we have to
         * fix up the runqueue lock - which gets 'carried over' from
         * prev into current:
         */
        spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
        raw_spin_unlock_irq(&rq->lock);
 }
@@ -2844,14 +2856,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
        rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
-        /*
+        prepare_lock_switch(rq, next, rf);
-         * Since the runqueue lock will be released by the next
-         * task (which is an invalid locking op but in the case
-         * of the scheduler it's an obvious special-case), so we
-         * do an early lockdep release here:
-         */
-        rq_unpin_lock(rq, rf);
-        spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
        /* Here we just switch the register state and the stack. */
        switch_to(prev, next, prev);
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index dd062a1c8cf0..7936f548e071 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -19,8 +19,6 @@
 #include "sched.h"
-#define SUGOV_KTHREAD_PRIORITY  50
 struct sugov_tunables {
        struct gov_attr_set attr_set;
        unsigned int rate_limit_us;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 9bb0e0c412ec..9df09782025c 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1153,6 +1153,7 @@ static void update_curr_dl(struct rq *rq)
        struct sched_dl_entity *dl_se = &curr->dl;
        u64 delta_exec, scaled_delta_exec;
        int cpu = cpu_of(rq);
+        u64 now;
        if (!dl_task(curr) || !on_dl_rq(dl_se))
                return;
@@ -1165,7 +1166,8 @@ static void update_curr_dl(struct rq *rq)
         * natural solution, but the full ramifications of this
         * approach need further study.
         */
-        delta_exec = rq_clock_task(rq) - curr->se.exec_start;
+        now = rq_clock_task(rq);
+        delta_exec = now - curr->se.exec_start;
        if (unlikely((s64)delta_exec <= 0)) {
                if (unlikely(dl_se->dl_yielded))
                        goto throttle;
@@ -1178,7 +1180,7 @@ static void update_curr_dl(struct rq *rq)
        curr->se.sum_exec_runtime += delta_exec;
        account_group_exec_runtime(curr, delta_exec);
-        curr->se.exec_start = rq_clock_task(rq);
+        curr->se.exec_start = now;
        cgroup_account_cputime(curr, delta_exec);
        sched_rt_avg_update(rq, delta_exec);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 663b2355a3aa..aad49451584e 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -950,12 +950,13 @@ static void update_curr_rt(struct rq *rq)
 {
        struct task_struct *curr = rq->curr;
        struct sched_rt_entity *rt_se = &curr->rt;
-        u64 now = rq_clock_task(rq);
        u64 delta_exec;
+        u64 now;
        if (curr->sched_class != &rt_sched_class)
                return;
+        now = rq_clock_task(rq);
        delta_exec = now - curr->se.exec_start;
        if (unlikely((s64)delta_exec <= 0))
                return;
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 940fa408a288..dc77548167ef 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -1076,14 +1076,16 @@ long seccomp_get_metadata(struct task_struct *task,
        size = min_t(unsigned long, size, sizeof(kmd));
-        if (copy_from_user(&kmd, data, size))
+        if (size < sizeof(kmd.filter_off))
+                return -EINVAL;
+        if (copy_from_user(&kmd.filter_off, data, sizeof(kmd.filter_off)))
                return -EFAULT;
        filter = get_nth_filter(task, kmd.filter_off);
        if (IS_ERR(filter))
                return PTR_ERR(filter);
-        memset(&kmd, 0, sizeof(kmd));
        if (filter->log)
                kmd.flags |= SECCOMP_FILTER_FLAG_LOG;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index fc2838ac8b78..c0a9e310d715 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -872,6 +872,8 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
                return -EINVAL;
        if (copy_from_user(&query, uquery, sizeof(query)))
                return -EFAULT;
+        if (query.ids_len > BPF_TRACE_MAX_PROGS)
+                return -E2BIG;
        mutex_lock(&bpf_event_mutex);
        ret = bpf_prog_array_copy_info(event->tp_event->prog_array,
diff --git a/kernel/user.c b/kernel/user.c
index 9a20acce460d..36288d840675 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -101,6 +101,7 @@ struct user_struct root_user = {
        .sigpending     = ATOMIC_INIT(0),
        .locked_shm     = 0,
        .uid            = GLOBAL_ROOT_UID,
+        .ratelimit      = RATELIMIT_STATE_INIT(root_user.ratelimit, 0, 0),
 };
 /*
@@ -191,6 +192,8 @@ struct user_struct *alloc_uid(kuid_t uid)
                new->uid = uid;
                atomic_set(&new->__count, 1);
+                ratelimit_state_init(&new->ratelimit, HZ, 100);
+                ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE);
                /*
                 * Before adding this, check whether we raced
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 017044c26233..bb9a519cbf50 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4180,6 +4180,22 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
 EXPORT_SYMBOL_GPL(workqueue_set_max_active);
 /**
+ * current_work - retrieve %current task's work struct
+ *
+ * Determine if %current task is a workqueue worker and what it's working on.
+ * Useful to find out the context that the %current task is running in.
+ *
+ * Return: work struct if %current task is a workqueue worker, %NULL otherwise.
+ */
+struct work_struct *current_work(void)
+{
+        struct worker *worker = current_wq_worker();
+        return worker ? worker->current_work : NULL;
+}
+EXPORT_SYMBOL(current_work);
+/**
 * current_is_workqueue_rescuer - is %current workqueue rescuer?
 *
 * Determine whether %current is a workqueue rescuer.  Can be used from
author	Rob Herring <robh@kernel.org>	2018-03-08 10:21:07 -0500
committer	Rob Herring <robh@kernel.org>	2018-03-08 10:21:07 -0500
commit	c679fa6e3aaa5c58fc514b5b88cfa82774b8d390 (patch)
tree	0c10b339368bd1795152a66a4e245e6f654fb3ec /kernel
parent	bdb7013df910681f84eff27b07791d4c160cb76f (diff)
parent	4fd98e374fd377ae0458a9dc44aa779cf9631ddd (diff)