Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/verifier.c         2
-rw-r--r--  kernel/cgroup.c              31
-rw-r--r--  kernel/cpuset.c              71
-rw-r--r--  kernel/locking/lockdep.c     58
-rw-r--r--  kernel/memremap.c            20
-rw-r--r--  kernel/module.c             120
-rw-r--r--  kernel/signal.c               6
-rw-r--r--  kernel/trace/trace_stack.c    7
-rw-r--r--  kernel/workqueue.c           74
9 files changed, 267 insertions, 122 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d1d3e8f57de9..2e7f7ab739e4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2082,7 +2082,7 @@ static void adjust_branches(struct bpf_prog *prog, int pos, int delta)
                 /* adjust offset of jmps if necessary */
                 if (i < pos && i + insn->off + 1 > pos)
                         insn->off += delta;
-                else if (i > pos && i + insn->off + 1 < pos)
+                else if (i > pos + delta && i + insn->off + 1 <= pos + delta)
                         insn->off -= delta;
         }
 }
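
The fixed rule is easy to sanity-check outside the kernel. Below is a minimal user-space model of the adjustment (an illustrative sketch only: the one-field insn is invented, not the kernel's struct bpf_insn). A jump's target is i + off + 1; once the instruction at pos has been expanded by delta extra slots, a backward jump from beyond the patched region that lands at or before its end must shrink by delta, and the comparison must use the post-patch position pos + delta, which is exactly what the fix changes.

#include <stdio.h>

struct insn { int off; };       /* relative jump: target = i + off + 1 */

static void adjust_branches(struct insn *prog, int len, int pos, int delta)
{
        int i;

        for (i = 0; i < len; i++) {
                /* forward jump from before the patch, crossing it */
                if (i < pos && i + prog[i].off + 1 > pos)
                        prog[i].off += delta;
                /* backward jump from after the patch into or across it;
                 * indices past the patch point already shifted by delta */
                else if (i > pos + delta && i + prog[i].off + 1 <= pos + delta)
                        prog[i].off -= delta;
        }
}

int main(void)
{
        /*
         * Post-patch program: insn 0 was expanded into 3 slots (delta = 2).
         * The jump now at index 4 still carries its stale offset -3, which
         * used to target old insn 0.
         */
        struct insn prog[5] = { {0}, {0}, {0}, {0}, {-3} };

        adjust_branches(prog, 5, 0, 2);
        printf("off = %d\n", prog[4].off);      /* -5: 4 + (-5) + 1 = 0 */
        return 0;
}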
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c03a640ef6da..d27904c193da 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -58,6 +58,7 @@
 #include <linux/kthread.h>
 #include <linux/delay.h>
 #include <linux/atomic.h>
+#include <linux/cpuset.h>
 #include <net/sock.h>
 
 /*
@@ -2739,6 +2740,7 @@ out_unlock_rcu:
 out_unlock_threadgroup:
         percpu_up_write(&cgroup_threadgroup_rwsem);
         cgroup_kn_unlock(of->kn);
+        cpuset_post_attach_flush();
         return ret ?: nbytes;
 }
 
@@ -4655,14 +4657,15 @@ static void css_free_work_fn(struct work_struct *work)
 
         if (ss) {
                 /* css free path */
+                struct cgroup_subsys_state *parent = css->parent;
                 int id = css->id;
 
-                if (css->parent)
-                        css_put(css->parent);
-
                 ss->css_free(css);
                 cgroup_idr_remove(&ss->css_idr, id);
                 cgroup_put(cgrp);
+
+                if (parent)
+                        css_put(parent);
         } else {
                 /* cgroup free path */
                 atomic_dec(&cgrp->root->nr_cgrps);
@@ -4758,6 +4761,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
         INIT_LIST_HEAD(&css->sibling);
         INIT_LIST_HEAD(&css->children);
         css->serial_nr = css_serial_nr_next++;
+        atomic_set(&css->online_cnt, 0);
 
         if (cgroup_parent(cgrp)) {
                 css->parent = cgroup_css(cgroup_parent(cgrp), ss);
@@ -4780,6 +4784,10 @@ static int online_css(struct cgroup_subsys_state *css)
         if (!ret) {
                 css->flags |= CSS_ONLINE;
                 rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
+
+                atomic_inc(&css->online_cnt);
+                if (css->parent)
+                        atomic_inc(&css->parent->online_cnt);
         }
         return ret;
 }
@@ -5017,10 +5025,15 @@ static void css_killed_work_fn(struct work_struct *work)
                 container_of(work, struct cgroup_subsys_state, destroy_work);
 
         mutex_lock(&cgroup_mutex);
-        offline_css(css);
-        mutex_unlock(&cgroup_mutex);
 
-        css_put(css);
+        do {
+                offline_css(css);
+                css_put(css);
+                /* @css can't go away while we're holding cgroup_mutex */
+                css = css->parent;
+        } while (css && atomic_dec_and_test(&css->online_cnt));
+
+        mutex_unlock(&cgroup_mutex);
 }
 
 /* css kill confirmation processing requires process context, bounce */
@@ -5029,8 +5042,10 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
         struct cgroup_subsys_state *css =
                 container_of(ref, struct cgroup_subsys_state, refcnt);
 
-        INIT_WORK(&css->destroy_work, css_killed_work_fn);
-        queue_work(cgroup_destroy_wq, &css->destroy_work);
+        if (atomic_dec_and_test(&css->online_cnt)) {
+                INIT_WORK(&css->destroy_work, css_killed_work_fn);
+                queue_work(cgroup_destroy_wq, &css->destroy_work);
+        }
 }
 
 /**
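
Taken together, the online_cnt changes implement a simple invariant: a css counts itself plus its online children, and teardown walks up the hierarchy, offlining a parent only after its last online child has gone. A user-space model of that accounting (a sketch; plain ints stand in for atomic_t and the node type is invented):

#include <stdio.h>

struct node {
        const char *name;
        struct node *parent;
        int online_cnt;         /* self + number of online children */
};

static void node_online(struct node *n)
{
        n->online_cnt++;
        if (n->parent)
                n->parent->online_cnt++;
}

/* mirrors css_killed_ref_fn() handing off to css_killed_work_fn() */
static void node_kill(struct node *n)
{
        if (--n->online_cnt != 0)
                return;         /* online children remain; the last one
                                 * to go offline continues the walk */
        do {
                printf("offlining %s\n", n->name);
                n = n->parent;
        } while (n && --n->online_cnt == 0);
}

int main(void)
{
        struct node root = { "root", NULL, 0 };
        struct node child = { "child", &root, 0 };

        node_online(&root);     /* root: 1 */
        node_online(&child);    /* child: 1, root: 2 */

        node_kill(&root);       /* root: 2 -> 1, child still online */
        node_kill(&child);      /* offlines child, then root */
        return 0;
}

Killing the root first leaves it online because the child still holds a count against it; the child's own teardown then completes both, so a parent can never be offlined before its children.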
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3e945fcd8179..41989ab4db57 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -287,6 +287,8 @@ static struct cpuset top_cpuset = {
 static DEFINE_MUTEX(cpuset_mutex);
 static DEFINE_SPINLOCK(callback_lock);
 
+static struct workqueue_struct *cpuset_migrate_mm_wq;
+
 /*
  * CPU / memory hotplug is handled asynchronously.
  */
@@ -972,31 +974,51 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 }
 
 /*
- * cpuset_migrate_mm
- *
- * Migrate memory region from one set of nodes to another.
- *
- * Temporarilly set tasks mems_allowed to target nodes of migration,
- * so that the migration code can allocate pages on these nodes.
- *
- * While the mm_struct we are migrating is typically from some
- * other task, the task_struct mems_allowed that we are hacking
- * is for our current task, which must allocate new pages for that
- * migrating memory region.
+ * Migrate memory region from one set of nodes to another.  This is
+ * performed asynchronously as it can be called from process migration path
+ * holding locks involved in process management.  All mm migrations are
+ * performed in the queued order and can be waited for by flushing
+ * cpuset_migrate_mm_wq.
  */
 
+struct cpuset_migrate_mm_work {
+        struct work_struct      work;
+        struct mm_struct        *mm;
+        nodemask_t              from;
+        nodemask_t              to;
+};
+
+static void cpuset_migrate_mm_workfn(struct work_struct *work)
+{
+        struct cpuset_migrate_mm_work *mwork =
+                container_of(work, struct cpuset_migrate_mm_work, work);
+
+        /* on a wq worker, no need to worry about %current's mems_allowed */
+        do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL);
+        mmput(mwork->mm);
+        kfree(mwork);
+}
+
 static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
                                                         const nodemask_t *to)
 {
-        struct task_struct *tsk = current;
-
-        tsk->mems_allowed = *to;
+        struct cpuset_migrate_mm_work *mwork;
 
-        do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
+        mwork = kzalloc(sizeof(*mwork), GFP_KERNEL);
+        if (mwork) {
+                mwork->mm = mm;
+                mwork->from = *from;
+                mwork->to = *to;
+                INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn);
+                queue_work(cpuset_migrate_mm_wq, &mwork->work);
+        } else {
+                mmput(mm);
+        }
+}
 
-        rcu_read_lock();
-        guarantee_online_mems(task_cs(tsk), &tsk->mems_allowed);
-        rcu_read_unlock();
+void cpuset_post_attach_flush(void)
+{
+        flush_workqueue(cpuset_migrate_mm_wq);
 }
 
 /*
@@ -1097,7 +1119,8 @@ static void update_tasks_nodemask(struct cpuset *cs)
                         mpol_rebind_mm(mm, &cs->mems_allowed);
                 if (migrate)
                         cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems);
-                mmput(mm);
+                else
+                        mmput(mm);
         }
         css_task_iter_end(&it);
 
@@ -1545,11 +1568,11 @@ static void cpuset_attach(struct cgroup_taskset *tset)
                          * @old_mems_allowed is the right nodesets that we
                          * migrate mm from.
                          */
-                        if (is_memory_migrate(cs)) {
+                        if (is_memory_migrate(cs))
                                 cpuset_migrate_mm(mm, &oldcs->old_mems_allowed,
                                                   &cpuset_attach_nodemask_to);
-                        }
-                        mmput(mm);
+                        else
+                                mmput(mm);
                 }
         }
 
@@ -1714,6 +1737,7 @@ out_unlock:
         mutex_unlock(&cpuset_mutex);
         kernfs_unbreak_active_protection(of->kn);
         css_put(&cs->css);
+        flush_workqueue(cpuset_migrate_mm_wq);
         return retval ?: nbytes;
 }
 
@@ -2359,6 +2383,9 @@ void __init cpuset_init_smp(void)
         top_cpuset.effective_mems = node_states[N_MEMORY];
 
         register_hotmemory_notifier(&cpuset_track_online_nodes_nb);
+
+        cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0);
+        BUG_ON(!cpuset_migrate_mm_wq);
 }
 
 /**
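
The new migration path reduces to a standard pattern: package the arguments in a heap-allocated work item, queue it, and let a later flush drain the queue in FIFO order, outside the locks held on the attach path. A single-threaded user-space stand-in (a sketch: the queue here is invented, while the kernel uses an ordered workqueue and the real do_migrate_pages()):

#include <stdio.h>
#include <stdlib.h>

struct work {
        void (*fn)(struct work *);
        struct work *next;
};

static struct work *head, **tail = &head;

static void queue_work(struct work *w)
{
        w->next = NULL;
        *tail = w;
        tail = &w->next;
}

static void flush_workqueue(void)       /* cpuset_post_attach_flush() analog */
{
        while (head) {
                struct work *w = head;

                head = w->next;
                if (!head)
                        tail = &head;
                w->fn(w);               /* fn owns and frees w */
        }
}

struct migrate_work {
        struct work work;               /* must stay first for the cast */
        int from, to;                   /* stand-ins for the nodemasks */
};

static void migrate_fn(struct work *w)
{
        struct migrate_work *mw = (struct migrate_work *)w;

        printf("migrating %d -> %d\n", mw->from, mw->to);
        free(mw);
}

int main(void)
{
        struct migrate_work *mw = malloc(sizeof(*mw));

        mw->work.fn = migrate_fn;
        mw->from = 0;
        mw->to = 1;
        queue_work(&mw->work);
        flush_workqueue();      /* caller waits here, outside the hot locks */
        return 0;
}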
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 60ace56618f6..716547fdb873 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -292,7 +292,7 @@ LIST_HEAD(all_lock_classes);
 #define __classhashfn(key)      hash_long((unsigned long)key, CLASSHASH_BITS)
 #define classhashentry(key)     (classhash_table + __classhashfn((key)))
 
-static struct list_head classhash_table[CLASSHASH_SIZE];
+static struct hlist_head classhash_table[CLASSHASH_SIZE];
 
 /*
  * We put the lock dependency chains into a hash-table as well, to cache
@@ -303,7 +303,7 @@ static struct list_head classhash_table[CLASSHASH_SIZE];
 #define __chainhashfn(chain)    hash_long(chain, CHAINHASH_BITS)
 #define chainhashentry(chain)   (chainhash_table + __chainhashfn((chain)))
 
-static struct list_head chainhash_table[CHAINHASH_SIZE];
+static struct hlist_head chainhash_table[CHAINHASH_SIZE];
 
 /*
  * The hash key of the lock dependency chains is a hash itself too:
@@ -666,7 +666,7 @@ static inline struct lock_class *
 look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
 {
         struct lockdep_subclass_key *key;
-        struct list_head *hash_head;
+        struct hlist_head *hash_head;
         struct lock_class *class;
 
 #ifdef CONFIG_DEBUG_LOCKDEP
@@ -719,7 +719,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
         if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
                 return NULL;
 
-        list_for_each_entry_rcu(class, hash_head, hash_entry) {
+        hlist_for_each_entry_rcu(class, hash_head, hash_entry) {
                 if (class->key == key) {
                         /*
                          * Huh! same key, different name? Did someone trample
@@ -742,7 +742,7 @@ static inline struct lock_class *
 register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 {
         struct lockdep_subclass_key *key;
-        struct list_head *hash_head;
+        struct hlist_head *hash_head;
         struct lock_class *class;
 
         DEBUG_LOCKS_WARN_ON(!irqs_disabled());
@@ -774,7 +774,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
          * We have to do the hash-walk again, to avoid races
          * with another CPU:
          */
-        list_for_each_entry_rcu(class, hash_head, hash_entry) {
+        hlist_for_each_entry_rcu(class, hash_head, hash_entry) {
                 if (class->key == key)
                         goto out_unlock_set;
         }
@@ -805,7 +805,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
          * We use RCU's safe list-add method to make
          * parallel walking of the hash-list safe:
          */
-        list_add_tail_rcu(&class->hash_entry, hash_head);
+        hlist_add_head_rcu(&class->hash_entry, hash_head);
         /*
          * Add it to the global list of classes:
          */
@@ -1822,7 +1822,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
  */
 static int
 check_prev_add(struct task_struct *curr, struct held_lock *prev,
-               struct held_lock *next, int distance, int trylock_loop)
+               struct held_lock *next, int distance, int *stack_saved)
 {
         struct lock_list *entry;
         int ret;
@@ -1883,8 +1883,11 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
                 }
         }
 
-        if (!trylock_loop && !save_trace(&trace))
-                return 0;
+        if (!*stack_saved) {
+                if (!save_trace(&trace))
+                        return 0;
+                *stack_saved = 1;
+        }
 
         /*
          * Ok, all validations passed, add the new lock
@@ -1907,6 +1910,8 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
          * Debugging printouts:
          */
         if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) {
+                /* We drop graph lock, so another thread can overwrite trace. */
+                *stack_saved = 0;
                 graph_unlock();
                 printk("\n new dependency: ");
                 print_lock_name(hlock_class(prev));
@@ -1929,7 +1934,7 @@ static int
 check_prevs_add(struct task_struct *curr, struct held_lock *next)
 {
         int depth = curr->lockdep_depth;
-        int trylock_loop = 0;
+        int stack_saved = 0;
         struct held_lock *hlock;
 
         /*
@@ -1956,7 +1961,7 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
                  */
                 if (hlock->read != 2 && hlock->check) {
                         if (!check_prev_add(curr, hlock, next,
-                                                distance, trylock_loop))
+                                                distance, &stack_saved))
                                 return 0;
                         /*
                          * Stop after the first non-trylock entry,
@@ -1979,7 +1984,6 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
                 if (curr->held_locks[depth].irq_context !=
                     curr->held_locks[depth-1].irq_context)
                         break;
-                trylock_loop = 1;
         }
         return 1;
 out_bug:
@@ -2017,7 +2021,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
                                      u64 chain_key)
 {
         struct lock_class *class = hlock_class(hlock);
-        struct list_head *hash_head = chainhashentry(chain_key);
+        struct hlist_head *hash_head = chainhashentry(chain_key);
         struct lock_chain *chain;
         struct held_lock *hlock_curr;
         int i, j;
@@ -2033,7 +2037,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
          * We can walk it lock-free, because entries only get added
          * to the hash:
          */
-        list_for_each_entry_rcu(chain, hash_head, entry) {
+        hlist_for_each_entry_rcu(chain, hash_head, entry) {
                 if (chain->chain_key == chain_key) {
 cache_hit:
                         debug_atomic_inc(chain_lookup_hits);
@@ -2057,7 +2061,7 @@ cache_hit:
         /*
         * We have to walk the chain again locked - to avoid duplicates:
         */
-        list_for_each_entry(chain, hash_head, entry) {
+        hlist_for_each_entry(chain, hash_head, entry) {
                 if (chain->chain_key == chain_key) {
                         graph_unlock();
                         goto cache_hit;
@@ -2091,7 +2095,7 @@ cache_hit:
                 }
                 chain_hlocks[chain->base + j] = class - lock_classes;
         }
-        list_add_tail_rcu(&chain->entry, hash_head);
+        hlist_add_head_rcu(&chain->entry, hash_head);
         debug_atomic_inc(chain_lookup_misses);
         inc_chains();
 
@@ -3875,7 +3879,7 @@ void lockdep_reset(void)
         nr_process_chains = 0;
         debug_locks = 1;
         for (i = 0; i < CHAINHASH_SIZE; i++)
-                INIT_LIST_HEAD(chainhash_table + i);
+                INIT_HLIST_HEAD(chainhash_table + i);
         raw_local_irq_restore(flags);
 }
 
@@ -3894,7 +3898,7 @@ static void zap_class(struct lock_class *class)
         /*
          * Unhash the class and remove it from the all_lock_classes list:
         */
-        list_del_rcu(&class->hash_entry);
+        hlist_del_rcu(&class->hash_entry);
         list_del_rcu(&class->lock_entry);
 
         RCU_INIT_POINTER(class->key, NULL);
@@ -3917,7 +3921,7 @@ static inline int within(const void *addr, void *start, unsigned long size)
 void lockdep_free_key_range(void *start, unsigned long size)
 {
         struct lock_class *class;
-        struct list_head *head;
+        struct hlist_head *head;
         unsigned long flags;
         int i;
         int locked;
@@ -3930,9 +3934,7 @@ void lockdep_free_key_range(void *start, unsigned long size)
          */
         for (i = 0; i < CLASSHASH_SIZE; i++) {
                 head = classhash_table + i;
-                if (list_empty(head))
-                        continue;
-                list_for_each_entry_rcu(class, head, hash_entry) {
+                hlist_for_each_entry_rcu(class, head, hash_entry) {
                         if (within(class->key, start, size))
                                 zap_class(class);
                         else if (within(class->name, start, size))
@@ -3962,7 +3964,7 @@ void lockdep_free_key_range(void *start, unsigned long size)
 void lockdep_reset_lock(struct lockdep_map *lock)
 {
         struct lock_class *class;
-        struct list_head *head;
+        struct hlist_head *head;
         unsigned long flags;
         int i, j;
         int locked;
@@ -3987,9 +3989,7 @@ void lockdep_reset_lock(struct lockdep_map *lock)
         locked = graph_lock();
         for (i = 0; i < CLASSHASH_SIZE; i++) {
                 head = classhash_table + i;
-                if (list_empty(head))
-                        continue;
-                list_for_each_entry_rcu(class, head, hash_entry) {
+                hlist_for_each_entry_rcu(class, head, hash_entry) {
                         int match = 0;
 
                         for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++)
@@ -4027,10 +4027,10 @@ void lockdep_init(void)
                 return;
 
         for (i = 0; i < CLASSHASH_SIZE; i++)
-                INIT_LIST_HEAD(classhash_table + i);
+                INIT_HLIST_HEAD(classhash_table + i);
 
         for (i = 0; i < CHAINHASH_SIZE; i++)
-                INIT_LIST_HEAD(chainhash_table + i);
+                INIT_HLIST_HEAD(chainhash_table + i);
 
         lockdep_initialized = 1;
 }
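
Two properties make hlist attractive here: an hlist_head is a single pointer (half the size of a list_head, which matters for these 4096-bucket static tables), and an all-zero hlist_head is already a valid empty list, so a zeroed static table is usable with no INIT_*_HEAD pass. A minimal user-space rendition of the two head types (a sketch, simplified from the kernel's list.h):

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };    /* two pointers */
struct hlist_node { struct hlist_node *next, **pprev; };
struct hlist_head { struct hlist_node *first; };        /* one pointer */

/* zeroed BSS storage is already a valid empty hlist */
static struct hlist_head bucket;

static void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
{
        n->next = h->first;
        if (h->first)
                h->first->pprev = &n->next;
        h->first = n;
        n->pprev = &h->first;
}

#define CLASSHASH_SIZE 4096

int main(void)
{
        struct hlist_node a, b;
        int count = 0;

        hlist_add_head(&a, &bucket);
        hlist_add_head(&b, &bucket);
        for (struct hlist_node *p = bucket.first; p; p = p->next)
                count++;
        printf("%d entries\n", count);          /* 2 */
        printf("list_head table:  %zu bytes\n",
               CLASSHASH_SIZE * sizeof(struct list_head));
        printf("hlist_head table: %zu bytes\n",
               CLASSHASH_SIZE * sizeof(struct hlist_head));
        return 0;
}

This also explains the dropped list_empty()/continue pairs above: walking an empty bucket with hlist_for_each_entry_rcu() simply performs zero iterations, so the early-out check bought nothing.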
diff --git a/kernel/memremap.c b/kernel/memremap.c
index e517a16cb426..2c468dea60bc 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -150,7 +150,7 @@ void devm_memunmap(struct device *dev, void *addr)
 }
 EXPORT_SYMBOL(devm_memunmap);
 
-pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
+pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
 {
         return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
 }
@@ -183,7 +183,11 @@ EXPORT_SYMBOL(put_zone_device_page);
 
 static void pgmap_radix_release(struct resource *res)
 {
-        resource_size_t key;
+        resource_size_t key, align_start, align_size, align_end;
+
+        align_start = res->start & ~(SECTION_SIZE - 1);
+        align_size = ALIGN(resource_size(res), SECTION_SIZE);
+        align_end = align_start + align_size - 1;
 
         mutex_lock(&pgmap_lock);
         for (key = res->start; key <= res->end; key += SECTION_SIZE)
@@ -226,12 +230,11 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
                 percpu_ref_put(pgmap->ref);
         }
 
-        pgmap_radix_release(res);
-
         /* pages are dead and unused, undo the arch mapping */
         align_start = res->start & ~(SECTION_SIZE - 1);
         align_size = ALIGN(resource_size(res), SECTION_SIZE);
         arch_remove_memory(align_start, align_size);
+        pgmap_radix_release(res);
         dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
                         "%s: failed to free all reserved pages\n", __func__);
 }
@@ -267,7 +270,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 {
         int is_ram = region_intersects(res->start, resource_size(res),
                         "System RAM");
-        resource_size_t key, align_start, align_size;
+        resource_size_t key, align_start, align_size, align_end;
         struct dev_pagemap *pgmap;
         struct page_map *page_map;
         unsigned long pfn;
@@ -309,7 +312,10 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 
         mutex_lock(&pgmap_lock);
         error = 0;
-        for (key = res->start; key <= res->end; key += SECTION_SIZE) {
+        align_start = res->start & ~(SECTION_SIZE - 1);
+        align_size = ALIGN(resource_size(res), SECTION_SIZE);
+        align_end = align_start + align_size - 1;
+        for (key = align_start; key <= align_end; key += SECTION_SIZE) {
                 struct dev_pagemap *dup;
 
                 rcu_read_lock();
@@ -336,8 +342,6 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
         if (nid < 0)
                 nid = numa_mem_id();
 
-        align_start = res->start & ~(SECTION_SIZE - 1);
-        align_size = ALIGN(resource_size(res), SECTION_SIZE);
         error = arch_add_memory(nid, align_start, align_size, true);
         if (error)
                 goto err_add_memory;
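
The section-alignment arithmetic repeated in these hunks is easiest to see with numbers plugged in. A worked example (the start and size values are made up; the 128MB section size corresponds to x86_64's SECTION_SIZE_BITS of 27, and align_size is derived from the size alone, exactly as in the code above):

#include <stdio.h>

#define SECTION_SIZE    (1UL << 27)     /* 128MB sections, 64-bit build */
#define ALIGN(x, a)     (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        unsigned long start = 0x14a800000UL;    /* made-up res->start */
        unsigned long size  = 0x004000000UL;    /* made-up resource_size(res) */

        unsigned long align_start = start & ~(SECTION_SIZE - 1);
        unsigned long align_size  = ALIGN(size, SECTION_SIZE);
        unsigned long align_end   = align_start + align_size - 1;

        printf("align_start = %#lx\n", align_start);    /* 0x148000000 */
        printf("align_size  = %#lx\n", align_size);     /* 0x8000000 */
        printf("align_end   = %#lx\n", align_end);      /* 0x14fffffff */
        return 0;
}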
diff --git a/kernel/module.c b/kernel/module.c
index 8358f4697c0c..9537da37ce87 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -303,6 +303,9 @@ struct load_info {
         struct _ddebug *debug;
         unsigned int num_debug;
         bool sig_ok;
+#ifdef CONFIG_KALLSYMS
+        unsigned long mod_kallsyms_init_off;
+#endif
         struct {
                 unsigned int sym, str, mod, vers, info, pcpu;
         } index;
@@ -2480,10 +2483,21 @@ static void layout_symtab(struct module *mod, struct load_info *info)
         strsect->sh_flags |= SHF_ALLOC;
         strsect->sh_entsize = get_offset(mod, &mod->init_layout.size, strsect,
                                          info->index.str) | INIT_OFFSET_MASK;
-        mod->init_layout.size = debug_align(mod->init_layout.size);
         pr_debug("\t%s\n", info->secstrings + strsect->sh_name);
+
+        /* We'll tack temporary mod_kallsyms on the end. */
+        mod->init_layout.size = ALIGN(mod->init_layout.size,
+                                      __alignof__(struct mod_kallsyms));
+        info->mod_kallsyms_init_off = mod->init_layout.size;
+        mod->init_layout.size += sizeof(struct mod_kallsyms);
+        mod->init_layout.size = debug_align(mod->init_layout.size);
 }
 
+/*
+ * We use the full symtab and strtab which layout_symtab arranged to
+ * be appended to the init section.  Later we switch to the cut-down
+ * core-only ones.
+ */
 static void add_kallsyms(struct module *mod, const struct load_info *info)
 {
         unsigned int i, ndst;
@@ -2492,29 +2506,34 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
         char *s;
         Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
 
-        mod->symtab = (void *)symsec->sh_addr;
-        mod->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
+        /* Set up to point into init section. */
+        mod->kallsyms = mod->init_layout.base + info->mod_kallsyms_init_off;
+
+        mod->kallsyms->symtab = (void *)symsec->sh_addr;
+        mod->kallsyms->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
         /* Make sure we get permanent strtab: don't use info->strtab. */
-        mod->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
+        mod->kallsyms->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
 
         /* Set types up while we still have access to sections. */
-        for (i = 0; i < mod->num_symtab; i++)
-                mod->symtab[i].st_info = elf_type(&mod->symtab[i], info);
-
-        mod->core_symtab = dst = mod->core_layout.base + info->symoffs;
-        mod->core_strtab = s = mod->core_layout.base + info->stroffs;
-        src = mod->symtab;
-        for (ndst = i = 0; i < mod->num_symtab; i++) {
+        for (i = 0; i < mod->kallsyms->num_symtab; i++)
+                mod->kallsyms->symtab[i].st_info
+                        = elf_type(&mod->kallsyms->symtab[i], info);
+
+        /* Now populate the cut down core kallsyms for after init. */
+        mod->core_kallsyms.symtab = dst = mod->core_layout.base + info->symoffs;
+        mod->core_kallsyms.strtab = s = mod->core_layout.base + info->stroffs;
+        src = mod->kallsyms->symtab;
+        for (ndst = i = 0; i < mod->kallsyms->num_symtab; i++) {
                 if (i == 0 ||
                     is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum,
                                    info->index.pcpu)) {
                         dst[ndst] = src[i];
-                        dst[ndst++].st_name = s - mod->core_strtab;
-                        s += strlcpy(s, &mod->strtab[src[i].st_name],
+                        dst[ndst++].st_name = s - mod->core_kallsyms.strtab;
+                        s += strlcpy(s, &mod->kallsyms->strtab[src[i].st_name],
                                      KSYM_NAME_LEN) + 1;
                 }
         }
-        mod->core_num_syms = ndst;
+        mod->core_kallsyms.num_symtab = ndst;
 }
 #else
 static inline void layout_symtab(struct module *mod, struct load_info *info)
@@ -3263,9 +3282,8 @@ static noinline int do_init_module(struct module *mod)
         module_put(mod);
         trim_init_extable(mod);
 #ifdef CONFIG_KALLSYMS
-        mod->num_symtab = mod->core_num_syms;
-        mod->symtab = mod->core_symtab;
-        mod->strtab = mod->core_strtab;
+        /* Switch to core kallsyms now init is done: kallsyms may be walking! */
+        rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms);
 #endif
         mod_tree_remove_init(mod);
         disable_ro_nx(&mod->init_layout);
@@ -3496,7 +3514,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
 
         /* Module is ready to execute: parsing args may do that. */
         after_dashes = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
-                                  -32768, 32767, NULL,
+                                  -32768, 32767, mod,
                                   unknown_module_param_cb);
         if (IS_ERR(after_dashes)) {
                 err = PTR_ERR(after_dashes);
@@ -3627,6 +3645,11 @@ static inline int is_arm_mapping_symbol(const char *str)
                && (str[2] == '\0' || str[2] == '.');
 }
 
+static const char *symname(struct mod_kallsyms *kallsyms, unsigned int symnum)
+{
+        return kallsyms->strtab + kallsyms->symtab[symnum].st_name;
+}
+
 static const char *get_ksymbol(struct module *mod,
                                unsigned long addr,
                                unsigned long *size,
@@ -3634,6 +3657,7 @@ static const char *get_ksymbol(struct module *mod,
 {
         unsigned int i, best = 0;
         unsigned long nextval;
+        struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
 
         /* At worse, next value is at end of module */
         if (within_module_init(addr, mod))
@@ -3643,32 +3667,32 @@ static const char *get_ksymbol(struct module *mod,
 
         /* Scan for closest preceding symbol, and next symbol. (ELF
            starts real symbols at 1). */
-        for (i = 1; i < mod->num_symtab; i++) {
-                if (mod->symtab[i].st_shndx == SHN_UNDEF)
+        for (i = 1; i < kallsyms->num_symtab; i++) {
+                if (kallsyms->symtab[i].st_shndx == SHN_UNDEF)
                         continue;
 
                 /* We ignore unnamed symbols: they're uninformative
                  * and inserted at a whim. */
-                if (mod->symtab[i].st_value <= addr
-                    && mod->symtab[i].st_value > mod->symtab[best].st_value
-                    && *(mod->strtab + mod->symtab[i].st_name) != '\0'
-                    && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name))
+                if (*symname(kallsyms, i) == '\0'
+                    || is_arm_mapping_symbol(symname(kallsyms, i)))
+                        continue;
+
+                if (kallsyms->symtab[i].st_value <= addr
+                    && kallsyms->symtab[i].st_value > kallsyms->symtab[best].st_value)
                         best = i;
-                if (mod->symtab[i].st_value > addr
-                    && mod->symtab[i].st_value < nextval
-                    && *(mod->strtab + mod->symtab[i].st_name) != '\0'
-                    && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name))
-                        nextval = mod->symtab[i].st_value;
+                if (kallsyms->symtab[i].st_value > addr
+                    && kallsyms->symtab[i].st_value < nextval)
+                        nextval = kallsyms->symtab[i].st_value;
         }
 
         if (!best)
                 return NULL;
 
         if (size)
-                *size = nextval - mod->symtab[best].st_value;
+                *size = nextval - kallsyms->symtab[best].st_value;
         if (offset)
-                *offset = addr - mod->symtab[best].st_value;
-        return mod->strtab + mod->symtab[best].st_name;
+                *offset = addr - kallsyms->symtab[best].st_value;
+        return symname(kallsyms, best);
 }
 
 /* For kallsyms to ask for address resolution.  NULL means not found.  Careful
@@ -3758,19 +3782,21 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 
         preempt_disable();
         list_for_each_entry_rcu(mod, &modules, list) {
+                struct mod_kallsyms *kallsyms;
+
                 if (mod->state == MODULE_STATE_UNFORMED)
                         continue;
-                if (symnum < mod->num_symtab) {
-                        *value = mod->symtab[symnum].st_value;
-                        *type = mod->symtab[symnum].st_info;
-                        strlcpy(name, mod->strtab + mod->symtab[symnum].st_name,
-                                KSYM_NAME_LEN);
+                kallsyms = rcu_dereference_sched(mod->kallsyms);
+                if (symnum < kallsyms->num_symtab) {
+                        *value = kallsyms->symtab[symnum].st_value;
+                        *type = kallsyms->symtab[symnum].st_info;
+                        strlcpy(name, symname(kallsyms, symnum), KSYM_NAME_LEN);
                         strlcpy(module_name, mod->name, MODULE_NAME_LEN);
                         *exported = is_exported(name, *value, mod);
                         preempt_enable();
                         return 0;
                 }
-                symnum -= mod->num_symtab;
+                symnum -= kallsyms->num_symtab;
         }
         preempt_enable();
         return -ERANGE;
@@ -3779,11 +3805,12 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 static unsigned long mod_find_symname(struct module *mod, const char *name)
 {
         unsigned int i;
+        struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
 
-        for (i = 0; i < mod->num_symtab; i++)
-                if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0 &&
-                    mod->symtab[i].st_info != 'U')
-                        return mod->symtab[i].st_value;
+        for (i = 0; i < kallsyms->num_symtab; i++)
+                if (strcmp(name, symname(kallsyms, i)) == 0 &&
+                    kallsyms->symtab[i].st_info != 'U')
+                        return kallsyms->symtab[i].st_value;
         return 0;
 }
 
@@ -3822,11 +3849,14 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
         module_assert_mutex();
 
         list_for_each_entry(mod, &modules, list) {
+                /* We hold module_mutex: no need for rcu_dereference_sched */
+                struct mod_kallsyms *kallsyms = mod->kallsyms;
+
                 if (mod->state == MODULE_STATE_UNFORMED)
                         continue;
-                for (i = 0; i < mod->num_symtab; i++) {
-                        ret = fn(data, mod->strtab + mod->symtab[i].st_name,
-                                 mod, mod->symtab[i].st_value);
+                for (i = 0; i < kallsyms->num_symtab; i++) {
+                        ret = fn(data, symname(kallsyms, i),
+                                 mod, kallsyms->symtab[i].st_value);
                         if (ret != 0)
                                 return ret;
                 }
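
The heart of the fix is that every reader now reaches the symbol table through one pointer, mod->kallsyms, which is atomically switched from the init-section table to the cut-down core table. A user-space sketch of that publish/read pattern, with C11 atomics standing in for the kernel's RCU primitives (the table contents are invented): a reader always sees either the full init-time table or the full core table, never a torn mix of num_symtab from one and symtab from the other.

#include <stdatomic.h>
#include <stdio.h>

struct symtab {
        int num;
        const char **names;
};

static const char *init_names[] = { "a", "b", "c" };
static const char *core_names[] = { "a" };

static struct symtab init_tab = { 3, init_names };
static struct symtab core_tab = { 1, core_names };

static _Atomic(struct symtab *) cur_tab = &init_tab;

static void print_syms(void)    /* reader: one consistent snapshot */
{
        struct symtab *t = atomic_load_explicit(&cur_tab,
                                                memory_order_acquire);

        for (int i = 0; i < t->num; i++)
                printf("%s\n", t->names[i]);
}

int main(void)
{
        print_syms();           /* full init-time table: a b c */
        /* do_init_module() analog: publish the cut-down core table */
        atomic_store_explicit(&cur_tab, &core_tab, memory_order_release);
        print_syms();           /* core table: a */
        return 0;
}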
diff --git a/kernel/signal.c b/kernel/signal.c
index f3f1f7a972fd..0508544c8ced 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3508,8 +3508,10 @@ static int sigsuspend(sigset_t *set)
         current->saved_sigmask = current->blocked;
         set_current_blocked(set);
 
-        __set_current_state(TASK_INTERRUPTIBLE);
-        schedule();
+        while (!signal_pending(current)) {
+                __set_current_state(TASK_INTERRUPTIBLE);
+                schedule();
+        }
         set_restore_sigmask();
         return -ERESTARTNOHAND;
 }
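
This is the classic wait-loop discipline: re-check the predicate after every wakeup rather than trusting a single schedule(), because a wakeup need not mean the awaited event occurred. The same rule in user-space form (a sketch), where pthread_cond_wait() may likewise return spuriously:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int signalled;

static void *waiter(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&lock);
        while (!signalled)      /* never a bare, single wait */
                pthread_cond_wait(&cond, &lock);
        pthread_mutex_unlock(&lock);
        printf("woke for a real event\n");
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, waiter, NULL);
        pthread_mutex_lock(&lock);
        signalled = 1;
        pthread_cond_signal(&cond);
        pthread_mutex_unlock(&lock);
        pthread_join(t, NULL);
        return 0;
}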
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index dda9e6742950..202df6cffcca 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -126,6 +126,13 @@ check_stack(unsigned long ip, unsigned long *stack)
         }
 
         /*
+         * Some archs may not have the passed in ip in the dump.
+         * If that happens, we need to show everything.
+         */
+        if (i == stack_trace_max.nr_entries)
+                i = 0;
+
+        /*
          * Now find where in the stack these are.
          */
         x = 0;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 61a0264e28f9..7ff5dc7d2ac5 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -301,7 +301,23 @@ static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
 static LIST_HEAD(workqueues);           /* PR: list of all workqueues */
 static bool workqueue_freezing;         /* PL: have wqs started freezing? */
 
-static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all unbound wqs */
+/* PL: allowable cpus for unbound wqs and work items */
+static cpumask_var_t wq_unbound_cpumask;
+
+/* CPU where unbound work was last round robin scheduled from this CPU */
+static DEFINE_PER_CPU(int, wq_rr_cpu_last);
+
+/*
+ * Local execution of unbound work items is no longer guaranteed.  The
+ * following always forces round-robin CPU selection on unbound work items
+ * to uncover usages which depend on it.
+ */
+#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
+static bool wq_debug_force_rr_cpu = true;
+#else
+static bool wq_debug_force_rr_cpu = false;
+#endif
+module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
 
 /* the per-cpu worker pools */
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
@@ -570,6 +586,16 @@ static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
                                                   int node)
 {
         assert_rcu_or_wq_mutex_or_pool_mutex(wq);
+
+        /*
+         * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a
+         * delayed item is pending.  The plan is to keep CPU -> NODE
+         * mapping valid and stable across CPU on/offlines.  Once that
+         * happens, this workaround can be removed.
+         */
+        if (unlikely(node == NUMA_NO_NODE))
+                return wq->dfl_pwq;
+
         return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
 }
 
@@ -1298,6 +1324,39 @@ static bool is_chained_work(struct workqueue_struct *wq)
         return worker && worker->current_pwq->wq == wq;
 }
 
+/*
+ * When queueing an unbound work item to a wq, prefer local CPU if allowed
+ * by wq_unbound_cpumask.  Otherwise, round robin among the allowed ones to
+ * avoid perturbing sensitive tasks.
+ */
+static int wq_select_unbound_cpu(int cpu)
+{
+        static bool printed_dbg_warning;
+        int new_cpu;
+
+        if (likely(!wq_debug_force_rr_cpu)) {
+                if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
+                        return cpu;
+        } else if (!printed_dbg_warning) {
+                pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
+                printed_dbg_warning = true;
+        }
+
+        if (cpumask_empty(wq_unbound_cpumask))
+                return cpu;
+
+        new_cpu = __this_cpu_read(wq_rr_cpu_last);
+        new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
+        if (unlikely(new_cpu >= nr_cpu_ids)) {
+                new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
+                if (unlikely(new_cpu >= nr_cpu_ids))
+                        return cpu;
+        }
+        __this_cpu_write(wq_rr_cpu_last, new_cpu);
+
+        return new_cpu;
+}
+
 static void __queue_work(int cpu, struct workqueue_struct *wq,
                          struct work_struct *work)
 {
@@ -1323,7 +1382,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
                 return;
 retry:
         if (req_cpu == WORK_CPU_UNBOUND)
-                cpu = raw_smp_processor_id();
+                cpu = wq_select_unbound_cpu(raw_smp_processor_id());
 
         /* pwq which will be used unless @work is executing elsewhere */
         if (!(wq->flags & WQ_UNBOUND))
@@ -1464,13 +1523,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
         timer_stats_timer_set_start_info(&dwork->timer);
 
         dwork->wq = wq;
-        /* timer isn't guaranteed to run in this cpu, record earlier */
-        if (cpu == WORK_CPU_UNBOUND)
-                cpu = raw_smp_processor_id();
         dwork->cpu = cpu;
         timer->expires = jiffies + delay;
 
-        add_timer_on(timer, cpu);
+        if (unlikely(cpu != WORK_CPU_UNBOUND))
+                add_timer_on(timer, cpu);
+        else
+                add_timer(timer);
 }
 
 /**
@@ -2355,7 +2414,8 @@ static void check_flush_dependency(struct workqueue_struct *target_wq,
         WARN_ONCE(current->flags & PF_MEMALLOC,
                   "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
                   current->pid, current->comm, target_wq->name, target_func);
-        WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
+        WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
+                              (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
                   "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
                   worker->current_pwq->wq->name, worker->current_func,
                   target_wq->name, target_func);
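
wq_select_unbound_cpu() reduces to: use the local CPU when the mask allows it, otherwise continue round robin from the previous pick, wrapping at the end of the mask. A user-space model (a sketch: the array stands in for the cpumask-and-online test, and one global replaces the per-CPU wq_rr_cpu_last):

#include <stdio.h>

#define NR_CPUS 8

static int allowed[NR_CPUS] = { 0, 1, 0, 1, 0, 1, 0, 0 };  /* cpus 1, 3, 5 */
static int rr_last = -1;

static int next_allowed(int from)
{
        for (int cpu = from + 1; cpu < NR_CPUS; cpu++)
                if (allowed[cpu])
                        return cpu;
        return NR_CPUS;         /* ran off the end */
}

static int select_unbound_cpu(int local_cpu)
{
        int new_cpu;

        if (allowed[local_cpu])
                return local_cpu;       /* prefer local when permitted */

        new_cpu = next_allowed(rr_last);
        if (new_cpu >= NR_CPUS) {
                new_cpu = next_allowed(-1);     /* wrap to first allowed */
                if (new_cpu >= NR_CPUS)
                        return local_cpu;       /* empty mask: give up */
        }
        rr_last = new_cpu;
        return new_cpu;
}

int main(void)
{
        for (int i = 0; i < 5; i++)
                printf("pick %d\n", select_unbound_cpu(0)); /* 1 3 5 1 3 */
        return 0;
}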