author	Linus Torvalds <torvalds@linux-foundation.org>	2015-04-13 19:19:18 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-04-13 19:19:18 -0400
commit	45141eeafefdb8998d2ab1f87c2afe0457059b47 (patch)
tree	9a1665a01bf0e36715249ed9e9baaa76e08326f7
parent	8954672d86d036643e3ce7ce3b2422c336db66d0 (diff)
parent	6ba94429c8e7b87b0fff13c5ac90731b239b77fa (diff)
Merge branch 'for-4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
Pull workqueue updates from Tejun Heo:
 "Workqueue now prints debug information at the end of sysrq-t which
  should be helpful when tracking down suspected workqueue stalls.  It
  only prints out the ones with something currently going on so it
  shouldn't add much output in most cases"

* 'for-4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  workqueue: Reorder sysfs code
  percpu: Fix trivial typos in comments
  workqueue: dump workqueues on sysrq-t
  workqueue: keep track of the flushing task and pool manager
  workqueue: make the workqueues list RCU walkable
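[Editor's note: the dump added by this series is reached through the existing sysrq 't' (show task state) handler, whose output goes to the kernel log. Below is a minimal userspace sketch for triggering it, not part of the patch; it assumes CONFIG_MAGIC_SYSRQ=y and that the bitmask in /proc/sys/kernel/sysrq allows the 't' command.]

/* Minimal sketch: write 't' to /proc/sysrq-trigger, which runs the same
 * handler as alt-sysrq-t on the console; with this series the task dump
 * is followed by the busy-workqueue dump.  Read the result from dmesg. */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	FILE *f = fopen("/proc/sysrq-trigger", "w");

	if (!f) {
		perror("fopen /proc/sysrq-trigger (requires root)");
		return EXIT_FAILURE;
	}
	fputc('t', f);
	fclose(f);
	return EXIT_SUCCESS;
}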
-rw-r--r--	drivers/tty/sysrq.c	1
-rw-r--r--	include/linux/workqueue.h	1
-rw-r--r--	kernel/workqueue.c	847
-rw-r--r--	mm/percpu.c	4
4 files changed, 518 insertions(+), 335 deletions(-)
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 259a4d5a4e8f..843f2cdc280b 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -275,6 +275,7 @@ static struct sysrq_key_op sysrq_showregs_op = {
 static void sysrq_handle_showstate(int key)
 {
 	show_state();
+	show_workqueue_state();
 }
 static struct sysrq_key_op sysrq_showstate_op = {
 	.handler	= sysrq_handle_showstate,
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index f597846ff605..deee212af8e0 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -454,6 +454,7 @@ extern bool workqueue_congested(int cpu, struct workqueue_struct *wq);
 extern unsigned int work_busy(struct work_struct *work);
 extern __printf(1, 2) void set_worker_desc(const char *fmt, ...);
 extern void print_worker_info(const char *log_lvl, struct task_struct *task);
+extern void show_workqueue_state(void);

 /**
  * queue_work - queue work on a workqueue
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 41ff75b478c6..586ad91300b0 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -159,6 +159,7 @@ struct worker_pool {

 	/* see manage_workers() for details on the two manager mutexes */
 	struct mutex		manager_arb;	/* manager arbitration */
+	struct worker		*manager;	/* L: purely informational */
 	struct mutex		attach_mutex;	/* attach/detach exclusion */
 	struct list_head	workers;	/* A: attached workers */
 	struct completion	*detach_completion; /* all workers detached */
@@ -230,7 +231,7 @@ struct wq_device;
  */
 struct workqueue_struct {
 	struct list_head	pwqs;		/* WR: all pwqs of this wq */
-	struct list_head	list;		/* PL: list of all workqueues */
+	struct list_head	list;		/* PR: list of all workqueues */

 	struct mutex		mutex;		/* protects this wq */
 	int			work_color;	/* WQ: current work color */
@@ -257,6 +258,13 @@ struct workqueue_struct {
 #endif
 	char			name[WQ_NAME_LEN]; /* I: workqueue name */

+	/*
+	 * Destruction of workqueue_struct is sched-RCU protected to allow
+	 * walking the workqueues list without grabbing wq_pool_mutex.
+	 * This is used to dump all workqueues from sysrq.
+	 */
+	struct rcu_head		rcu;
+
 	/* hot fields used during command issue, aligned to cacheline */
 	unsigned int		flags ____cacheline_aligned; /* WQ: WQ_* flags */
 	struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
@@ -288,7 +296,7 @@ static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
 static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
 static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */

-static LIST_HEAD(workqueues);		/* PL: list of all workqueues */
+static LIST_HEAD(workqueues);		/* PR: list of all workqueues */
 static bool workqueue_freezing;		/* PL: have wqs started freezing? */

 /* the per-cpu worker pools */
@@ -324,6 +332,7 @@ EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
 static int worker_thread(void *__worker);
 static void copy_workqueue_attrs(struct workqueue_attrs *to,
 				 const struct workqueue_attrs *from);
+static void workqueue_sysfs_unregister(struct workqueue_struct *wq);

 #define CREATE_TRACE_POINTS
 #include <trace/events/workqueue.h>
@@ -1911,9 +1920,11 @@ static bool manage_workers(struct worker *worker)
 	 */
 	if (!mutex_trylock(&pool->manager_arb))
 		return false;
+	pool->manager = worker;

 	maybe_create_worker(pool);

+	pool->manager = NULL;
 	mutex_unlock(&pool->manager_arb);
 	return true;
 }
@@ -2303,6 +2314,7 @@ repeat:
 struct wq_barrier {
 	struct work_struct	work;
 	struct completion	done;
+	struct task_struct	*task;	/* purely informational */
 };

 static void wq_barrier_func(struct work_struct *work)
@@ -2351,6 +2363,7 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
 	INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
 	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
 	init_completion(&barr->done);
+	barr->task = current;

 	/*
 	 * If @target is currently being executed, schedule the
@@ -2989,323 +3002,6 @@ int execute_in_process_context(work_func_t fn, struct execute_work *ew)
 }
 EXPORT_SYMBOL_GPL(execute_in_process_context);

-#ifdef CONFIG_SYSFS
-/*
- * Workqueues with WQ_SYSFS flag set is visible to userland via
- * /sys/bus/workqueue/devices/WQ_NAME. All visible workqueues have the
- * following attributes.
- *
- *  per_cpu	RO bool	: whether the workqueue is per-cpu or unbound
- *  max_active	RW int	: maximum number of in-flight work items
- *
- * Unbound workqueues have the following extra attributes.
- *
- *  id		RO int	: the associated pool ID
- *  nice	RW int	: nice value of the workers
- *  cpumask	RW mask	: bitmask of allowed CPUs for the workers
- */
-struct wq_device {
-	struct workqueue_struct		*wq;
-	struct device			dev;
-};
-
-static struct workqueue_struct *dev_to_wq(struct device *dev)
-{
-	struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
-
-	return wq_dev->wq;
-}
-
-static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
-			    char *buf)
-{
-	struct workqueue_struct *wq = dev_to_wq(dev);
-
-	return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
-}
-static DEVICE_ATTR_RO(per_cpu);
-
-static ssize_t max_active_show(struct device *dev,
-			       struct device_attribute *attr, char *buf)
-{
-	struct workqueue_struct *wq = dev_to_wq(dev);
-
-	return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
-}
-
-static ssize_t max_active_store(struct device *dev,
-				struct device_attribute *attr, const char *buf,
-				size_t count)
-{
-	struct workqueue_struct *wq = dev_to_wq(dev);
-	int val;
-
-	if (sscanf(buf, "%d", &val) != 1 || val <= 0)
-		return -EINVAL;
-
-	workqueue_set_max_active(wq, val);
-	return count;
-}
-static DEVICE_ATTR_RW(max_active);
-
-static struct attribute *wq_sysfs_attrs[] = {
-	&dev_attr_per_cpu.attr,
-	&dev_attr_max_active.attr,
-	NULL,
-};
-ATTRIBUTE_GROUPS(wq_sysfs);
-
-static ssize_t wq_pool_ids_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct workqueue_struct *wq = dev_to_wq(dev);
-	const char *delim = "";
-	int node, written = 0;
-
-	rcu_read_lock_sched();
-	for_each_node(node) {
-		written += scnprintf(buf + written, PAGE_SIZE - written,
-				     "%s%d:%d", delim, node,
-				     unbound_pwq_by_node(wq, node)->pool->id);
-		delim = " ";
-	}
-	written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
-	rcu_read_unlock_sched();
-
-	return written;
-}
-
-static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr,
-			    char *buf)
-{
-	struct workqueue_struct *wq = dev_to_wq(dev);
-	int written;
-
-	mutex_lock(&wq->mutex);
-	written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice);
-	mutex_unlock(&wq->mutex);
-
-	return written;
-}
-
-/* prepare workqueue_attrs for sysfs store operations */
-static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
-{
-	struct workqueue_attrs *attrs;
-
-	attrs = alloc_workqueue_attrs(GFP_KERNEL);
-	if (!attrs)
-		return NULL;
-
-	mutex_lock(&wq->mutex);
-	copy_workqueue_attrs(attrs, wq->unbound_attrs);
-	mutex_unlock(&wq->mutex);
-	return attrs;
-}
-
-static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
-			     const char *buf, size_t count)
-{
-	struct workqueue_struct *wq = dev_to_wq(dev);
-	struct workqueue_attrs *attrs;
-	int ret;
-
-	attrs = wq_sysfs_prep_attrs(wq);
-	if (!attrs)
-		return -ENOMEM;
-
-	if (sscanf(buf, "%d", &attrs->nice) == 1 &&
-	    attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
-		ret = apply_workqueue_attrs(wq, attrs);
-	else
-		ret = -EINVAL;
-
-	free_workqueue_attrs(attrs);
-	return ret ?: count;
-}
-
-static ssize_t wq_cpumask_show(struct device *dev,
-			       struct device_attribute *attr, char *buf)
-{
-	struct workqueue_struct *wq = dev_to_wq(dev);
-	int written;
-
-	mutex_lock(&wq->mutex);
-	written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
-			    cpumask_pr_args(wq->unbound_attrs->cpumask));
-	mutex_unlock(&wq->mutex);
-	return written;
-}
-
-static ssize_t wq_cpumask_store(struct device *dev,
-				struct device_attribute *attr,
-				const char *buf, size_t count)
-{
-	struct workqueue_struct *wq = dev_to_wq(dev);
-	struct workqueue_attrs *attrs;
-	int ret;
-
-	attrs = wq_sysfs_prep_attrs(wq);
-	if (!attrs)
-		return -ENOMEM;
-
-	ret = cpumask_parse(buf, attrs->cpumask);
-	if (!ret)
-		ret = apply_workqueue_attrs(wq, attrs);
-
-	free_workqueue_attrs(attrs);
-	return ret ?: count;
-}
-
-static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
-			    char *buf)
-{
-	struct workqueue_struct *wq = dev_to_wq(dev);
-	int written;
-
-	mutex_lock(&wq->mutex);
-	written = scnprintf(buf, PAGE_SIZE, "%d\n",
-			    !wq->unbound_attrs->no_numa);
-	mutex_unlock(&wq->mutex);
-
-	return written;
-}
-
-static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
-			     const char *buf, size_t count)
-{
-	struct workqueue_struct *wq = dev_to_wq(dev);
-	struct workqueue_attrs *attrs;
-	int v, ret;
-
-	attrs = wq_sysfs_prep_attrs(wq);
-	if (!attrs)
-		return -ENOMEM;
-
-	ret = -EINVAL;
-	if (sscanf(buf, "%d", &v) == 1) {
-		attrs->no_numa = !v;
-		ret = apply_workqueue_attrs(wq, attrs);
-	}
-
-	free_workqueue_attrs(attrs);
-	return ret ?: count;
-}
-
-static struct device_attribute wq_sysfs_unbound_attrs[] = {
-	__ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
-	__ATTR(nice, 0644, wq_nice_show, wq_nice_store),
-	__ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
-	__ATTR(numa, 0644, wq_numa_show, wq_numa_store),
-	__ATTR_NULL,
-};
-
-static struct bus_type wq_subsys = {
-	.name				= "workqueue",
-	.dev_groups			= wq_sysfs_groups,
-};
-
-static int __init wq_sysfs_init(void)
-{
-	return subsys_virtual_register(&wq_subsys, NULL);
-}
-core_initcall(wq_sysfs_init);
-
-static void wq_device_release(struct device *dev)
-{
-	struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
-
-	kfree(wq_dev);
-}
-
-/**
- * workqueue_sysfs_register - make a workqueue visible in sysfs
- * @wq: the workqueue to register
- *
- * Expose @wq in sysfs under /sys/bus/workqueue/devices.
- * alloc_workqueue*() automatically calls this function if WQ_SYSFS is set
- * which is the preferred method.
- *
- * Workqueue user should use this function directly iff it wants to apply
- * workqueue_attrs before making the workqueue visible in sysfs; otherwise,
- * apply_workqueue_attrs() may race against userland updating the
- * attributes.
- *
- * Return: 0 on success, -errno on failure.
- */
-int workqueue_sysfs_register(struct workqueue_struct *wq)
-{
-	struct wq_device *wq_dev;
-	int ret;
-
-	/*
-	 * Adjusting max_active or creating new pwqs by applyting
-	 * attributes breaks ordering guarantee. Disallow exposing ordered
-	 * workqueues.
-	 */
-	if (WARN_ON(wq->flags & __WQ_ORDERED))
-		return -EINVAL;
-
-	wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
-	if (!wq_dev)
-		return -ENOMEM;
-
-	wq_dev->wq = wq;
-	wq_dev->dev.bus = &wq_subsys;
-	wq_dev->dev.init_name = wq->name;
-	wq_dev->dev.release = wq_device_release;
-
-	/*
-	 * unbound_attrs are created separately. Suppress uevent until
-	 * everything is ready.
-	 */
-	dev_set_uevent_suppress(&wq_dev->dev, true);
-
-	ret = device_register(&wq_dev->dev);
-	if (ret) {
-		kfree(wq_dev);
-		wq->wq_dev = NULL;
-		return ret;
-	}
-
-	if (wq->flags & WQ_UNBOUND) {
-		struct device_attribute *attr;
-
-		for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
-			ret = device_create_file(&wq_dev->dev, attr);
-			if (ret) {
-				device_unregister(&wq_dev->dev);
-				wq->wq_dev = NULL;
-				return ret;
-			}
-		}
-	}
-
-	dev_set_uevent_suppress(&wq_dev->dev, false);
-	kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
-	return 0;
-}
-
-/**
- * workqueue_sysfs_unregister - undo workqueue_sysfs_register()
- * @wq: the workqueue to unregister
- *
- * If @wq is registered to sysfs by workqueue_sysfs_register(), unregister.
- */
-static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
-{
-	struct wq_device *wq_dev = wq->wq_dev;
-
-	if (!wq->wq_dev)
-		return;
-
-	wq->wq_dev = NULL;
-	device_unregister(&wq_dev->dev);
-}
-#else	/* CONFIG_SYSFS */
-static void workqueue_sysfs_unregister(struct workqueue_struct *wq)	{ }
-#endif	/* CONFIG_SYSFS */
-
 /**
  * free_workqueue_attrs - free a workqueue_attrs
  * @attrs: workqueue_attrs to free
@@ -3424,6 +3120,20 @@ static int init_worker_pool(struct worker_pool *pool)
 	return 0;
 }

+static void rcu_free_wq(struct rcu_head *rcu)
+{
+	struct workqueue_struct *wq =
+		container_of(rcu, struct workqueue_struct, rcu);
+
+	if (!(wq->flags & WQ_UNBOUND))
+		free_percpu(wq->cpu_pwqs);
+	else
+		free_workqueue_attrs(wq->unbound_attrs);
+
+	kfree(wq->rescuer);
+	kfree(wq);
+}
+
 static void rcu_free_pool(struct rcu_head *rcu)
 {
 	struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
@@ -3601,12 +3311,10 @@ static void pwq_unbound_release_workfn(struct work_struct *work)

 	/*
 	 * If we're the last pwq going away, @wq is already dead and no one
-	 * is gonna access it anymore. Free it.
+	 * is gonna access it anymore. Schedule RCU free.
 	 */
-	if (is_last) {
-		free_workqueue_attrs(wq->unbound_attrs);
-		kfree(wq);
-	}
+	if (is_last)
+		call_rcu_sched(&wq->rcu, rcu_free_wq);
 }

 /**
@@ -4143,7 +3851,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 		pwq_adjust_max_active(pwq);
 	mutex_unlock(&wq->mutex);

-	list_add(&wq->list, &workqueues);
+	list_add_tail_rcu(&wq->list, &workqueues);

 	mutex_unlock(&wq_pool_mutex);

@@ -4199,24 +3907,20 @@ void destroy_workqueue(struct workqueue_struct *wq)
 	 * flushing is complete in case freeze races us.
 	 */
 	mutex_lock(&wq_pool_mutex);
-	list_del_init(&wq->list);
+	list_del_rcu(&wq->list);
 	mutex_unlock(&wq_pool_mutex);

 	workqueue_sysfs_unregister(wq);

-	if (wq->rescuer) {
+	if (wq->rescuer)
 		kthread_stop(wq->rescuer->task);
-		kfree(wq->rescuer);
-		wq->rescuer = NULL;
-	}

 	if (!(wq->flags & WQ_UNBOUND)) {
 		/*
 		 * The base ref is never dropped on per-cpu pwqs. Directly
-		 * free the pwqs and wq.
+		 * schedule RCU free.
 		 */
-		free_percpu(wq->cpu_pwqs);
-		kfree(wq);
+		call_rcu_sched(&wq->rcu, rcu_free_wq);
 	} else {
 		/*
 		 * We're the sole accessor of @wq at this point. Directly
@@ -4437,6 +4141,166 @@ void print_worker_info(const char *log_lvl, struct task_struct *task)
 	}
 }

+static void pr_cont_pool_info(struct worker_pool *pool)
+{
+	pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask);
+	if (pool->node != NUMA_NO_NODE)
+		pr_cont(" node=%d", pool->node);
+	pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice);
+}
+
+static void pr_cont_work(bool comma, struct work_struct *work)
+{
+	if (work->func == wq_barrier_func) {
+		struct wq_barrier *barr;
+
+		barr = container_of(work, struct wq_barrier, work);
+
+		pr_cont("%s BAR(%d)", comma ? "," : "",
+			task_pid_nr(barr->task));
+	} else {
+		pr_cont("%s %pf", comma ? "," : "", work->func);
+	}
+}
+
+static void show_pwq(struct pool_workqueue *pwq)
+{
+	struct worker_pool *pool = pwq->pool;
+	struct work_struct *work;
+	struct worker *worker;
+	bool has_in_flight = false, has_pending = false;
+	int bkt;
+
+	pr_info(" pwq %d:", pool->id);
+	pr_cont_pool_info(pool);
+
+	pr_cont(" active=%d/%d%s\n", pwq->nr_active, pwq->max_active,
+		!list_empty(&pwq->mayday_node) ? " MAYDAY" : "");
+
+	hash_for_each(pool->busy_hash, bkt, worker, hentry) {
+		if (worker->current_pwq == pwq) {
+			has_in_flight = true;
+			break;
+		}
+	}
+	if (has_in_flight) {
+		bool comma = false;
+
+		pr_info(" in-flight:");
+		hash_for_each(pool->busy_hash, bkt, worker, hentry) {
+			if (worker->current_pwq != pwq)
+				continue;
+
+			pr_cont("%s %d%s:%pf", comma ? "," : "",
+				task_pid_nr(worker->task),
+				worker == pwq->wq->rescuer ? "(RESCUER)" : "",
+				worker->current_func);
+			list_for_each_entry(work, &worker->scheduled, entry)
+				pr_cont_work(false, work);
+			comma = true;
+		}
+		pr_cont("\n");
+	}
+
+	list_for_each_entry(work, &pool->worklist, entry) {
+		if (get_work_pwq(work) == pwq) {
+			has_pending = true;
+			break;
+		}
+	}
+	if (has_pending) {
+		bool comma = false;
+
+		pr_info(" pending:");
+		list_for_each_entry(work, &pool->worklist, entry) {
+			if (get_work_pwq(work) != pwq)
+				continue;
+
+			pr_cont_work(comma, work);
+			comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
+		}
+		pr_cont("\n");
+	}
+
+	if (!list_empty(&pwq->delayed_works)) {
+		bool comma = false;
+
+		pr_info(" delayed:");
+		list_for_each_entry(work, &pwq->delayed_works, entry) {
+			pr_cont_work(comma, work);
+			comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
+		}
+		pr_cont("\n");
+	}
+}
+
+/**
+ * show_workqueue_state - dump workqueue state
+ *
+ * Called from a sysrq handler and prints out all busy workqueues and
+ * pools.
+ */
+void show_workqueue_state(void)
+{
+	struct workqueue_struct *wq;
+	struct worker_pool *pool;
+	unsigned long flags;
+	int pi;
+
+	rcu_read_lock_sched();
+
+	pr_info("Showing busy workqueues and worker pools:\n");
+
+	list_for_each_entry_rcu(wq, &workqueues, list) {
+		struct pool_workqueue *pwq;
+		bool idle = true;
+
+		for_each_pwq(pwq, wq) {
+			if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
+				idle = false;
+				break;
+			}
+		}
+		if (idle)
+			continue;
+
+		pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
+
+		for_each_pwq(pwq, wq) {
+			spin_lock_irqsave(&pwq->pool->lock, flags);
+			if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+				show_pwq(pwq);
+			spin_unlock_irqrestore(&pwq->pool->lock, flags);
+		}
+	}
+
+	for_each_pool(pool, pi) {
+		struct worker *worker;
+		bool first = true;
+
+		spin_lock_irqsave(&pool->lock, flags);
+		if (pool->nr_workers == pool->nr_idle)
+			goto next_pool;
+
+		pr_info("pool %d:", pool->id);
+		pr_cont_pool_info(pool);
+		pr_cont(" workers=%d", pool->nr_workers);
+		if (pool->manager)
+			pr_cont(" manager: %d",
+				task_pid_nr(pool->manager->task));
+		list_for_each_entry(worker, &pool->idle_list, entry) {
+			pr_cont(" %s%d", first ? "idle: " : "",
+				task_pid_nr(worker->task));
+			first = false;
+		}
+		pr_cont("\n");
+	next_pool:
+		spin_unlock_irqrestore(&pool->lock, flags);
+	}
+
+	rcu_read_unlock_sched();
+}
+
 /*
  * CPU hotplug.
  *
@@ -4834,6 +4698,323 @@ out_unlock:
 }
 #endif	/* CONFIG_FREEZER */

+#ifdef CONFIG_SYSFS
+/*
+ * Workqueues with WQ_SYSFS flag set is visible to userland via
+ * /sys/bus/workqueue/devices/WQ_NAME. All visible workqueues have the
+ * following attributes.
+ *
+ *  per_cpu	RO bool	: whether the workqueue is per-cpu or unbound
+ *  max_active	RW int	: maximum number of in-flight work items
+ *
+ * Unbound workqueues have the following extra attributes.
+ *
+ *  id		RO int	: the associated pool ID
+ *  nice	RW int	: nice value of the workers
+ *  cpumask	RW mask	: bitmask of allowed CPUs for the workers
+ */
+struct wq_device {
+	struct workqueue_struct		*wq;
+	struct device			dev;
+};
+
+static struct workqueue_struct *dev_to_wq(struct device *dev)
+{
+	struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
+
+	return wq_dev->wq;
+}
+
+static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
+}
+static DEVICE_ATTR_RO(per_cpu);
+
+static ssize_t max_active_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
+}
+
+static ssize_t max_active_store(struct device *dev,
+				struct device_attribute *attr, const char *buf,
+				size_t count)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+	int val;
+
+	if (sscanf(buf, "%d", &val) != 1 || val <= 0)
+		return -EINVAL;
+
+	workqueue_set_max_active(wq, val);
+	return count;
+}
+static DEVICE_ATTR_RW(max_active);
+
+static struct attribute *wq_sysfs_attrs[] = {
+	&dev_attr_per_cpu.attr,
+	&dev_attr_max_active.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(wq_sysfs);
+
+static ssize_t wq_pool_ids_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+	const char *delim = "";
+	int node, written = 0;
+
+	rcu_read_lock_sched();
+	for_each_node(node) {
+		written += scnprintf(buf + written, PAGE_SIZE - written,
+				     "%s%d:%d", delim, node,
+				     unbound_pwq_by_node(wq, node)->pool->id);
+		delim = " ";
+	}
+	written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
+	rcu_read_unlock_sched();
+
+	return written;
+}
+
+static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+	int written;
+
+	mutex_lock(&wq->mutex);
+	written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice);
+	mutex_unlock(&wq->mutex);
+
+	return written;
+}
+
+/* prepare workqueue_attrs for sysfs store operations */
+static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
+{
+	struct workqueue_attrs *attrs;
+
+	attrs = alloc_workqueue_attrs(GFP_KERNEL);
+	if (!attrs)
+		return NULL;
+
+	mutex_lock(&wq->mutex);
+	copy_workqueue_attrs(attrs, wq->unbound_attrs);
+	mutex_unlock(&wq->mutex);
+	return attrs;
+}
+
+static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+	struct workqueue_attrs *attrs;
+	int ret;
+
+	attrs = wq_sysfs_prep_attrs(wq);
+	if (!attrs)
+		return -ENOMEM;
+
+	if (sscanf(buf, "%d", &attrs->nice) == 1 &&
+	    attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
+		ret = apply_workqueue_attrs(wq, attrs);
+	else
+		ret = -EINVAL;
+
+	free_workqueue_attrs(attrs);
+	return ret ?: count;
+}
+
+static ssize_t wq_cpumask_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+	int written;
+
+	mutex_lock(&wq->mutex);
+	written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
+			    cpumask_pr_args(wq->unbound_attrs->cpumask));
+	mutex_unlock(&wq->mutex);
+	return written;
+}
+
+static ssize_t wq_cpumask_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+	struct workqueue_attrs *attrs;
+	int ret;
+
+	attrs = wq_sysfs_prep_attrs(wq);
+	if (!attrs)
+		return -ENOMEM;
+
+	ret = cpumask_parse(buf, attrs->cpumask);
+	if (!ret)
+		ret = apply_workqueue_attrs(wq, attrs);
+
+	free_workqueue_attrs(attrs);
+	return ret ?: count;
+}
+
+static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+	int written;
+
+	mutex_lock(&wq->mutex);
+	written = scnprintf(buf, PAGE_SIZE, "%d\n",
+			    !wq->unbound_attrs->no_numa);
+	mutex_unlock(&wq->mutex);
+
+	return written;
+}
+
+static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+	struct workqueue_attrs *attrs;
+	int v, ret;
+
+	attrs = wq_sysfs_prep_attrs(wq);
+	if (!attrs)
+		return -ENOMEM;
+
+	ret = -EINVAL;
+	if (sscanf(buf, "%d", &v) == 1) {
+		attrs->no_numa = !v;
+		ret = apply_workqueue_attrs(wq, attrs);
+	}
+
+	free_workqueue_attrs(attrs);
+	return ret ?: count;
+}
+
+static struct device_attribute wq_sysfs_unbound_attrs[] = {
+	__ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
+	__ATTR(nice, 0644, wq_nice_show, wq_nice_store),
+	__ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
+	__ATTR(numa, 0644, wq_numa_show, wq_numa_store),
+	__ATTR_NULL,
+};
+
+static struct bus_type wq_subsys = {
+	.name				= "workqueue",
+	.dev_groups			= wq_sysfs_groups,
+};
+
+static int __init wq_sysfs_init(void)
+{
+	return subsys_virtual_register(&wq_subsys, NULL);
+}
+core_initcall(wq_sysfs_init);
+
+static void wq_device_release(struct device *dev)
+{
+	struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
+
+	kfree(wq_dev);
+}
+
+/**
+ * workqueue_sysfs_register - make a workqueue visible in sysfs
+ * @wq: the workqueue to register
+ *
+ * Expose @wq in sysfs under /sys/bus/workqueue/devices.
+ * alloc_workqueue*() automatically calls this function if WQ_SYSFS is set
+ * which is the preferred method.
+ *
+ * Workqueue user should use this function directly iff it wants to apply
+ * workqueue_attrs before making the workqueue visible in sysfs; otherwise,
+ * apply_workqueue_attrs() may race against userland updating the
+ * attributes.
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+int workqueue_sysfs_register(struct workqueue_struct *wq)
+{
+	struct wq_device *wq_dev;
+	int ret;
+
+	/*
+	 * Adjusting max_active or creating new pwqs by applyting
+	 * attributes breaks ordering guarantee. Disallow exposing ordered
+	 * workqueues.
+	 */
+	if (WARN_ON(wq->flags & __WQ_ORDERED))
+		return -EINVAL;
+
+	wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
+	if (!wq_dev)
+		return -ENOMEM;
+
+	wq_dev->wq = wq;
+	wq_dev->dev.bus = &wq_subsys;
+	wq_dev->dev.init_name = wq->name;
+	wq_dev->dev.release = wq_device_release;
+
+	/*
+	 * unbound_attrs are created separately. Suppress uevent until
+	 * everything is ready.
+	 */
+	dev_set_uevent_suppress(&wq_dev->dev, true);
+
+	ret = device_register(&wq_dev->dev);
+	if (ret) {
+		kfree(wq_dev);
+		wq->wq_dev = NULL;
+		return ret;
+	}
+
+	if (wq->flags & WQ_UNBOUND) {
+		struct device_attribute *attr;
+
+		for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
+			ret = device_create_file(&wq_dev->dev, attr);
+			if (ret) {
+				device_unregister(&wq_dev->dev);
+				wq->wq_dev = NULL;
+				return ret;
+			}
+		}
+	}
+
+	dev_set_uevent_suppress(&wq_dev->dev, false);
+	kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
+	return 0;
+}
+
+/**
+ * workqueue_sysfs_unregister - undo workqueue_sysfs_register()
+ * @wq: the workqueue to unregister
+ *
+ * If @wq is registered to sysfs by workqueue_sysfs_register(), unregister.
+ */
+static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
+{
+	struct wq_device *wq_dev = wq->wq_dev;
+
+	if (!wq->wq_dev)
+		return;
+
+	wq->wq_dev = NULL;
+	device_unregister(&wq_dev->dev);
+}
+#else	/* CONFIG_SYSFS */
+static void workqueue_sysfs_unregister(struct workqueue_struct *wq)	{ }
+#endif	/* CONFIG_SYSFS */
+
 static void __init wq_numa_init(void)
 {
 	cpumask_var_t *tbl;
diff --git a/mm/percpu.c b/mm/percpu.c
index 73c97a5f4495..dfd02484e8de 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1310,7 +1310,7 @@ bool is_kernel_percpu_address(unsigned long addr)
  * and, from the second one, the backing allocator (currently either vm or
  * km) provides translation.
  *
- * The addr can be tranlated simply without checking if it falls into the
+ * The addr can be translated simply without checking if it falls into the
  * first chunk. But the current code reflects better how percpu allocator
  * actually works, and the verification can discover both bugs in percpu
  * allocator itself and per_cpu_ptr_to_phys() callers. So we keep current
@@ -1762,7 +1762,7 @@ early_param("percpu_alloc", percpu_alloc_setup);
  * and other parameters considering needed percpu size, allocation
  * atom size and distances between CPUs.
  *
- * Groups are always mutliples of atom size and CPUs which are of
+ * Groups are always multiples of atom size and CPUs which are of
  * LOCAL_DISTANCE both ways are grouped together and share space for
  * units in the same group. The returned configuration is guaranteed
  * to have CPUs on different nodes on different groups and >=75% usage