diff options
Diffstat (limited to 'fs/btrfs/async-thread.c')
| -rw-r--r-- | fs/btrfs/async-thread.c | 848 |
1 files changed, 227 insertions, 621 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index c1e0b0caf9cc..ecb5832c0967 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
| @@ -1,5 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | 2 | * Copyright (C) 2007 Oracle. All rights reserved. |
| 3 | * Copyright (C) 2014 Fujitsu. All rights reserved. | ||
| 3 | * | 4 | * |
| 4 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
| 5 | * modify it under the terms of the GNU General Public | 6 | * modify it under the terms of the GNU General Public |
| @@ -21,708 +22,313 @@ | |||
| 21 | #include <linux/list.h> | 22 | #include <linux/list.h> |
| 22 | #include <linux/spinlock.h> | 23 | #include <linux/spinlock.h> |
| 23 | #include <linux/freezer.h> | 24 | #include <linux/freezer.h> |
| 25 | #include <linux/workqueue.h> | ||
| 24 | #include "async-thread.h" | 26 | #include "async-thread.h" |
| 27 | #include "ctree.h" | ||
| 28 | |||
| 29 | #define WORK_DONE_BIT 0 | ||
| 30 | #define WORK_ORDER_DONE_BIT 1 | ||
| 31 | #define WORK_HIGH_PRIO_BIT 2 | ||
| 32 | |||
| 33 | #define NO_THRESHOLD (-1) | ||
| 34 | #define DFT_THRESHOLD (32) | ||
| 35 | |||
| 36 | struct __btrfs_workqueue { | ||
| 37 | struct workqueue_struct *normal_wq; | ||
| 38 | /* List head pointing to ordered work list */ | ||
| 39 | struct list_head ordered_list; | ||
| 40 | |||
| 41 | /* Spinlock for ordered_list */ | ||
| 42 | spinlock_t list_lock; | ||
| 43 | |||
| 44 | /* Thresholding related variables */ | ||
| 45 | atomic_t pending; | ||
| 46 | int max_active; | ||
| 47 | int current_max; | ||
| 48 | int thresh; | ||
| 49 | unsigned int count; | ||
| 50 | spinlock_t thres_lock; | ||
| 51 | }; | ||
| 25 | 52 | ||
| 26 | #define WORK_QUEUED_BIT 0 | 53 | struct btrfs_workqueue { |
| 27 | #define WORK_DONE_BIT 1 | 54 | struct __btrfs_workqueue *normal; |
| 28 | #define WORK_ORDER_DONE_BIT 2 | 55 | struct __btrfs_workqueue *high; |
| 29 | #define WORK_HIGH_PRIO_BIT 3 | 56 | }; |
| 30 | |||
| 31 | /* | ||
| 32 | * container for the kthread task pointer and the list of pending work | ||
| 33 | * One of these is allocated per thread. | ||
| 34 | */ | ||
| 35 | struct btrfs_worker_thread { | ||
| 36 | /* pool we belong to */ | ||
| 37 | struct btrfs_workers *workers; | ||
| 38 | |||
| 39 | /* list of struct btrfs_work that are waiting for service */ | ||
| 40 | struct list_head pending; | ||
| 41 | struct list_head prio_pending; | ||
| 42 | |||
| 43 | /* list of worker threads from struct btrfs_workers */ | ||
| 44 | struct list_head worker_list; | ||
| 45 | |||
| 46 | /* kthread */ | ||
| 47 | struct task_struct *task; | ||
| 48 | 57 | ||
| 49 | /* number of things on the pending list */ | 58 | static inline struct __btrfs_workqueue |
| 50 | atomic_t num_pending; | 59 | *__btrfs_alloc_workqueue(const char *name, int flags, int max_active, |
| 60 | int thresh) | ||
| 61 | { | ||
| 62 | struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); | ||
| 51 | 63 | ||
| 52 | /* reference counter for this struct */ | 64 | if (unlikely(!ret)) |
| 53 | atomic_t refs; | 65 | return NULL; |
| 54 | 66 | ||
| 55 | unsigned long sequence; | 67 | ret->max_active = max_active; |
| 68 | atomic_set(&ret->pending, 0); | ||
| 69 | if (thresh == 0) | ||
| 70 | thresh = DFT_THRESHOLD; | ||
| 71 | /* For low threshold, disabling threshold is a better choice */ | ||
| 72 | if (thresh < DFT_THRESHOLD) { | ||
| 73 | ret->current_max = max_active; | ||
| 74 | ret->thresh = NO_THRESHOLD; | ||
| 75 | } else { | ||
| 76 | ret->current_max = 1; | ||
| 77 | ret->thresh = thresh; | ||
| 78 | } | ||
| 56 | 79 | ||
| 57 | /* protects the pending list. */ | 80 | if (flags & WQ_HIGHPRI) |
| 58 | spinlock_t lock; | 81 | ret->normal_wq = alloc_workqueue("%s-%s-high", flags, |
| 82 | ret->max_active, | ||
| 83 | "btrfs", name); | ||
| 84 | else | ||
| 85 | ret->normal_wq = alloc_workqueue("%s-%s", flags, | ||
| 86 | ret->max_active, "btrfs", | ||
| 87 | name); | ||
| 88 | if (unlikely(!ret->normal_wq)) { | ||
| 89 | kfree(ret); | ||
| 90 | return NULL; | ||
| 91 | } | ||
| 59 | 92 | ||
| 60 | /* set to non-zero when this thread is already awake and kicking */ | 93 | INIT_LIST_HEAD(&ret->ordered_list); |
| 61 | int working; | 94 | spin_lock_init(&ret->list_lock); |
| 95 | spin_lock_init(&ret->thres_lock); | ||
| 96 | trace_btrfs_workqueue_alloc(ret, name, flags & WQ_HIGHPRI); | ||
| 97 | return ret; | ||
| 98 | } | ||
| 62 | 99 | ||
| 63 | /* are we currently idle */ | 100 | static inline void |
| 64 | int idle; | 101 | __btrfs_destroy_workqueue(struct __btrfs_workqueue *wq); |
| 65 | }; | ||
| 66 | 102 | ||
| 67 | static int __btrfs_start_workers(struct btrfs_workers *workers); | 103 | struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, |
| 104 | int flags, | ||
| 105 | int max_active, | ||
| 106 | int thresh) | ||
| 107 | { | ||
| 108 | struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); | ||
| 68 | 109 | ||
| 69 | /* | 110 | if (unlikely(!ret)) |
| 70 | * btrfs_start_workers uses kthread_run, which can block waiting for memory | 111 | return NULL; |
| 71 | * for a very long time. It will actually throttle on page writeback, | ||
| 72 | * and so it may not make progress until after our btrfs worker threads | ||
| 73 | * process all of the pending work structs in their queue | ||
| 74 | * | ||
| 75 | * This means we can't use btrfs_start_workers from inside a btrfs worker | ||
| 76 | * thread that is used as part of cleaning dirty memory, which pretty much | ||
| 77 | * involves all of the worker threads. | ||
| 78 | * | ||
| 79 | * Instead we have a helper queue who never has more than one thread | ||
| 80 | * where we schedule thread start operations. This worker_start struct | ||
| 81 | * is used to contain the work and hold a pointer to the queue that needs | ||
| 82 | * another worker. | ||
| 83 | */ | ||
| 84 | struct worker_start { | ||
| 85 | struct btrfs_work work; | ||
| 86 | struct btrfs_workers *queue; | ||
| 87 | }; | ||
| 88 | 112 | ||
| 89 | static void start_new_worker_func(struct btrfs_work *work) | 113 | ret->normal = __btrfs_alloc_workqueue(name, flags & ~WQ_HIGHPRI, |
| 90 | { | 114 | max_active, thresh); |
| 91 | struct worker_start *start; | 115 | if (unlikely(!ret->normal)) { |
| 92 | start = container_of(work, struct worker_start, work); | 116 | kfree(ret); |
| 93 | __btrfs_start_workers(start->queue); | 117 | return NULL; |
| 94 | kfree(start); | 118 | } |
| 95 | } | ||
| 96 | 119 | ||
| 97 | /* | 120 | if (flags & WQ_HIGHPRI) { |
| 98 | * helper function to move a thread onto the idle list after it | 121 | ret->high = __btrfs_alloc_workqueue(name, flags, max_active, |
| 99 | * has finished some requests. | 122 | thresh); |
| 100 | */ | 123 | if (unlikely(!ret->high)) { |
| 101 | static void check_idle_worker(struct btrfs_worker_thread *worker) | 124 | __btrfs_destroy_workqueue(ret->normal); |
| 102 | { | 125 | kfree(ret); |
| 103 | if (!worker->idle && atomic_read(&worker->num_pending) < | 126 | return NULL; |
| 104 | worker->workers->idle_thresh / 2) { | ||
| 105 | unsigned long flags; | ||
| 106 | spin_lock_irqsave(&worker->workers->lock, flags); | ||
| 107 | worker->idle = 1; | ||
| 108 | |||
| 109 | /* the list may be empty if the worker is just starting */ | ||
| 110 | if (!list_empty(&worker->worker_list) && | ||
| 111 | !worker->workers->stopping) { | ||
| 112 | list_move(&worker->worker_list, | ||
| 113 | &worker->workers->idle_list); | ||
| 114 | } | 127 | } |
| 115 | spin_unlock_irqrestore(&worker->workers->lock, flags); | ||
| 116 | } | 128 | } |
| 129 | return ret; | ||
| 117 | } | 130 | } |
| 118 | 131 | ||
| 119 | /* | 132 | /* |
| 120 | * helper function to move a thread off the idle list after new | 133 | * Hook for threshold which will be called in btrfs_queue_work. |
| 121 | * pending work is added. | 134 | * This hook WILL be called in IRQ handler context, |
| 135 | * so workqueue_set_max_active MUST NOT be called in this hook | ||
| 122 | */ | 136 | */ |
| 123 | static void check_busy_worker(struct btrfs_worker_thread *worker) | 137 | static inline void thresh_queue_hook(struct __btrfs_workqueue *wq) |
| 124 | { | 138 | { |
| 125 | if (worker->idle && atomic_read(&worker->num_pending) >= | 139 | if (wq->thresh == NO_THRESHOLD) |
| 126 | worker->workers->idle_thresh) { | 140 | return; |
| 127 | unsigned long flags; | 141 | atomic_inc(&wq->pending); |
| 128 | spin_lock_irqsave(&worker->workers->lock, flags); | ||
| 129 | worker->idle = 0; | ||
| 130 | |||
| 131 | if (!list_empty(&worker->worker_list) && | ||
| 132 | !worker->workers->stopping) { | ||
| 133 | list_move_tail(&worker->worker_list, | ||
| 134 | &worker->workers->worker_list); | ||
| 135 | } | ||
| 136 | spin_unlock_irqrestore(&worker->workers->lock, flags); | ||
| 137 | } | ||
| 138 | } | 142 | } |
| 139 | 143 | ||
| 140 | static void check_pending_worker_creates(struct btrfs_worker_thread *worker) | 144 | /* |
| 145 | * Hook for threshold which will be called before executing the work. | ||
| 146 | * This hook is called in kthread context. | ||
| 147 | * So workqueue_set_max_active is called here. | ||
| 148 | */ | ||
| 149 | static inline void thresh_exec_hook(struct __btrfs_workqueue *wq) | ||
| 141 | { | 150 | { |
| 142 | struct btrfs_workers *workers = worker->workers; | 151 | int new_max_active; |
| 143 | struct worker_start *start; | 152 | long pending; |
| 144 | unsigned long flags; | 153 | int need_change = 0; |
| 145 | 154 | ||
| 146 | rmb(); | 155 | if (wq->thresh == NO_THRESHOLD) |
| 147 | if (!workers->atomic_start_pending) | ||
| 148 | return; | 156 | return; |
| 149 | 157 | ||
| 150 | start = kzalloc(sizeof(*start), GFP_NOFS); | 158 | atomic_dec(&wq->pending); |
| 151 | if (!start) | 159 | spin_lock(&wq->thres_lock); |
| 152 | return; | 160 | /* |
| 153 | 161 | * Use wq->count to limit the calling frequency of | |
| 154 | start->work.func = start_new_worker_func; | 162 | * workqueue_set_max_active. |
| 155 | start->queue = workers; | 163 | */ |
| 156 | 164 | wq->count++; | |
| 157 | spin_lock_irqsave(&workers->lock, flags); | 165 | wq->count %= (wq->thresh / 4); |
| 158 | if (!workers->atomic_start_pending) | 166 | if (!wq->count) |
| 159 | goto out; | 167 | goto out; |
| 160 | 168 | new_max_active = wq->current_max; | |
| 161 | workers->atomic_start_pending = 0; | ||
| 162 | if (workers->num_workers + workers->num_workers_starting >= | ||
| 163 | workers->max_workers) | ||
| 164 | goto out; | ||
| 165 | |||
| 166 | workers->num_workers_starting += 1; | ||
| 167 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 168 | btrfs_queue_worker(workers->atomic_worker_start, &start->work); | ||
| 169 | return; | ||
| 170 | 169 | ||
| 170 | /* | ||
| 171 | * pending may be changed later, but it's OK since we really | ||
| 172 | * don't need it to be that accurate to calculate new_max_active. | ||
| 173 | */ | ||
| 174 | pending = atomic_read(&wq->pending); | ||
| 175 | if (pending > wq->thresh) | ||
| 176 | new_max_active++; | ||
| 177 | if (pending < wq->thresh / 2) | ||
| 178 | new_max_active--; | ||
| 179 | new_max_active = clamp_val(new_max_active, 1, wq->max_active); | ||
| 180 | if (new_max_active != wq->current_max) { | ||
| 181 | need_change = 1; | ||
| 182 | wq->current_max = new_max_active; | ||
| 183 | } | ||
| 171 | out: | 184 | out: |
| 172 | kfree(start); | 185 | spin_unlock(&wq->thres_lock); |
| 173 | spin_unlock_irqrestore(&workers->lock, flags); | 186 | |
| 187 | if (need_change) { | ||
| 188 | workqueue_set_max_active(wq->normal_wq, wq->current_max); | ||
| 189 | } | ||
| 174 | } | 190 | } |
| 175 | 191 | ||
| 176 | static noinline void run_ordered_completions(struct btrfs_workers *workers, | 192 | static void run_ordered_work(struct __btrfs_workqueue *wq) |
| 177 | struct btrfs_work *work) | ||
| 178 | { | 193 | { |
| 179 | if (!workers->ordered) | 194 | struct list_head *list = &wq->ordered_list; |
| 180 | return; | 195 | struct btrfs_work *work; |
| 181 | 196 | spinlock_t *lock = &wq->list_lock; | |
| 182 | set_bit(WORK_DONE_BIT, &work->flags); | 197 | unsigned long flags; |
| 183 | |||
| 184 | spin_lock(&workers->order_lock); | ||
| 185 | 198 | ||
| 186 | while (1) { | 199 | while (1) { |
| 187 | if (!list_empty(&workers->prio_order_list)) { | 200 | spin_lock_irqsave(lock, flags); |
| 188 | work = list_entry(workers->prio_order_list.next, | 201 | if (list_empty(list)) |
| 189 | struct btrfs_work, order_list); | ||
| 190 | } else if (!list_empty(&workers->order_list)) { | ||
| 191 | work = list_entry(workers->order_list.next, | ||
| 192 | struct btrfs_work, order_list); | ||
| 193 | } else { | ||
| 194 | break; | 202 | break; |
| 195 | } | 203 | work = list_entry(list->next, struct btrfs_work, |
| 204 | ordered_list); | ||
| 196 | if (!test_bit(WORK_DONE_BIT, &work->flags)) | 205 | if (!test_bit(WORK_DONE_BIT, &work->flags)) |
| 197 | break; | 206 | break; |
| 198 | 207 | ||
| 199 | /* we are going to call the ordered done function, but | 208 | /* |
| 209 | * we are going to call the ordered done function, but | ||
| 200 | * we leave the work item on the list as a barrier so | 210 | * we leave the work item on the list as a barrier so |
| 201 | * that later work items that are done don't have their | 211 | * that later work items that are done don't have their |
| 202 | * functions called before this one returns | 212 | * functions called before this one returns |
| 203 | */ | 213 | */ |
| 204 | if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags)) | 214 | if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags)) |
| 205 | break; | 215 | break; |
| 206 | 216 | trace_btrfs_ordered_sched(work); | |
| 207 | spin_unlock(&workers->order_lock); | 217 | spin_unlock_irqrestore(lock, flags); |
| 208 | |||
| 209 | work->ordered_func(work); | 218 | work->ordered_func(work); |
| 210 | 219 | ||
| 211 | /* now take the lock again and drop our item from the list */ | 220 | /* now take the lock again and drop our item from the list */ |
| 212 | spin_lock(&workers->order_lock); | 221 | spin_lock_irqsave(lock, flags); |
| 213 | list_del(&work->order_list); | 222 | list_del(&work->ordered_list); |
| 214 | spin_unlock(&workers->order_lock); | 223 | spin_unlock_irqrestore(lock, flags); |
| 215 | 224 | ||
| 216 | /* | 225 | /* |
| 217 | * we don't want to call the ordered free functions | 226 | * we don't want to call the ordered free functions |
| 218 | * with the lock held though | 227 | * with the lock held though |
| 219 | */ | 228 | */ |
| 220 | work->ordered_free(work); | 229 | work->ordered_free(work); |
| 221 | spin_lock(&workers->order_lock); | 230 | trace_btrfs_all_work_done(work); |
| 222 | } | ||
| 223 | |||
| 224 | spin_unlock(&workers->order_lock); | ||
| 225 | } | ||
| 226 | |||
| 227 | static void put_worker(struct btrfs_worker_thread *worker) | ||
| 228 | { | ||
| 229 | if (atomic_dec_and_test(&worker->refs)) | ||
| 230 | kfree(worker); | ||
| 231 | } | ||
| 232 | |||
| 233 | static int try_worker_shutdown(struct btrfs_worker_thread *worker) | ||
| 234 | { | ||
| 235 | int freeit = 0; | ||
| 236 | |||
| 237 | spin_lock_irq(&worker->lock); | ||
| 238 | spin_lock(&worker->workers->lock); | ||
| 239 | if (worker->workers->num_workers > 1 && | ||
| 240 | worker->idle && | ||
| 241 | !worker->working && | ||
| 242 | !list_empty(&worker->worker_list) && | ||
| 243 | list_empty(&worker->prio_pending) && | ||
| 244 | list_empty(&worker->pending) && | ||
| 245 | atomic_read(&worker->num_pending) == 0) { | ||
| 246 | freeit = 1; | ||
| 247 | list_del_init(&worker->worker_list); | ||
| 248 | worker->workers->num_workers--; | ||
| 249 | } | 231 | } |
| 250 | spin_unlock(&worker->workers->lock); | 232 | spin_unlock_irqrestore(lock, flags); |
| 251 | spin_unlock_irq(&worker->lock); | ||
| 252 | |||
| 253 | if (freeit) | ||
| 254 | put_worker(worker); | ||
| 255 | return freeit; | ||
| 256 | } | 233 | } |
| 257 | 234 | ||
| 258 | static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker, | 235 | static void normal_work_helper(struct work_struct *arg) |
| 259 | struct list_head *prio_head, | ||
| 260 | struct list_head *head) | ||
| 261 | { | ||
| 262 | struct btrfs_work *work = NULL; | ||
| 263 | struct list_head *cur = NULL; | ||
| 264 | |||
| 265 | if (!list_empty(prio_head)) | ||
| 266 | cur = prio_head->next; | ||
| 267 | |||
| 268 | smp_mb(); | ||
| 269 | if (!list_empty(&worker->prio_pending)) | ||
| 270 | goto refill; | ||
| 271 | |||
| 272 | if (!list_empty(head)) | ||
| 273 | cur = head->next; | ||
| 274 | |||
| 275 | if (cur) | ||
| 276 | goto out; | ||
| 277 | |||
| 278 | refill: | ||
| 279 | spin_lock_irq(&worker->lock); | ||
| 280 | list_splice_tail_init(&worker->prio_pending, prio_head); | ||
| 281 | list_splice_tail_init(&worker->pending, head); | ||
| 282 | |||
| 283 | if (!list_empty(prio_head)) | ||
| 284 | cur = prio_head->next; | ||
| 285 | else if (!list_empty(head)) | ||
| 286 | cur = head->next; | ||
| 287 | spin_unlock_irq(&worker->lock); | ||
| 288 | |||
| 289 | if (!cur) | ||
| 290 | goto out_fail; | ||
| 291 | |||
| 292 | out: | ||
| 293 | work = list_entry(cur, struct btrfs_work, list); | ||
| 294 | |||
| 295 | out_fail: | ||
| 296 | return work; | ||
| 297 | } | ||
| 298 | |||
| 299 | /* | ||
| 300 | * main loop for servicing work items | ||
| 301 | */ | ||
| 302 | static int worker_loop(void *arg) | ||
| 303 | { | 236 | { |
| 304 | struct btrfs_worker_thread *worker = arg; | ||
| 305 | struct list_head head; | ||
| 306 | struct list_head prio_head; | ||
| 307 | struct btrfs_work *work; | 237 | struct btrfs_work *work; |
| 238 | struct __btrfs_workqueue *wq; | ||
| 239 | int need_order = 0; | ||
| 308 | 240 | ||
| 309 | INIT_LIST_HEAD(&head); | 241 | work = container_of(arg, struct btrfs_work, normal_work); |
| 310 | INIT_LIST_HEAD(&prio_head); | 242 | /* |
| 311 | 243 | * We should not touch things inside work in the following cases: | |
| 312 | do { | 244 | * 1) after work->func() if it has no ordered_free |
| 313 | again: | 245 | * Since the struct is freed in work->func(). |
| 314 | while (1) { | 246 | * 2) after setting WORK_DONE_BIT |
| 315 | 247 | * The work may be freed in other threads almost instantly. | |
| 316 | 248 | * So we save the needed things here. | |
| 317 | work = get_next_work(worker, &prio_head, &head); | 249 | */ |
| 318 | if (!work) | 250 | if (work->ordered_func) |
| 319 | break; | 251 | need_order = 1; |
| 320 | 252 | wq = work->wq; | |
| 321 | list_del(&work->list); | 253 | |
| 322 | clear_bit(WORK_QUEUED_BIT, &work->flags); | 254 | trace_btrfs_work_sched(work); |
| 323 | 255 | thresh_exec_hook(wq); | |
| 324 | work->worker = worker; | 256 | work->func(work); |
| 325 | 257 | if (need_order) { | |
| 326 | work->func(work); | 258 | set_bit(WORK_DONE_BIT, &work->flags); |
| 327 | 259 | run_ordered_work(wq); | |
| 328 | atomic_dec(&worker->num_pending); | ||
| 329 | /* | ||
| 330 | * unless this is an ordered work queue, | ||
| 331 | * 'work' was probably freed by func above. | ||
| 332 | */ | ||
| 333 | run_ordered_completions(worker->workers, work); | ||
| 334 | |||
| 335 | check_pending_worker_creates(worker); | ||
| 336 | cond_resched(); | ||
| 337 | } | ||
| 338 | |||
| 339 | spin_lock_irq(&worker->lock); | ||
| 340 | check_idle_worker(worker); | ||
| 341 | |||
| 342 | if (freezing(current)) { | ||
| 343 | worker->working = 0; | ||
| 344 | spin_unlock_irq(&worker->lock); | ||
| 345 | try_to_freeze(); | ||
| 346 | } else { | ||
| 347 | spin_unlock_irq(&worker->lock); | ||
| 348 | if (!kthread_should_stop()) { | ||
| 349 | cpu_relax(); | ||
| 350 | /* | ||
| 351 | * we've dropped the lock, did someone else | ||
| 352 | * jump_in? | ||
| 353 | */ | ||
| 354 | smp_mb(); | ||
| 355 | if (!list_empty(&worker->pending) || | ||
| 356 | !list_empty(&worker->prio_pending)) | ||
| 357 | continue; | ||
| 358 | |||
| 359 | /* | ||
| 360 | * this short schedule allows more work to | ||
| 361 | * come in without the queue functions | ||
| 362 | * needing to go through wake_up_process() | ||
| 363 | * | ||
| 364 | * worker->working is still 1, so nobody | ||
| 365 | * is going to try and wake us up | ||
| 366 | */ | ||
| 367 | schedule_timeout(1); | ||
| 368 | smp_mb(); | ||
| 369 | if (!list_empty(&worker->pending) || | ||
| 370 | !list_empty(&worker->prio_pending)) | ||
| 371 | continue; | ||
| 372 | |||
| 373 | if (kthread_should_stop()) | ||
| 374 | break; | ||
| 375 | |||
| 376 | /* still no more work?, sleep for real */ | ||
| 377 | spin_lock_irq(&worker->lock); | ||
| 378 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 379 | if (!list_empty(&worker->pending) || | ||
| 380 | !list_empty(&worker->prio_pending)) { | ||
| 381 | spin_unlock_irq(&worker->lock); | ||
| 382 | set_current_state(TASK_RUNNING); | ||
| 383 | goto again; | ||
| 384 | } | ||
| 385 | |||
| 386 | /* | ||
| 387 | * this makes sure we get a wakeup when someone | ||
| 388 | * adds something new to the queue | ||
| 389 | */ | ||
| 390 | worker->working = 0; | ||
| 391 | spin_unlock_irq(&worker->lock); | ||
| 392 | |||
| 393 | if (!kthread_should_stop()) { | ||
| 394 | schedule_timeout(HZ * 120); | ||
| 395 | if (!worker->working && | ||
| 396 | try_worker_shutdown(worker)) { | ||
| 397 | return 0; | ||
| 398 | } | ||
| 399 | } | ||
| 400 | } | ||
| 401 | __set_current_state(TASK_RUNNING); | ||
| 402 | } | ||
| 403 | } while (!kthread_should_stop()); | ||
| 404 | return 0; | ||
| 405 | } | ||
| 406 | |||
| 407 | /* | ||
| 408 | * this will wait for all the worker threads to shutdown | ||
| 409 | */ | ||
| 410 | void btrfs_stop_workers(struct btrfs_workers *workers) | ||
| 411 | { | ||
| 412 | struct list_head *cur; | ||
| 413 | struct btrfs_worker_thread *worker; | ||
| 414 | int can_stop; | ||
| 415 | |||
| 416 | spin_lock_irq(&workers->lock); | ||
| 417 | workers->stopping = 1; | ||
| 418 | list_splice_init(&workers->idle_list, &workers->worker_list); | ||
| 419 | while (!list_empty(&workers->worker_list)) { | ||
| 420 | cur = workers->worker_list.next; | ||
| 421 | worker = list_entry(cur, struct btrfs_worker_thread, | ||
| 422 | worker_list); | ||
| 423 | |||
| 424 | atomic_inc(&worker->refs); | ||
| 425 | workers->num_workers -= 1; | ||
| 426 | if (!list_empty(&worker->worker_list)) { | ||
| 427 | list_del_init(&worker->worker_list); | ||
| 428 | put_worker(worker); | ||
| 429 | can_stop = 1; | ||
| 430 | } else | ||
| 431 | can_stop = 0; | ||
| 432 | spin_unlock_irq(&workers->lock); | ||
| 433 | if (can_stop) | ||
| 434 | kthread_stop(worker->task); | ||
| 435 | spin_lock_irq(&workers->lock); | ||
| 436 | put_worker(worker); | ||
| 437 | } | 260 | } |
| 438 | spin_unlock_irq(&workers->lock); | 261 | if (!need_order) |
| 262 | trace_btrfs_all_work_done(work); | ||
| 439 | } | 263 | } |
| 440 | 264 | ||
| 441 | /* | 265 | void btrfs_init_work(struct btrfs_work *work, |
| 442 | * simple init on struct btrfs_workers | 266 | btrfs_func_t func, |
| 443 | */ | 267 | btrfs_func_t ordered_func, |
| 444 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max, | 268 | btrfs_func_t ordered_free) |
| 445 | struct btrfs_workers *async_helper) | ||
| 446 | { | 269 | { |
| 447 | workers->num_workers = 0; | 270 | work->func = func; |
| 448 | workers->num_workers_starting = 0; | 271 | work->ordered_func = ordered_func; |
| 449 | INIT_LIST_HEAD(&workers->worker_list); | 272 | work->ordered_free = ordered_free; |
| 450 | INIT_LIST_HEAD(&workers->idle_list); | 273 | INIT_WORK(&work->normal_work, normal_work_helper); |
| 451 | INIT_LIST_HEAD(&workers->order_list); | 274 | INIT_LIST_HEAD(&work->ordered_list); |
| 452 | INIT_LIST_HEAD(&workers->prio_order_list); | 275 | work->flags = 0; |
| 453 | spin_lock_init(&workers->lock); | ||
| 454 | spin_lock_init(&workers->order_lock); | ||
| 455 | workers->max_workers = max; | ||
| 456 | workers->idle_thresh = 32; | ||
| 457 | workers->name = name; | ||
| 458 | workers->ordered = 0; | ||
| 459 | workers->atomic_start_pending = 0; | ||
| 460 | workers->atomic_worker_start = async_helper; | ||
| 461 | workers->stopping = 0; | ||
| 462 | } | 276 | } |
| 463 | 277 | ||
| 464 | /* | 278 | static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq, |
| 465 | * starts new worker threads. This does not enforce the max worker | 279 | struct btrfs_work *work) |
| 466 | * count in case you need to temporarily go past it. | ||
| 467 | */ | ||
| 468 | static int __btrfs_start_workers(struct btrfs_workers *workers) | ||
| 469 | { | 280 | { |
| 470 | struct btrfs_worker_thread *worker; | 281 | unsigned long flags; |
| 471 | int ret = 0; | ||
| 472 | |||
| 473 | worker = kzalloc(sizeof(*worker), GFP_NOFS); | ||
| 474 | if (!worker) { | ||
| 475 | ret = -ENOMEM; | ||
| 476 | goto fail; | ||
| 477 | } | ||
| 478 | |||
| 479 | INIT_LIST_HEAD(&worker->pending); | ||
| 480 | INIT_LIST_HEAD(&worker->prio_pending); | ||
| 481 | INIT_LIST_HEAD(&worker->worker_list); | ||
| 482 | spin_lock_init(&worker->lock); | ||
| 483 | |||
| 484 | atomic_set(&worker->num_pending, 0); | ||
| 485 | atomic_set(&worker->refs, 1); | ||
| 486 | worker->workers = workers; | ||
| 487 | worker->task = kthread_create(worker_loop, worker, | ||
| 488 | "btrfs-%s-%d", workers->name, | ||
| 489 | workers->num_workers + 1); | ||
| 490 | if (IS_ERR(worker->task)) { | ||
| 491 | ret = PTR_ERR(worker->task); | ||
| 492 | goto fail; | ||
| 493 | } | ||
| 494 | 282 | ||
| 495 | spin_lock_irq(&workers->lock); | 283 | work->wq = wq; |
| 496 | if (workers->stopping) { | 284 | thresh_queue_hook(wq); |
| 497 | spin_unlock_irq(&workers->lock); | 285 | if (work->ordered_func) { |
| 498 | ret = -EINVAL; | 286 | spin_lock_irqsave(&wq->list_lock, flags); |
| 499 | goto fail_kthread; | 287 | list_add_tail(&work->ordered_list, &wq->ordered_list); |
| 288 | spin_unlock_irqrestore(&wq->list_lock, flags); | ||
| 500 | } | 289 | } |
| 501 | list_add_tail(&worker->worker_list, &workers->idle_list); | 290 | queue_work(wq->normal_wq, &work->normal_work); |
| 502 | worker->idle = 1; | 291 | trace_btrfs_work_queued(work); |
| 503 | workers->num_workers++; | ||
| 504 | workers->num_workers_starting--; | ||
| 505 | WARN_ON(workers->num_workers_starting < 0); | ||
| 506 | spin_unlock_irq(&workers->lock); | ||
| 507 | |||
| 508 | wake_up_process(worker->task); | ||
| 509 | return 0; | ||
| 510 | |||
| 511 | fail_kthread: | ||
| 512 | kthread_stop(worker->task); | ||
| 513 | fail: | ||
| 514 | kfree(worker); | ||
| 515 | spin_lock_irq(&workers->lock); | ||
| 516 | workers->num_workers_starting--; | ||
| 517 | spin_unlock_irq(&workers->lock); | ||
| 518 | return ret; | ||
| 519 | } | 292 | } |
| 520 | 293 | ||
| 521 | int btrfs_start_workers(struct btrfs_workers *workers) | 294 | void btrfs_queue_work(struct btrfs_workqueue *wq, |
| 295 | struct btrfs_work *work) | ||
| 522 | { | 296 | { |
| 523 | spin_lock_irq(&workers->lock); | 297 | struct __btrfs_workqueue *dest_wq; |
| 524 | workers->num_workers_starting++; | ||
| 525 | spin_unlock_irq(&workers->lock); | ||
| 526 | return __btrfs_start_workers(workers); | ||
| 527 | } | ||
| 528 | |||
| 529 | /* | ||
| 530 | * run through the list and find a worker thread that doesn't have a lot | ||
| 531 | * to do right now. This can return null if we aren't yet at the thread | ||
| 532 | * count limit and all of the threads are busy. | ||
| 533 | */ | ||
| 534 | static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) | ||
| 535 | { | ||
| 536 | struct btrfs_worker_thread *worker; | ||
| 537 | struct list_head *next; | ||
| 538 | int enforce_min; | ||
| 539 | |||
| 540 | enforce_min = (workers->num_workers + workers->num_workers_starting) < | ||
| 541 | workers->max_workers; | ||
| 542 | |||
| 543 | /* | ||
| 544 | * if we find an idle thread, don't move it to the end of the | ||
| 545 | * idle list. This improves the chance that the next submission | ||
| 546 | * will reuse the same thread, and maybe catch it while it is still | ||
| 547 | * working | ||
| 548 | */ | ||
| 549 | if (!list_empty(&workers->idle_list)) { | ||
| 550 | next = workers->idle_list.next; | ||
| 551 | worker = list_entry(next, struct btrfs_worker_thread, | ||
| 552 | worker_list); | ||
| 553 | return worker; | ||
| 554 | } | ||
| 555 | if (enforce_min || list_empty(&workers->worker_list)) | ||
| 556 | return NULL; | ||
| 557 | |||
| 558 | /* | ||
| 559 | * if we pick a busy task, move the task to the end of the list. | ||
| 560 | * hopefully this will keep things somewhat evenly balanced. | ||
| 561 | * Do the move in batches based on the sequence number. This groups | ||
| 562 | * requests submitted at roughly the same time onto the same worker. | ||
| 563 | */ | ||
| 564 | next = workers->worker_list.next; | ||
| 565 | worker = list_entry(next, struct btrfs_worker_thread, worker_list); | ||
| 566 | worker->sequence++; | ||
| 567 | 298 | ||
| 568 | if (worker->sequence % workers->idle_thresh == 0) | 299 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags) && wq->high) |
| 569 | list_move_tail(next, &workers->worker_list); | 300 | dest_wq = wq->high; |
| 570 | return worker; | 301 | else |
| 302 | dest_wq = wq->normal; | ||
| 303 | __btrfs_queue_work(dest_wq, work); | ||
| 571 | } | 304 | } |
| 572 | 305 | ||
| 573 | /* | 306 | static inline void |
| 574 | * selects a worker thread to take the next job. This will either find | 307 | __btrfs_destroy_workqueue(struct __btrfs_workqueue *wq) |
| 575 | * an idle worker, start a new worker up to the max count, or just return | ||
| 576 | * one of the existing busy workers. | ||
| 577 | */ | ||
| 578 | static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers) | ||
| 579 | { | 308 | { |
| 580 | struct btrfs_worker_thread *worker; | 309 | destroy_workqueue(wq->normal_wq); |
| 581 | unsigned long flags; | 310 | trace_btrfs_workqueue_destroy(wq); |
| 582 | struct list_head *fallback; | 311 | kfree(wq); |
| 583 | int ret; | ||
| 584 | |||
| 585 | spin_lock_irqsave(&workers->lock, flags); | ||
| 586 | again: | ||
| 587 | worker = next_worker(workers); | ||
| 588 | |||
| 589 | if (!worker) { | ||
| 590 | if (workers->num_workers + workers->num_workers_starting >= | ||
| 591 | workers->max_workers) { | ||
| 592 | goto fallback; | ||
| 593 | } else if (workers->atomic_worker_start) { | ||
| 594 | workers->atomic_start_pending = 1; | ||
| 595 | goto fallback; | ||
| 596 | } else { | ||
| 597 | workers->num_workers_starting++; | ||
| 598 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 599 | /* we're below the limit, start another worker */ | ||
| 600 | ret = __btrfs_start_workers(workers); | ||
| 601 | spin_lock_irqsave(&workers->lock, flags); | ||
| 602 | if (ret) | ||
| 603 | goto fallback; | ||
| 604 | goto again; | ||
| 605 | } | ||
| 606 | } | ||
| 607 | goto found; | ||
| 608 | |||
| 609 | fallback: | ||
| 610 | fallback = NULL; | ||
| 611 | /* | ||
| 612 | * we have failed to find any workers, just | ||
| 613 | * return the first one we can find. | ||
| 614 | */ | ||
| 615 | if (!list_empty(&workers->worker_list)) | ||
| 616 | fallback = workers->worker_list.next; | ||
| 617 | if (!list_empty(&workers->idle_list)) | ||
| 618 | fallback = workers->idle_list.next; | ||
| 619 | BUG_ON(!fallback); | ||
| 620 | worker = list_entry(fallback, | ||
| 621 | struct btrfs_worker_thread, worker_list); | ||
| 622 | found: | ||
| 623 | /* | ||
| 624 | * this makes sure the worker doesn't exit before it is placed | ||
| 625 | * onto a busy/idle list | ||
| 626 | */ | ||
| 627 | atomic_inc(&worker->num_pending); | ||
| 628 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 629 | return worker; | ||
| 630 | } | 312 | } |
| 631 | 313 | ||
| 632 | /* | 314 | void btrfs_destroy_workqueue(struct btrfs_workqueue *wq) |
| 633 | * btrfs_requeue_work just puts the work item back on the tail of the list | ||
| 634 | * it was taken from. It is intended for use with long running work functions | ||
| 635 | * that make some progress and want to give the cpu up for others. | ||
| 636 | */ | ||
| 637 | void btrfs_requeue_work(struct btrfs_work *work) | ||
| 638 | { | 315 | { |
| 639 | struct btrfs_worker_thread *worker = work->worker; | 316 | if (!wq) |
| 640 | unsigned long flags; | ||
| 641 | int wake = 0; | ||
| 642 | |||
| 643 | if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags)) | ||
| 644 | return; | 317 | return; |
| 645 | 318 | if (wq->high) | |
| 646 | spin_lock_irqsave(&worker->lock, flags); | 319 | __btrfs_destroy_workqueue(wq->high); |
| 647 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) | 320 | __btrfs_destroy_workqueue(wq->normal); |
| 648 | list_add_tail(&work->list, &worker->prio_pending); | 321 | kfree(wq); |
| 649 | else | ||
| 650 | list_add_tail(&work->list, &worker->pending); | ||
| 651 | atomic_inc(&worker->num_pending); | ||
| 652 | |||
| 653 | /* by definition we're busy, take ourselves off the idle | ||
| 654 | * list | ||
| 655 | */ | ||
| 656 | if (worker->idle) { | ||
| 657 | spin_lock(&worker->workers->lock); | ||
| 658 | worker->idle = 0; | ||
| 659 | list_move_tail(&worker->worker_list, | ||
| 660 | &worker->workers->worker_list); | ||
| 661 | spin_unlock(&worker->workers->lock); | ||
| 662 | } | ||
| 663 | if (!worker->working) { | ||
| 664 | wake = 1; | ||
| 665 | worker->working = 1; | ||
| 666 | } | ||
| 667 | |||
| 668 | if (wake) | ||
| 669 | wake_up_process(worker->task); | ||
| 670 | spin_unlock_irqrestore(&worker->lock, flags); | ||
| 671 | } | 322 | } |
| 672 | 323 | ||
| 673 | void btrfs_set_work_high_prio(struct btrfs_work *work) | 324 | void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max) |
| 674 | { | 325 | { |
| 675 | set_bit(WORK_HIGH_PRIO_BIT, &work->flags); | 326 | wq->normal->max_active = max; |
| 327 | if (wq->high) | ||
| 328 | wq->high->max_active = max; | ||
| 676 | } | 329 | } |
| 677 | 330 | ||
| 678 | /* | 331 | void btrfs_set_work_high_priority(struct btrfs_work *work) |
| 679 | * places a struct btrfs_work into the pending queue of one of the kthreads | ||
| 680 | */ | ||
| 681 | void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | ||
| 682 | { | 332 | { |
| 683 | struct btrfs_worker_thread *worker; | 333 | set_bit(WORK_HIGH_PRIO_BIT, &work->flags); |
| 684 | unsigned long flags; | ||
| 685 | int wake = 0; | ||
| 686 | |||
| 687 | /* don't requeue something already on a list */ | ||
| 688 | if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags)) | ||
| 689 | return; | ||
| 690 | |||
| 691 | worker = find_worker(workers); | ||
| 692 | if (workers->ordered) { | ||
| 693 | /* | ||
| 694 | * you're not allowed to do ordered queues from an | ||
| 695 | * interrupt handler | ||
| 696 | */ | ||
| 697 | spin_lock(&workers->order_lock); | ||
| 698 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) { | ||
| 699 | list_add_tail(&work->order_list, | ||
| 700 | &workers->prio_order_list); | ||
| 701 | } else { | ||
| 702 | list_add_tail(&work->order_list, &workers->order_list); | ||
| 703 | } | ||
| 704 | spin_unlock(&workers->order_lock); | ||
| 705 | } else { | ||
| 706 | INIT_LIST_HEAD(&work->order_list); | ||
| 707 | } | ||
| 708 | |||
| 709 | spin_lock_irqsave(&worker->lock, flags); | ||
| 710 | |||
| 711 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) | ||
| 712 | list_add_tail(&work->list, &worker->prio_pending); | ||
| 713 | else | ||
| 714 | list_add_tail(&work->list, &worker->pending); | ||
| 715 | check_busy_worker(worker); | ||
| 716 | |||
| 717 | /* | ||
| 718 | * avoid calling into wake_up_process if this thread has already | ||
| 719 | * been kicked | ||
| 720 | */ | ||
| 721 | if (!worker->working) | ||
| 722 | wake = 1; | ||
| 723 | worker->working = 1; | ||
| 724 | |||
| 725 | if (wake) | ||
| 726 | wake_up_process(worker->task); | ||
| 727 | spin_unlock_irqrestore(&worker->lock, flags); | ||
| 728 | } | 334 | } |
