Diffstat (limited to 'fs/btrfs')
 35 files changed, 2004 insertions(+), 1629 deletions(-)
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index c1e0b0caf9cc..ecb5832c0967 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2007 Oracle. All rights reserved.
+ * Copyright (C) 2014 Fujitsu. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
@@ -21,708 +22,313 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/freezer.h>
+#include <linux/workqueue.h>
 #include "async-thread.h"
-
-#define WORK_QUEUED_BIT 0
-#define WORK_DONE_BIT 1
-#define WORK_ORDER_DONE_BIT 2
-#define WORK_HIGH_PRIO_BIT 3
-
-/*
- * container for the kthread task pointer and the list of pending work
- * One of these is allocated per thread.
- */
-struct btrfs_worker_thread {
-	/* pool we belong to */
-	struct btrfs_workers *workers;
-
-	/* list of struct btrfs_work that are waiting for service */
-	struct list_head pending;
-	struct list_head prio_pending;
-
-	/* list of worker threads from struct btrfs_workers */
-	struct list_head worker_list;
-
-	/* kthread */
-	struct task_struct *task;
-
-	/* number of things on the pending list */
-	atomic_t num_pending;
-
-	/* reference counter for this struct */
-	atomic_t refs;
-
-	unsigned long sequence;
-
-	/* protects the pending list. */
-	spinlock_t lock;
-
-	/* set to non-zero when this thread is already awake and kicking */
-	int working;
-
-	/* are we currently idle */
-	int idle;
-};
-
-static int __btrfs_start_workers(struct btrfs_workers *workers);
-
-/*
- * btrfs_start_workers uses kthread_run, which can block waiting for memory
- * for a very long time. It will actually throttle on page writeback,
- * and so it may not make progress until after our btrfs worker threads
- * process all of the pending work structs in their queue
- *
- * This means we can't use btrfs_start_workers from inside a btrfs worker
- * thread that is used as part of cleaning dirty memory, which pretty much
- * involves all of the worker threads.
- *
- * Instead we have a helper queue who never has more than one thread
- * where we scheduler thread start operations. This worker_start struct
- * is used to contain the work and hold a pointer to the queue that needs
- * another worker.
- */
-struct worker_start {
-	struct btrfs_work work;
-	struct btrfs_workers *queue;
-};
-
-static void start_new_worker_func(struct btrfs_work *work)
-{
-	struct worker_start *start;
-	start = container_of(work, struct worker_start, work);
-	__btrfs_start_workers(start->queue);
-	kfree(start);
-}
-
-/*
- * helper function to move a thread onto the idle list after it
- * has finished some requests.
- */
-static void check_idle_worker(struct btrfs_worker_thread *worker)
-{
-	if (!worker->idle && atomic_read(&worker->num_pending) <
-	    worker->workers->idle_thresh / 2) {
-		unsigned long flags;
-		spin_lock_irqsave(&worker->workers->lock, flags);
-		worker->idle = 1;
-
-		/* the list may be empty if the worker is just starting */
-		if (!list_empty(&worker->worker_list) &&
-		    !worker->workers->stopping) {
-			list_move(&worker->worker_list,
-				  &worker->workers->idle_list);
-		}
-		spin_unlock_irqrestore(&worker->workers->lock, flags);
-	}
-}
-
-/*
- * helper function to move a thread off the idle list after new
- * pending work is added.
- */
-static void check_busy_worker(struct btrfs_worker_thread *worker)
-{
-	if (worker->idle && atomic_read(&worker->num_pending) >=
-	    worker->workers->idle_thresh) {
-		unsigned long flags;
-		spin_lock_irqsave(&worker->workers->lock, flags);
-		worker->idle = 0;
-
-		if (!list_empty(&worker->worker_list) &&
-		    !worker->workers->stopping) {
-			list_move_tail(&worker->worker_list,
-				       &worker->workers->worker_list);
-		}
-		spin_unlock_irqrestore(&worker->workers->lock, flags);
-	}
-}
-
-static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
-{
-	struct btrfs_workers *workers = worker->workers;
-	struct worker_start *start;
-	unsigned long flags;
-
-	rmb();
-	if (!workers->atomic_start_pending)
-		return;
-
-	start = kzalloc(sizeof(*start), GFP_NOFS);
-	if (!start)
-		return;
-
-	start->work.func = start_new_worker_func;
-	start->queue = workers;
-
-	spin_lock_irqsave(&workers->lock, flags);
-	if (!workers->atomic_start_pending)
-		goto out;
-
-	workers->atomic_start_pending = 0;
-	if (workers->num_workers + workers->num_workers_starting >=
-	    workers->max_workers)
-		goto out;
-
-	workers->num_workers_starting += 1;
-	spin_unlock_irqrestore(&workers->lock, flags);
-	btrfs_queue_worker(workers->atomic_worker_start, &start->work);
-	return;
-
-out:
-	kfree(start);
-	spin_unlock_irqrestore(&workers->lock, flags);
-}
-
-static noinline void run_ordered_completions(struct btrfs_workers *workers,
-					     struct btrfs_work *work)
-{
-	if (!workers->ordered)
-		return;
-
-	set_bit(WORK_DONE_BIT, &work->flags);
-
-	spin_lock(&workers->order_lock);
-
-	while (1) {
-		if (!list_empty(&workers->prio_order_list)) {
-			work = list_entry(workers->prio_order_list.next,
-					  struct btrfs_work, order_list);
-		} else if (!list_empty(&workers->order_list)) {
-			work = list_entry(workers->order_list.next,
-					  struct btrfs_work, order_list);
-		} else {
-			break;
-		}
-		if (!test_bit(WORK_DONE_BIT, &work->flags))
-			break;
-
-		/* we are going to call the ordered done function, but
-		 * we leave the work item on the list as a barrier so
-		 * that later work items that are done don't have their
-		 * functions called before this one returns
-		 */
-		if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
-			break;
-
-		spin_unlock(&workers->order_lock);
-
-		work->ordered_func(work);
-
-		/* now take the lock again and drop our item from the list */
-		spin_lock(&workers->order_lock);
-		list_del(&work->order_list);
-		spin_unlock(&workers->order_lock);
-
-		/*
-		 * we don't want to call the ordered free functions
-		 * with the lock held though
-		 */
-		work->ordered_free(work);
-		spin_lock(&workers->order_lock);
-	}
-
-	spin_unlock(&workers->order_lock);
-}
-
-static void put_worker(struct btrfs_worker_thread *worker)
-{
-	if (atomic_dec_and_test(&worker->refs))
-		kfree(worker);
-}
-
-static int try_worker_shutdown(struct btrfs_worker_thread *worker)
-{
-	int freeit = 0;
-
-	spin_lock_irq(&worker->lock);
-	spin_lock(&worker->workers->lock);
-	if (worker->workers->num_workers > 1 &&
-	    worker->idle &&
-	    !worker->working &&
-	    !list_empty(&worker->worker_list) &&
-	    list_empty(&worker->prio_pending) &&
-	    list_empty(&worker->pending) &&
-	    atomic_read(&worker->num_pending) == 0) {
-		freeit = 1;
-		list_del_init(&worker->worker_list);
-		worker->workers->num_workers--;
-	}
-	spin_unlock(&worker->workers->lock);
-	spin_unlock_irq(&worker->lock);
-
-	if (freeit)
-		put_worker(worker);
-	return freeit;
-}
-
-static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker,
-					struct list_head *prio_head,
-					struct list_head *head)
-{
-	struct btrfs_work *work = NULL;
-	struct list_head *cur = NULL;
-
-	if (!list_empty(prio_head))
-		cur = prio_head->next;
-
-	smp_mb();
-	if (!list_empty(&worker->prio_pending))
-		goto refill;
-
-	if (!list_empty(head))
-		cur = head->next;
-
-	if (cur)
-		goto out;
-
-refill:
-	spin_lock_irq(&worker->lock);
-	list_splice_tail_init(&worker->prio_pending, prio_head);
-	list_splice_tail_init(&worker->pending, head);
-
-	if (!list_empty(prio_head))
-		cur = prio_head->next;
-	else if (!list_empty(head))
-		cur = head->next;
-	spin_unlock_irq(&worker->lock);
-
-	if (!cur)
-		goto out_fail;
-
-out:
-	work = list_entry(cur, struct btrfs_work, list);
-
-out_fail:
-	return work;
-}
-
-/*
- * main loop for servicing work items
- */
-static int worker_loop(void *arg)
-{
-	struct btrfs_worker_thread *worker = arg;
-	struct list_head head;
-	struct list_head prio_head;
-	struct btrfs_work *work;
-
-	INIT_LIST_HEAD(&head);
-	INIT_LIST_HEAD(&prio_head);
-
-	do {
-again:
-		while (1) {
-
-
-			work = get_next_work(worker, &prio_head, &head);
-			if (!work)
-				break;
-
-			list_del(&work->list);
-			clear_bit(WORK_QUEUED_BIT, &work->flags);
-
-			work->worker = worker;
-
-			work->func(work);
-
-			atomic_dec(&worker->num_pending);
-			/*
-			 * unless this is an ordered work queue,
-			 * 'work' was probably freed by func above.
-			 */
-			run_ordered_completions(worker->workers, work);
-
-			check_pending_worker_creates(worker);
-			cond_resched();
-		}
-
-		spin_lock_irq(&worker->lock);
-		check_idle_worker(worker);
-
-		if (freezing(current)) {
-			worker->working = 0;
-			spin_unlock_irq(&worker->lock);
-			try_to_freeze();
-		} else {
-			spin_unlock_irq(&worker->lock);
-			if (!kthread_should_stop()) {
-				cpu_relax();
-				/*
-				 * we've dropped the lock, did someone else
-				 * jump_in?
-				 */
-				smp_mb();
-				if (!list_empty(&worker->pending) ||
-				    !list_empty(&worker->prio_pending))
-					continue;
-
-				/*
-				 * this short schedule allows more work to
-				 * come in without the queue functions
-				 * needing to go through wake_up_process()
-				 *
-				 * worker->working is still 1, so nobody
-				 * is going to try and wake us up
-				 */
-				schedule_timeout(1);
-				smp_mb();
-				if (!list_empty(&worker->pending) ||
-				    !list_empty(&worker->prio_pending))
-					continue;
-
-				if (kthread_should_stop())
-					break;
-
-				/* still no more work?, sleep for real */
-				spin_lock_irq(&worker->lock);
-				set_current_state(TASK_INTERRUPTIBLE);
-				if (!list_empty(&worker->pending) ||
-				    !list_empty(&worker->prio_pending)) {
-					spin_unlock_irq(&worker->lock);
-					set_current_state(TASK_RUNNING);
-					goto again;
-				}
-
-				/*
-				 * this makes sure we get a wakeup when someone
-				 * adds something new to the queue
-				 */
-				worker->working = 0;
-				spin_unlock_irq(&worker->lock);
-
-				if (!kthread_should_stop()) {
-					schedule_timeout(HZ * 120);
-					if (!worker->working &&
-					    try_worker_shutdown(worker)) {
-						return 0;
-					}
-				}
-			}
-			__set_current_state(TASK_RUNNING);
-		}
-	} while (!kthread_should_stop());
-	return 0;
-}
-
-/*
- * this will wait for all the worker threads to shutdown
- */
-void btrfs_stop_workers(struct btrfs_workers *workers)
-{
-	struct list_head *cur;
-	struct btrfs_worker_thread *worker;
-	int can_stop;
-
-	spin_lock_irq(&workers->lock);
-	workers->stopping = 1;
-	list_splice_init(&workers->idle_list, &workers->worker_list);
-	while (!list_empty(&workers->worker_list)) {
-		cur = workers->worker_list.next;
-		worker = list_entry(cur, struct btrfs_worker_thread,
-				    worker_list);
-
-		atomic_inc(&worker->refs);
-		workers->num_workers -= 1;
-		if (!list_empty(&worker->worker_list)) {
-			list_del_init(&worker->worker_list);
-			put_worker(worker);
-			can_stop = 1;
-		} else
-			can_stop = 0;
-		spin_unlock_irq(&workers->lock);
-		if (can_stop)
-			kthread_stop(worker->task);
-		spin_lock_irq(&workers->lock);
-		put_worker(worker);
-	}
-	spin_unlock_irq(&workers->lock);
-}
-
-/*
- * simple init on struct btrfs_workers
- */
-void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
-			struct btrfs_workers *async_helper)
-{
-	workers->num_workers = 0;
-	workers->num_workers_starting = 0;
-	INIT_LIST_HEAD(&workers->worker_list);
-	INIT_LIST_HEAD(&workers->idle_list);
-	INIT_LIST_HEAD(&workers->order_list);
-	INIT_LIST_HEAD(&workers->prio_order_list);
-	spin_lock_init(&workers->lock);
-	spin_lock_init(&workers->order_lock);
-	workers->max_workers = max;
-	workers->idle_thresh = 32;
-	workers->name = name;
-	workers->ordered = 0;
-	workers->atomic_start_pending = 0;
-	workers->atomic_worker_start = async_helper;
-	workers->stopping = 0;
-}
-
-/*
- * starts new worker threads. This does not enforce the max worker
- * count in case you need to temporarily go past it.
- */
-static int __btrfs_start_workers(struct btrfs_workers *workers)
-{
-	struct btrfs_worker_thread *worker;
-	int ret = 0;
-
-	worker = kzalloc(sizeof(*worker), GFP_NOFS);
-	if (!worker) {
-		ret = -ENOMEM;
-		goto fail;
-	}
-
-	INIT_LIST_HEAD(&worker->pending);
-	INIT_LIST_HEAD(&worker->prio_pending);
-	INIT_LIST_HEAD(&worker->worker_list);
-	spin_lock_init(&worker->lock);
-
-	atomic_set(&worker->num_pending, 0);
-	atomic_set(&worker->refs, 1);
-	worker->workers = workers;
-	worker->task = kthread_create(worker_loop, worker,
-				      "btrfs-%s-%d", workers->name,
-				      workers->num_workers + 1);
-	if (IS_ERR(worker->task)) {
-		ret = PTR_ERR(worker->task);
-		goto fail;
-	}
-
-	spin_lock_irq(&workers->lock);
-	if (workers->stopping) {
-		spin_unlock_irq(&workers->lock);
-		ret = -EINVAL;
-		goto fail_kthread;
-	}
-	list_add_tail(&worker->worker_list, &workers->idle_list);
-	worker->idle = 1;
-	workers->num_workers++;
-	workers->num_workers_starting--;
-	WARN_ON(workers->num_workers_starting < 0);
-	spin_unlock_irq(&workers->lock);
-
-	wake_up_process(worker->task);
-	return 0;
-
-fail_kthread:
-	kthread_stop(worker->task);
-fail:
-	kfree(worker);
-	spin_lock_irq(&workers->lock);
-	workers->num_workers_starting--;
-	spin_unlock_irq(&workers->lock);
-	return ret;
-}
-
-int btrfs_start_workers(struct btrfs_workers *workers)
-{
-	spin_lock_irq(&workers->lock);
-	workers->num_workers_starting++;
-	spin_unlock_irq(&workers->lock);
-	return __btrfs_start_workers(workers);
-}
-
-/*
- * run through the list and find a worker thread that doesn't have a lot
- * to do right now. This can return null if we aren't yet at the thread
- * count limit and all of the threads are busy.
- */
-static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
-{
-	struct btrfs_worker_thread *worker;
-	struct list_head *next;
-	int enforce_min;
-
-	enforce_min = (workers->num_workers + workers->num_workers_starting) <
-		workers->max_workers;
-
-	/*
-	 * if we find an idle thread, don't move it to the end of the
-	 * idle list. This improves the chance that the next submission
-	 * will reuse the same thread, and maybe catch it while it is still
-	 * working
-	 */
-	if (!list_empty(&workers->idle_list)) {
-		next = workers->idle_list.next;
-		worker = list_entry(next, struct btrfs_worker_thread,
-				    worker_list);
-		return worker;
-	}
-	if (enforce_min || list_empty(&workers->worker_list))
-		return NULL;
-
-	/*
-	 * if we pick a busy task, move the task to the end of the list.
-	 * hopefully this will keep things somewhat evenly balanced.
-	 * Do the move in batches based on the sequence number. This groups
-	 * requests submitted at roughly the same time onto the same worker.
-	 */
-	next = workers->worker_list.next;
-	worker = list_entry(next, struct btrfs_worker_thread, worker_list);
-	worker->sequence++;
-
-	if (worker->sequence % workers->idle_thresh == 0)
-		list_move_tail(next, &workers->worker_list);
-	return worker;
-}
-
-/*
- * selects a worker thread to take the next job. This will either find
- * an idle worker, start a new worker up to the max count, or just return
- * one of the existing busy workers.
- */
-static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
-{
-	struct btrfs_worker_thread *worker;
-	unsigned long flags;
-	struct list_head *fallback;
-	int ret;
-
-	spin_lock_irqsave(&workers->lock, flags);
-again:
-	worker = next_worker(workers);
-
-	if (!worker) {
-		if (workers->num_workers + workers->num_workers_starting >=
-		    workers->max_workers) {
-			goto fallback;
-		} else if (workers->atomic_worker_start) {
-			workers->atomic_start_pending = 1;
-			goto fallback;
-		} else {
-			workers->num_workers_starting++;
-			spin_unlock_irqrestore(&workers->lock, flags);
-			/* we're below the limit, start another worker */
-			ret = __btrfs_start_workers(workers);
-			spin_lock_irqsave(&workers->lock, flags);
-			if (ret)
-				goto fallback;
-			goto again;
-		}
-	}
-	goto found;
-
-fallback:
-	fallback = NULL;
-	/*
-	 * we have failed to find any workers, just
-	 * return the first one we can find.
-	 */
-	if (!list_empty(&workers->worker_list))
-		fallback = workers->worker_list.next;
-	if (!list_empty(&workers->idle_list))
-		fallback = workers->idle_list.next;
-	BUG_ON(!fallback);
-	worker = list_entry(fallback,
-			    struct btrfs_worker_thread, worker_list);
-found:
-	/*
-	 * this makes sure the worker doesn't exit before it is placed
-	 * onto a busy/idle list
-	 */
-	atomic_inc(&worker->num_pending);
-	spin_unlock_irqrestore(&workers->lock, flags);
-	return worker;
-}
-
-/*
- * btrfs_requeue_work just puts the work item back on the tail of the list
- * it was taken from. It is intended for use with long running work functions
- * that make some progress and want to give the cpu up for others.
- */
-void btrfs_requeue_work(struct btrfs_work *work)
-{
-	struct btrfs_worker_thread *worker = work->worker;
-	unsigned long flags;
-	int wake = 0;
-
-	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
-		return;
-
-	spin_lock_irqsave(&worker->lock, flags);
-	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
-		list_add_tail(&work->list, &worker->prio_pending);
-	else
-		list_add_tail(&work->list, &worker->pending);
-	atomic_inc(&worker->num_pending);
-
-	/* by definition we're busy, take ourselves off the idle
-	 * list
-	 */
-	if (worker->idle) {
-		spin_lock(&worker->workers->lock);
-		worker->idle = 0;
-		list_move_tail(&worker->worker_list,
-			       &worker->workers->worker_list);
-		spin_unlock(&worker->workers->lock);
-	}
-	if (!worker->working) {
-		wake = 1;
-		worker->working = 1;
-	}
-
-	if (wake)
-		wake_up_process(worker->task);
-	spin_unlock_irqrestore(&worker->lock, flags);
-}
-
-void btrfs_set_work_high_prio(struct btrfs_work *work)
-{
-	set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
-}
-
-/*
- * places a struct btrfs_work into the pending queue of one of the kthreads
- */
-void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
-{
-	struct btrfs_worker_thread *worker;
-	unsigned long flags;
-	int wake = 0;
-
-	/* don't requeue something already on a list */
-	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
-		return;
-
-	worker = find_worker(workers);
-	if (workers->ordered) {
-		/*
-		 * you're not allowed to do ordered queues from an
-		 * interrupt handler
-		 */
-		spin_lock(&workers->order_lock);
-		if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
-			list_add_tail(&work->order_list,
-				      &workers->prio_order_list);
-		} else {
-			list_add_tail(&work->order_list, &workers->order_list);
-		}
-		spin_unlock(&workers->order_lock);
-	} else {
-		INIT_LIST_HEAD(&work->order_list);
-	}
-
-	spin_lock_irqsave(&worker->lock, flags);
-
-	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
-		list_add_tail(&work->list, &worker->prio_pending);
-	else
-		list_add_tail(&work->list, &worker->pending);
-	check_busy_worker(worker);
-
-	/*
-	 * avoid calling into wake_up_process if this thread has already
-	 * been kicked
-	 */
-	if (!worker->working)
-		wake = 1;
-	worker->working = 1;
-
-	if (wake)
-		wake_up_process(worker->task);
-	spin_unlock_irqrestore(&worker->lock, flags);
-}
+#include "ctree.h"
+
+#define WORK_DONE_BIT 0
+#define WORK_ORDER_DONE_BIT 1
+#define WORK_HIGH_PRIO_BIT 2
+
+#define NO_THRESHOLD (-1)
+#define DFT_THRESHOLD (32)
+
+struct __btrfs_workqueue {
+	struct workqueue_struct *normal_wq;
+	/* List head pointing to ordered work list */
+	struct list_head ordered_list;
+
+	/* Spinlock for ordered_list */
+	spinlock_t list_lock;
+
+	/* Thresholding related variants */
+	atomic_t pending;
+	int max_active;
+	int current_max;
+	int thresh;
+	unsigned int count;
+	spinlock_t thres_lock;
+};
+
+struct btrfs_workqueue {
+	struct __btrfs_workqueue *normal;
+	struct __btrfs_workqueue *high;
+};
+
+static inline struct __btrfs_workqueue
+*__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
+			 int thresh)
+{
+	struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
+
+	if (unlikely(!ret))
+		return NULL;
+
+	ret->max_active = max_active;
+	atomic_set(&ret->pending, 0);
+	if (thresh == 0)
+		thresh = DFT_THRESHOLD;
+	/* For low threshold, disabling threshold is a better choice */
+	if (thresh < DFT_THRESHOLD) {
+		ret->current_max = max_active;
+		ret->thresh = NO_THRESHOLD;
+	} else {
+		ret->current_max = 1;
+		ret->thresh = thresh;
+	}
+
+	if (flags & WQ_HIGHPRI)
+		ret->normal_wq = alloc_workqueue("%s-%s-high", flags,
+						 ret->max_active,
+						 "btrfs", name);
+	else
+		ret->normal_wq = alloc_workqueue("%s-%s", flags,
+						 ret->max_active, "btrfs",
+						 name);
+	if (unlikely(!ret->normal_wq)) {
+		kfree(ret);
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&ret->ordered_list);
+	spin_lock_init(&ret->list_lock);
+	spin_lock_init(&ret->thres_lock);
+	trace_btrfs_workqueue_alloc(ret, name, flags & WQ_HIGHPRI);
+	return ret;
+}
+
+static inline void
+__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq);
+
+struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
+					      int flags,
+					      int max_active,
+					      int thresh)
+{
+	struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
+
+	if (unlikely(!ret))
+		return NULL;
+
+	ret->normal = __btrfs_alloc_workqueue(name, flags & ~WQ_HIGHPRI,
+					      max_active, thresh);
+	if (unlikely(!ret->normal)) {
+		kfree(ret);
+		return NULL;
+	}
+
+	if (flags & WQ_HIGHPRI) {
+		ret->high = __btrfs_alloc_workqueue(name, flags, max_active,
+						    thresh);
+		if (unlikely(!ret->high)) {
+			__btrfs_destroy_workqueue(ret->normal);
+			kfree(ret);
+			return NULL;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Hook for threshold which will be called in btrfs_queue_work.
+ * This hook WILL be called in IRQ handler context,
+ * so workqueue_set_max_active MUST NOT be called in this hook
+ */
+static inline void thresh_queue_hook(struct __btrfs_workqueue *wq)
+{
+	if (wq->thresh == NO_THRESHOLD)
+		return;
+	atomic_inc(&wq->pending);
+}
+
+/*
+ * Hook for threshold which will be called before executing the work,
+ * This hook is called in kthread content.
+ * So workqueue_set_max_active is called here.
+ */
+static inline void thresh_exec_hook(struct __btrfs_workqueue *wq)
+{
+	int new_max_active;
+	long pending;
+	int need_change = 0;
+
+	if (wq->thresh == NO_THRESHOLD)
+		return;
+
+	atomic_dec(&wq->pending);
+	spin_lock(&wq->thres_lock);
+	/*
+	 * Use wq->count to limit the calling frequency of
+	 * workqueue_set_max_active.
+	 */
+	wq->count++;
+	wq->count %= (wq->thresh / 4);
+	if (!wq->count)
+		goto out;
+	new_max_active = wq->current_max;
+
+	/*
+	 * pending may be changed later, but it's OK since we really
+	 * don't need it so accurate to calculate new_max_active.
+	 */
+	pending = atomic_read(&wq->pending);
+	if (pending > wq->thresh)
+		new_max_active++;
+	if (pending < wq->thresh / 2)
+		new_max_active--;
+	new_max_active = clamp_val(new_max_active, 1, wq->max_active);
+	if (new_max_active != wq->current_max) {
+		need_change = 1;
+		wq->current_max = new_max_active;
+	}
+out:
+	spin_unlock(&wq->thres_lock);
+
+	if (need_change) {
+		workqueue_set_max_active(wq->normal_wq, wq->current_max);
+	}
+}
+
+static void run_ordered_work(struct __btrfs_workqueue *wq)
+{
+	struct list_head *list = &wq->ordered_list;
+	struct btrfs_work *work;
+	spinlock_t *lock = &wq->list_lock;
+	unsigned long flags;
+
+	while (1) {
+		spin_lock_irqsave(lock, flags);
+		if (list_empty(list))
+			break;
+		work = list_entry(list->next, struct btrfs_work,
+				  ordered_list);
+		if (!test_bit(WORK_DONE_BIT, &work->flags))
+			break;
+
+		/*
+		 * we are going to call the ordered done function, but
+		 * we leave the work item on the list as a barrier so
+		 * that later work items that are done don't have their
+		 * functions called before this one returns
+		 */
+		if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
+			break;
+		trace_btrfs_ordered_sched(work);
+		spin_unlock_irqrestore(lock, flags);
+		work->ordered_func(work);
+
+		/* now take the lock again and drop our item from the list */
+		spin_lock_irqsave(lock, flags);
+		list_del(&work->ordered_list);
+		spin_unlock_irqrestore(lock, flags);
+
+		/*
+		 * we don't want to call the ordered free functions
+		 * with the lock held though
+		 */
+		work->ordered_free(work);
+		trace_btrfs_all_work_done(work);
+	}
+	spin_unlock_irqrestore(lock, flags);
+}
+
+static void normal_work_helper(struct work_struct *arg)
+{
+	struct btrfs_work *work;
+	struct __btrfs_workqueue *wq;
+	int need_order = 0;
+
+	work = container_of(arg, struct btrfs_work, normal_work);
+	/*
+	 * We should not touch things inside work in the following cases:
+	 * 1) after work->func() if it has no ordered_free
+	 *    Since the struct is freed in work->func().
+	 * 2) after setting WORK_DONE_BIT
+	 *    The work may be freed in other threads almost instantly.
+	 * So we save the needed things here.
+	 */
+	if (work->ordered_func)
+		need_order = 1;
+	wq = work->wq;
+
+	trace_btrfs_work_sched(work);
+	thresh_exec_hook(wq);
+	work->func(work);
+	if (need_order) {
+		set_bit(WORK_DONE_BIT, &work->flags);
+		run_ordered_work(wq);
+	}
+	if (!need_order)
+		trace_btrfs_all_work_done(work);
+}
+
+void btrfs_init_work(struct btrfs_work *work,
+		     btrfs_func_t func,
+		     btrfs_func_t ordered_func,
+		     btrfs_func_t ordered_free)
+{
+	work->func = func;
+	work->ordered_func = ordered_func;
+	work->ordered_free = ordered_free;
+	INIT_WORK(&work->normal_work, normal_work_helper);
+	INIT_LIST_HEAD(&work->ordered_list);
+	work->flags = 0;
+}
+
+static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
+				      struct btrfs_work *work)
+{
+	unsigned long flags;
+
+	work->wq = wq;
+	thresh_queue_hook(wq);
+	if (work->ordered_func) {
+		spin_lock_irqsave(&wq->list_lock, flags);
+		list_add_tail(&work->ordered_list, &wq->ordered_list);
+		spin_unlock_irqrestore(&wq->list_lock, flags);
+	}
+	queue_work(wq->normal_wq, &work->normal_work);
+	trace_btrfs_work_queued(work);
+}
+
+void btrfs_queue_work(struct btrfs_workqueue *wq,
+		      struct btrfs_work *work)
+{
+	struct __btrfs_workqueue *dest_wq;
+
+	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags) && wq->high)
+		dest_wq = wq->high;
+	else
+		dest_wq = wq->normal;
+	__btrfs_queue_work(dest_wq, work);
+}
+
+static inline void
+__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq)
+{
+	destroy_workqueue(wq->normal_wq);
+	trace_btrfs_workqueue_destroy(wq);
+	kfree(wq);
+}
+
+void btrfs_destroy_workqueue(struct btrfs_workqueue *wq)
+{
+	if (!wq)
+		return;
+	if (wq->high)
+		__btrfs_destroy_workqueue(wq->high);
+	__btrfs_destroy_workqueue(wq->normal);
+	kfree(wq);
+}
+
+void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max)
+{
+	wq->normal->max_active = max;
+	if (wq->high)
+		wq->high->max_active = max;
+}
+
+void btrfs_set_work_high_priority(struct btrfs_work *work)
+{
+	set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
+}
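A note on the new implementation above: thresh_exec_hook() is the piece that replaces manual thread management. As works execute, it samples the pending counter, bumps the granted concurrency when the backlog exceeds thresh, shrinks it when the backlog falls below thresh/2, clamps the result to [1, max_active], and calls workqueue_set_max_active() only when the value actually changes. The following standalone user-space model reproduces just that rule; the names mirror the kernel code, but the program is illustrative, not kernel source:

/*
 * Standalone model of the thresh_exec_hook() scaling rule.
 * Build with: cc -std=c99 -o wq_model wq_model.c
 */
#include <stdio.h>

#define NO_THRESHOLD  (-1)
#define DFT_THRESHOLD 32

struct wq_model {
	int max_active;		/* hard ceiling given at allocation time */
	int current_max;	/* concurrency currently granted */
	int thresh;		/* backlog threshold, or NO_THRESHOLD */
	unsigned int count;	/* throttles how often we re-evaluate */
	long pending;		/* stands in for atomic_read(&wq->pending) */
};

static int clamp_int(int v, int lo, int hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

/* Called once per executed work, as thresh_exec_hook() is. */
static void exec_hook(struct wq_model *wq)
{
	int new_max;

	if (wq->thresh == NO_THRESHOLD)
		return;
	wq->pending--;
	wq->count = (wq->count + 1) % (wq->thresh / 4);
	if (!wq->count)
		return;		/* mirrors the kernel code as written */

	new_max = wq->current_max;
	if (wq->pending > wq->thresh)
		new_max++;	/* backlog building up: allow more workers */
	if (wq->pending < wq->thresh / 2)
		new_max--;	/* mostly drained: shrink back down */
	wq->current_max = clamp_int(new_max, 1, wq->max_active);
	/* the kernel calls workqueue_set_max_active() only on a change */
}

int main(void)
{
	struct wq_model wq = { .max_active = 8, .current_max = 1,
			       .thresh = DFT_THRESHOLD, .pending = 200 };

	for (int i = 0; i < 200; i++) {
		exec_hook(&wq);
		if ((i & 31) == 31)
			printf("pending=%3ld current_max=%d\n",
			       wq.pending, wq.current_max);
	}
	return 0;
}

Starting current_max at 1 (as __btrfs_alloc_workqueue() does whenever a threshold is in effect) means a queue only earns extra concurrency after a real backlog shows up.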
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 1f26792683ed..9c6b66d15fb0 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2007 Oracle. All rights reserved.
+ * Copyright (C) 2014 Fujitsu. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
@@ -19,103 +20,35 @@
 #ifndef __BTRFS_ASYNC_THREAD_
 #define __BTRFS_ASYNC_THREAD_
 
-struct btrfs_worker_thread;
+struct btrfs_workqueue;
+/* Internal use only */
+struct __btrfs_workqueue;
+struct btrfs_work;
+typedef void (*btrfs_func_t)(struct btrfs_work *arg);
 
-/*
- * This is similar to a workqueue, but it is meant to spread the operations
- * across all available cpus instead of just the CPU that was used to
- * queue the work. There is also some batching introduced to try and
- * cut down on context switches.
- *
- * By default threads are added on demand up to 2 * the number of cpus.
- * Changing struct btrfs_workers->max_workers is one way to prevent
- * demand creation of kthreads.
- *
- * the basic model of these worker threads is to embed a btrfs_work
- * structure in your own data struct, and use container_of in a
- * work function to get back to your data struct.
- */
 struct btrfs_work {
-	/*
-	 * func should be set to the function you want called
-	 * your work struct is passed as the only arg
-	 *
-	 * ordered_func must be set for work sent to an ordered work queue,
-	 * and it is called to complete a given work item in the same
-	 * order they were sent to the queue.
-	 */
-	void (*func)(struct btrfs_work *work);
-	void (*ordered_func)(struct btrfs_work *work);
-	void (*ordered_free)(struct btrfs_work *work);
-
-	/*
-	 * flags should be set to zero. It is used to make sure the
-	 * struct is only inserted once into the list.
-	 */
+	btrfs_func_t func;
+	btrfs_func_t ordered_func;
+	btrfs_func_t ordered_free;
+
+	/* Don't touch things below */
+	struct work_struct normal_work;
+	struct list_head ordered_list;
+	struct __btrfs_workqueue *wq;
 	unsigned long flags;
-
-	/* don't touch these */
-	struct btrfs_worker_thread *worker;
-	struct list_head list;
-	struct list_head order_list;
-};
-
-struct btrfs_workers {
-	/* current number of running workers */
-	int num_workers;
-
-	int num_workers_starting;
-
-	/* max number of workers allowed. changed by btrfs_start_workers */
-	int max_workers;
-
-	/* once a worker has this many requests or fewer, it is idle */
-	int idle_thresh;
-
-	/* force completions in the order they were queued */
-	int ordered;
-
-	/* more workers required, but in an interrupt handler */
-	int atomic_start_pending;
-
-	/*
-	 * are we allowed to sleep while starting workers or are we required
-	 * to start them at a later time? If we can't sleep, this indicates
-	 * which queue we need to use to schedule thread creation.
-	 */
-	struct btrfs_workers *atomic_worker_start;
-
-	/* list with all the work threads. The workers on the idle thread
-	 * may be actively servicing jobs, but they haven't yet hit the
-	 * idle thresh limit above.
-	 */
-	struct list_head worker_list;
-	struct list_head idle_list;
-
-	/*
-	 * when operating in ordered mode, this maintains the list
-	 * of work items waiting for completion
-	 */
-	struct list_head order_list;
-	struct list_head prio_order_list;
-
-	/* lock for finding the next worker thread to queue on */
-	spinlock_t lock;
-
-	/* lock for the ordered lists */
-	spinlock_t order_lock;
-
-	/* extra name for this worker, used for current->name */
-	char *name;
-
-	int stopping;
 };
 
-void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
-int btrfs_start_workers(struct btrfs_workers *workers);
-void btrfs_stop_workers(struct btrfs_workers *workers);
-void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
-			struct btrfs_workers *async_starter);
-void btrfs_requeue_work(struct btrfs_work *work);
-void btrfs_set_work_high_prio(struct btrfs_work *work);
+struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
+					      int flags,
+					      int max_active,
+					      int thresh);
+void btrfs_init_work(struct btrfs_work *work,
+		     btrfs_func_t func,
+		     btrfs_func_t ordered_func,
+		     btrfs_func_t ordered_free);
+void btrfs_queue_work(struct btrfs_workqueue *wq,
+		      struct btrfs_work *work);
+void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
+void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max);
+void btrfs_set_work_high_priority(struct btrfs_work *work);
 #endif
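The usage pattern for the new interface is the one the deleted header comment described: embed a struct btrfs_work in your own data struct and use container_of() in the work function to get back to it. A hypothetical caller might look like the sketch below; my_work, my_func and submit_one are invented names, and only the btrfs_* calls come from this header:

/* Illustrative in-tree-style caller of the new workqueue API. */
#include <linux/slab.h>
#include <linux/types.h>
#include "async-thread.h"

struct my_work {
	struct btrfs_work work;		/* embedded, never pointed to */
	u64 payload;
};

static void my_func(struct btrfs_work *work)
{
	struct my_work *mw = container_of(work, struct my_work, work);

	/* ... process mw->payload ... */
	kfree(mw);			/* no ordered_free, so free here */
}

static int submit_one(struct btrfs_workqueue *wq, u64 payload)
{
	struct my_work *mw = kzalloc(sizeof(*mw), GFP_NOFS);

	if (!mw)
		return -ENOMEM;
	mw->payload = payload;
	/* no completion ordering needed: ordered hooks stay NULL */
	btrfs_init_work(&mw->work, my_func, NULL, NULL);
	btrfs_queue_work(wq, &mw->work);
	return 0;
}

When ordered_func and ordered_free are supplied instead, btrfs_queue_work() also links the item into the queue's ordered list, and ordered_free, not the work function, becomes responsible for freeing it.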
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index aded3ef3d3d4..aad7201ad11b 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -220,7 +220,8 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
 
 static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
			   struct ulist *parents, struct __prelim_ref *ref,
-			   int level, u64 time_seq, const u64 *extent_item_pos)
+			   int level, u64 time_seq, const u64 *extent_item_pos,
+			   u64 total_refs)
 {
 	int ret = 0;
 	int slot;
@@ -249,7 +250,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
 		ret = btrfs_next_old_leaf(root, path, time_seq);
 
-	while (!ret && count < ref->count) {
+	while (!ret && count < total_refs) {
 		eb = path->nodes[0];
 		slot = path->slots[0];
 
@@ -306,7 +307,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
				  struct btrfs_path *path, u64 time_seq,
				  struct __prelim_ref *ref,
				  struct ulist *parents,
-				  const u64 *extent_item_pos)
+				  const u64 *extent_item_pos, u64 total_refs)
 {
 	struct btrfs_root *root;
 	struct btrfs_key root_key;
@@ -361,7 +362,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 	}
 
 	ret = add_all_parents(root, path, parents, ref, level, time_seq,
-			      extent_item_pos);
+			      extent_item_pos, total_refs);
 out:
 	path->lowest_level = 0;
 	btrfs_release_path(path);
@@ -374,7 +375,7 @@ out:
 static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
				   struct btrfs_path *path, u64 time_seq,
				   struct list_head *head,
-				   const u64 *extent_item_pos)
+				   const u64 *extent_item_pos, u64 total_refs)
 {
 	int err;
 	int ret = 0;
@@ -400,7 +401,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 		if (ref->count == 0)
 			continue;
 		err = __resolve_indirect_ref(fs_info, path, time_seq, ref,
-					     parents, extent_item_pos);
+					     parents, extent_item_pos,
+					     total_refs);
 		/*
 		 * we can only tolerate ENOENT,otherwise,we should catch error
 		 * and return directly.
@@ -557,7 +559,7 @@ static void __merge_refs(struct list_head *head, int mode)
  * smaller or equal that seq to the list
  */
 static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
-			      struct list_head *prefs)
+			      struct list_head *prefs, u64 *total_refs)
 {
 	struct btrfs_delayed_extent_op *extent_op = head->extent_op;
 	struct rb_node *n = &head->node.rb_node;
@@ -593,6 +595,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
 		default:
 			BUG_ON(1);
 		}
+		*total_refs += (node->ref_mod * sgn);
 		switch (node->type) {
 		case BTRFS_TREE_BLOCK_REF_KEY: {
 			struct btrfs_delayed_tree_ref *ref;
@@ -653,7 +656,8 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
  */
 static int __add_inline_refs(struct btrfs_fs_info *fs_info,
			     struct btrfs_path *path, u64 bytenr,
-			     int *info_level, struct list_head *prefs)
+			     int *info_level, struct list_head *prefs,
+			     u64 *total_refs)
 {
 	int ret = 0;
 	int slot;
@@ -677,6 +681,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
 
 	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
 	flags = btrfs_extent_flags(leaf, ei);
+	*total_refs += btrfs_extent_refs(leaf, ei);
 	btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
 	ptr = (unsigned long)(ei + 1);
@@ -859,6 +864,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	struct list_head prefs;
 	struct __prelim_ref *ref;
 	struct extent_inode_elem *eie = NULL;
+	u64 total_refs = 0;
 
 	INIT_LIST_HEAD(&prefs);
 	INIT_LIST_HEAD(&prefs_delayed);
@@ -873,8 +879,10 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
-	if (!trans)
+	if (!trans) {
 		path->search_commit_root = 1;
+		path->skip_locking = 1;
+	}
 
 	/*
 	 * grab both a lock on the path and a lock on the delayed ref head.
@@ -915,7 +923,7 @@ again:
 			}
 			spin_unlock(&delayed_refs->lock);
 			ret = __add_delayed_refs(head, time_seq,
-						 &prefs_delayed);
+						 &prefs_delayed, &total_refs);
 			mutex_unlock(&head->mutex);
 			if (ret)
 				goto out;
@@ -936,7 +944,8 @@ again:
 		    (key.type == BTRFS_EXTENT_ITEM_KEY ||
 		     key.type == BTRFS_METADATA_ITEM_KEY)) {
 			ret = __add_inline_refs(fs_info, path, bytenr,
-						&info_level, &prefs);
+						&info_level, &prefs,
+						&total_refs);
 			if (ret)
 				goto out;
 			ret = __add_keyed_refs(fs_info, path, bytenr,
@@ -956,7 +965,7 @@ again:
 	__merge_refs(&prefs, 1);
 
 	ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs,
-				      extent_item_pos);
+				      extent_item_pos, total_refs);
 	if (ret)
 		goto out;
 
@@ -965,7 +974,7 @@ again:
 	while (!list_empty(&prefs)) {
 		ref = list_first_entry(&prefs, struct __prelim_ref, list);
 		WARN_ON(ref->count < 0);
-		if (ref->count && ref->root_id && ref->parent == 0) {
+		if (roots && ref->count && ref->root_id && ref->parent == 0) {
 			/* no parent == root of tree */
 			ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
 			if (ret < 0)
@@ -1061,22 +1070,14 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
			    u64 time_seq, struct ulist **leafs,
			    const u64 *extent_item_pos)
 {
-	struct ulist *tmp;
 	int ret;
 
-	tmp = ulist_alloc(GFP_NOFS);
-	if (!tmp)
-		return -ENOMEM;
 	*leafs = ulist_alloc(GFP_NOFS);
-	if (!*leafs) {
-		ulist_free(tmp);
+	if (!*leafs)
 		return -ENOMEM;
-	}
 
 	ret = find_parent_nodes(trans, fs_info, bytenr,
-				time_seq, *leafs, tmp, extent_item_pos);
-	ulist_free(tmp);
-
+				time_seq, *leafs, NULL, extent_item_pos);
 	if (ret < 0 && ret != -ENOENT) {
 		free_leaf_list(*leafs);
 		return ret;
@@ -1333,38 +1334,13 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
 	if (ret < 0)
 		return ret;
 
-	while (1) {
-		u32 nritems;
-		if (path->slots[0] == 0) {
-			btrfs_set_path_blocking(path);
-			ret = btrfs_prev_leaf(fs_info->extent_root, path);
-			if (ret != 0) {
-				if (ret > 0) {
-					pr_debug("logical %llu is not within "
-						 "any extent\n", logical);
-					ret = -ENOENT;
-				}
-				return ret;
-			}
-		} else {
-			path->slots[0]--;
-		}
-		nritems = btrfs_header_nritems(path->nodes[0]);
-		if (nritems == 0) {
-			pr_debug("logical %llu is not within any extent\n",
-				 logical);
-			return -ENOENT;
-		}
-		if (path->slots[0] == nritems)
-			path->slots[0]--;
-
-		btrfs_item_key_to_cpu(path->nodes[0], found_key,
-				      path->slots[0]);
-		if (found_key->type == BTRFS_EXTENT_ITEM_KEY ||
-		    found_key->type == BTRFS_METADATA_ITEM_KEY)
-			break;
-	}
-
+	ret = btrfs_previous_extent_item(fs_info->extent_root, path, 0);
+	if (ret) {
+		if (ret > 0)
+			ret = -ENOENT;
+		return ret;
+	}
+	btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
 	if (found_key->type == BTRFS_METADATA_ITEM_KEY)
 		size = fs_info->extent_root->leafsize;
 	else if (found_key->type == BTRFS_EXTENT_ITEM_KEY)
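The backref.c changes above thread one new number, total_refs, through find_parent_nodes(): it is seeded from btrfs_extent_refs() on the extent item, adjusted by each delayed ref's signed modification, and then used as the loop bound in add_all_parents() in place of the single resolved ref's own count. A compact user-space model of that accounting; all names and figures here are illustrative:

#include <stdio.h>

/* Stand-in for one delayed ref: ref_mod copies, sgn = +1 add / -1 drop. */
struct delayed_ref {
	long long ref_mod;
	int sgn;
};

int main(void)
{
	/* seeded from btrfs_extent_refs() on the on-disk extent item */
	long long total_refs = 3;
	const struct delayed_ref delayed[] = { {1, 1}, {1, -1}, {2, 1} };
	long long count = 0;

	/* __add_delayed_refs() folds each pending modification in */
	for (int i = 0; i < 3; i++)
		total_refs += delayed[i].ref_mod * delayed[i].sgn;

	/*
	 * add_all_parents() now runs while (count < total_refs), so the
	 * walk presumably no longer stops early when one ref accounts
	 * for only part of a shared extent's references.
	 */
	while (count < total_refs)
		count++;
	printf("collected %lld of %lld references\n", count, total_refs);
	return 0;
}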
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 8fed2125689e..c9a24444ec9a 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -109,14 +109,17 @@ struct btrfs_inode {
 	u64 last_trans;
 
 	/*
-	 * log transid when this inode was last modified
+	 * transid that last logged this inode
 	 */
-	u64 last_sub_trans;
+	u64 logged_trans;
 
 	/*
-	 * transid that last logged this inode
+	 * log transid when this inode was last modified
 	 */
-	u64 logged_trans;
+	int last_sub_trans;
+
+	/* a local copy of root's last_log_commit */
+	int last_log_commit;
 
 	/* total number of bytes pending delalloc, used by stat to calc the
 	 * real block usage of the file
@@ -155,9 +158,6 @@ struct btrfs_inode {
 	/* flags field from the on disk inode */
 	u32 flags;
 
-	/* a local copy of root's last_log_commit */
-	unsigned long last_log_commit;
-
 	/*
 	 * Counters to keep track of the number of extent item's we may use due
 	 * to delalloc and such. outstanding_extents is the number of extent
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b01fb6c527e3..d43c544d3b68 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
| @@ -472,7 +472,7 @@ static noinline int add_ra_bio_pages(struct inode *inode, | |||
| 472 | rcu_read_lock(); | 472 | rcu_read_lock(); |
| 473 | page = radix_tree_lookup(&mapping->page_tree, pg_index); | 473 | page = radix_tree_lookup(&mapping->page_tree, pg_index); |
| 474 | rcu_read_unlock(); | 474 | rcu_read_unlock(); |
| 475 | if (page) { | 475 | if (page && !radix_tree_exceptional_entry(page)) { |
| 476 | misses++; | 476 | misses++; |
| 477 | if (misses > 4) | 477 | if (misses > 4) |
| 478 | break; | 478 | break; |
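The new radix_tree_exceptional_entry() test is needed because a lockless page-cache lookup can return a shadow entry left behind by reclaim rather than a real page, and only a real page should count as a readahead miss. The guarded pattern from the loop body, restated:

    rcu_read_lock();
    page = radix_tree_lookup(&mapping->page_tree, pg_index);
    rcu_read_unlock();
    /* A shadow (exceptional) entry is not a cached page. */
    if (page && !radix_tree_exceptional_entry(page)) {
            misses++;           /* page genuinely cached already */
            if (misses > 4)
                    break;      /* stop extending readahead */
    }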
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index cbd3a7d6fa68..88d1b1eedc9c 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
| @@ -5376,6 +5376,8 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
| 5376 | int advance_right; | 5376 | int advance_right; |
| 5377 | u64 left_blockptr; | 5377 | u64 left_blockptr; |
| 5378 | u64 right_blockptr; | 5378 | u64 right_blockptr; |
| 5379 | u64 left_gen; | ||
| 5380 | u64 right_gen; | ||
| 5379 | u64 left_start_ctransid; | 5381 | u64 left_start_ctransid; |
| 5380 | u64 right_start_ctransid; | 5382 | u64 right_start_ctransid; |
| 5381 | u64 ctransid; | 5383 | u64 ctransid; |
| @@ -5640,7 +5642,14 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
| 5640 | right_blockptr = btrfs_node_blockptr( | 5642 | right_blockptr = btrfs_node_blockptr( |
| 5641 | right_path->nodes[right_level], | 5643 | right_path->nodes[right_level], |
| 5642 | right_path->slots[right_level]); | 5644 | right_path->slots[right_level]); |
| 5643 | if (left_blockptr == right_blockptr) { | 5645 | left_gen = btrfs_node_ptr_generation( |
| 5646 | left_path->nodes[left_level], | ||
| 5647 | left_path->slots[left_level]); | ||
| 5648 | right_gen = btrfs_node_ptr_generation( | ||
| 5649 | right_path->nodes[right_level], | ||
| 5650 | right_path->slots[right_level]); | ||
| 5651 | if (left_blockptr == right_blockptr && | ||
| 5652 | left_gen == right_gen) { | ||
| 5644 | /* | 5653 | /* |
| 5645 | * As we're on a shared block, don't | 5654 | * As we're on a shared block, don't |
| 5646 | * allow going deeper. | 5655 | * allow going deeper. |
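The added generation comparison closes a reuse hole: after COW frees a block, its bytenr can be handed to an unrelated block, so equal blockptrs alone do not prove two snapshots still share a subtree. The combined condition, as a standalone sketch (helper name invented):

    /* A subtree is shared only if pointer and generation both match. */
    static bool nodes_share_subtree(struct extent_buffer *l, int ls,
                                    struct extent_buffer *r, int rs)
    {
            return btrfs_node_blockptr(l, ls) == btrfs_node_blockptr(r, rs) &&
                   btrfs_node_ptr_generation(l, ls) ==
                   btrfs_node_ptr_generation(r, rs);
    }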
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2c1a42ca519f..bc96c03dd259 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -351,6 +351,7 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) | |||
| 351 | #define BTRFS_FS_STATE_ERROR 0 | 351 | #define BTRFS_FS_STATE_ERROR 0 |
| 352 | #define BTRFS_FS_STATE_REMOUNTING 1 | 352 | #define BTRFS_FS_STATE_REMOUNTING 1 |
| 353 | #define BTRFS_FS_STATE_TRANS_ABORTED 2 | 353 | #define BTRFS_FS_STATE_TRANS_ABORTED 2 |
| 354 | #define BTRFS_FS_STATE_DEV_REPLACING 3 | ||
| 354 | 355 | ||
| 355 | /* Super block flags */ | 356 | /* Super block flags */ |
| 356 | /* Errors detected */ | 357 | /* Errors detected */ |
| @@ -1489,6 +1490,7 @@ struct btrfs_fs_info { | |||
| 1489 | */ | 1490 | */ |
| 1490 | struct list_head ordered_roots; | 1491 | struct list_head ordered_roots; |
| 1491 | 1492 | ||
| 1493 | struct mutex delalloc_root_mutex; | ||
| 1492 | spinlock_t delalloc_root_lock; | 1494 | spinlock_t delalloc_root_lock; |
| 1493 | /* all fs/file tree roots that have delalloc inodes. */ | 1495 | /* all fs/file tree roots that have delalloc inodes. */ |
| 1494 | struct list_head delalloc_roots; | 1496 | struct list_head delalloc_roots; |
| @@ -1503,28 +1505,27 @@ struct btrfs_fs_info { | |||
| 1503 | * A third pool does submit_bio to avoid deadlocking with the other | 1505 | * A third pool does submit_bio to avoid deadlocking with the other |
| 1504 | * two | 1506 | * two |
| 1505 | */ | 1507 | */ |
| 1506 | struct btrfs_workers generic_worker; | 1508 | struct btrfs_workqueue *workers; |
| 1507 | struct btrfs_workers workers; | 1509 | struct btrfs_workqueue *delalloc_workers; |
| 1508 | struct btrfs_workers delalloc_workers; | 1510 | struct btrfs_workqueue *flush_workers; |
| 1509 | struct btrfs_workers flush_workers; | 1511 | struct btrfs_workqueue *endio_workers; |
| 1510 | struct btrfs_workers endio_workers; | 1512 | struct btrfs_workqueue *endio_meta_workers; |
| 1511 | struct btrfs_workers endio_meta_workers; | 1513 | struct btrfs_workqueue *endio_raid56_workers; |
| 1512 | struct btrfs_workers endio_raid56_workers; | 1514 | struct btrfs_workqueue *rmw_workers; |
| 1513 | struct btrfs_workers rmw_workers; | 1515 | struct btrfs_workqueue *endio_meta_write_workers; |
| 1514 | struct btrfs_workers endio_meta_write_workers; | 1516 | struct btrfs_workqueue *endio_write_workers; |
| 1515 | struct btrfs_workers endio_write_workers; | 1517 | struct btrfs_workqueue *endio_freespace_worker; |
| 1516 | struct btrfs_workers endio_freespace_worker; | 1518 | struct btrfs_workqueue *submit_workers; |
| 1517 | struct btrfs_workers submit_workers; | 1519 | struct btrfs_workqueue *caching_workers; |
| 1518 | struct btrfs_workers caching_workers; | 1520 | struct btrfs_workqueue *readahead_workers; |
| 1519 | struct btrfs_workers readahead_workers; | ||
| 1520 | 1521 | ||
| 1521 | /* | 1522 | /* |
| 1522 | * fixup workers take dirty pages that didn't properly go through | 1523 | * fixup workers take dirty pages that didn't properly go through |
| 1523 | * the cow mechanism and make them safe to write. It happens | 1524 | * the cow mechanism and make them safe to write. It happens |
| 1524 | * for the sys_munmap function call path | 1525 | * for the sys_munmap function call path |
| 1525 | */ | 1526 | */ |
| 1526 | struct btrfs_workers fixup_workers; | 1527 | struct btrfs_workqueue *fixup_workers; |
| 1527 | struct btrfs_workers delayed_workers; | 1528 | struct btrfs_workqueue *delayed_workers; |
| 1528 | struct task_struct *transaction_kthread; | 1529 | struct task_struct *transaction_kthread; |
| 1529 | struct task_struct *cleaner_kthread; | 1530 | struct task_struct *cleaner_kthread; |
| 1530 | int thread_pool_size; | 1531 | int thread_pool_size; |
| @@ -1604,9 +1605,9 @@ struct btrfs_fs_info { | |||
| 1604 | atomic_t scrub_cancel_req; | 1605 | atomic_t scrub_cancel_req; |
| 1605 | wait_queue_head_t scrub_pause_wait; | 1606 | wait_queue_head_t scrub_pause_wait; |
| 1606 | int scrub_workers_refcnt; | 1607 | int scrub_workers_refcnt; |
| 1607 | struct btrfs_workers scrub_workers; | 1608 | struct btrfs_workqueue *scrub_workers; |
| 1608 | struct btrfs_workers scrub_wr_completion_workers; | 1609 | struct btrfs_workqueue *scrub_wr_completion_workers; |
| 1609 | struct btrfs_workers scrub_nocow_workers; | 1610 | struct btrfs_workqueue *scrub_nocow_workers; |
| 1610 | 1611 | ||
| 1611 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 1612 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
| 1612 | u32 check_integrity_print_mask; | 1613 | u32 check_integrity_print_mask; |
| @@ -1647,7 +1648,7 @@ struct btrfs_fs_info { | |||
| 1647 | /* qgroup rescan items */ | 1648 | /* qgroup rescan items */ |
| 1648 | struct mutex qgroup_rescan_lock; /* protects the progress item */ | 1649 | struct mutex qgroup_rescan_lock; /* protects the progress item */ |
| 1649 | struct btrfs_key qgroup_rescan_progress; | 1650 | struct btrfs_key qgroup_rescan_progress; |
| 1650 | struct btrfs_workers qgroup_rescan_workers; | 1651 | struct btrfs_workqueue *qgroup_rescan_workers; |
| 1651 | struct completion qgroup_rescan_completion; | 1652 | struct completion qgroup_rescan_completion; |
| 1652 | struct btrfs_work qgroup_rescan_work; | 1653 | struct btrfs_work qgroup_rescan_work; |
| 1653 | 1654 | ||
| @@ -1674,10 +1675,18 @@ struct btrfs_fs_info { | |||
| 1674 | 1675 | ||
| 1675 | atomic_t mutually_exclusive_operation_running; | 1676 | atomic_t mutually_exclusive_operation_running; |
| 1676 | 1677 | ||
| 1678 | struct percpu_counter bio_counter; | ||
| 1679 | wait_queue_head_t replace_wait; | ||
| 1680 | |||
| 1677 | struct semaphore uuid_tree_rescan_sem; | 1681 | struct semaphore uuid_tree_rescan_sem; |
| 1678 | unsigned int update_uuid_tree_gen:1; | 1682 | unsigned int update_uuid_tree_gen:1; |
| 1679 | }; | 1683 | }; |
| 1680 | 1684 | ||
| 1685 | struct btrfs_subvolume_writers { | ||
| 1686 | struct percpu_counter counter; | ||
| 1687 | wait_queue_head_t wait; | ||
| 1688 | }; | ||
| 1689 | |||
| 1681 | /* | 1690 | /* |
| 1682 | * in ram representation of the tree. extent_root is used for all allocations | 1691 | * in ram representation of the tree. extent_root is used for all allocations |
| 1683 | * and for the extent tree extent_root root. | 1692 | * and for the extent tree extent_root root. |
| @@ -1714,11 +1723,15 @@ struct btrfs_root { | |||
| 1714 | struct mutex log_mutex; | 1723 | struct mutex log_mutex; |
| 1715 | wait_queue_head_t log_writer_wait; | 1724 | wait_queue_head_t log_writer_wait; |
| 1716 | wait_queue_head_t log_commit_wait[2]; | 1725 | wait_queue_head_t log_commit_wait[2]; |
| 1726 | struct list_head log_ctxs[2]; | ||
| 1717 | atomic_t log_writers; | 1727 | atomic_t log_writers; |
| 1718 | atomic_t log_commit[2]; | 1728 | atomic_t log_commit[2]; |
| 1719 | atomic_t log_batch; | 1729 | atomic_t log_batch; |
| 1720 | unsigned long log_transid; | 1730 | int log_transid; |
| 1721 | unsigned long last_log_commit; | 1731 | /* Updated whether or not the commit succeeds */ |
| 1732 | int log_transid_committed; | ||
| 1733 | /* Only updated when the commit succeeds. */ |
| 1734 | int last_log_commit; | ||
| 1722 | pid_t log_start_pid; | 1735 | pid_t log_start_pid; |
| 1723 | bool log_multiple_pids; | 1736 | bool log_multiple_pids; |
| 1724 | 1737 | ||
| @@ -1793,6 +1806,7 @@ struct btrfs_root { | |||
| 1793 | spinlock_t root_item_lock; | 1806 | spinlock_t root_item_lock; |
| 1794 | atomic_t refs; | 1807 | atomic_t refs; |
| 1795 | 1808 | ||
| 1809 | struct mutex delalloc_mutex; | ||
| 1796 | spinlock_t delalloc_lock; | 1810 | spinlock_t delalloc_lock; |
| 1797 | /* | 1811 | /* |
| 1798 | * all of the inodes that have delalloc bytes. It is possible for | 1812 | * all of the inodes that have delalloc bytes. It is possible for |
| @@ -1802,6 +1816,8 @@ struct btrfs_root { | |||
| 1802 | struct list_head delalloc_inodes; | 1816 | struct list_head delalloc_inodes; |
| 1803 | struct list_head delalloc_root; | 1817 | struct list_head delalloc_root; |
| 1804 | u64 nr_delalloc_inodes; | 1818 | u64 nr_delalloc_inodes; |
| 1819 | |||
| 1820 | struct mutex ordered_extent_mutex; | ||
| 1805 | /* | 1821 | /* |
| 1806 | * this is used by the balancing code to wait for all the pending | 1822 | * this is used by the balancing code to wait for all the pending |
| 1807 | * ordered extents | 1823 | * ordered extents |
| @@ -1822,6 +1838,8 @@ struct btrfs_root { | |||
| 1822 | * manipulation with the read-only status via SUBVOL_SETFLAGS | 1838 | * manipulation with the read-only status via SUBVOL_SETFLAGS |
| 1823 | */ | 1839 | */ |
| 1824 | int send_in_progress; | 1840 | int send_in_progress; |
| 1841 | struct btrfs_subvolume_writers *subv_writers; | ||
| 1842 | atomic_t will_be_snapshoted; | ||
| 1825 | }; | 1843 | }; |
| 1826 | 1844 | ||
| 1827 | struct btrfs_ioctl_defrag_range_args { | 1845 | struct btrfs_ioctl_defrag_range_args { |
| @@ -3346,6 +3364,9 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info); | |||
| 3346 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, | 3364 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, |
| 3347 | struct btrfs_fs_info *fs_info); | 3365 | struct btrfs_fs_info *fs_info); |
| 3348 | int __get_raid_index(u64 flags); | 3366 | int __get_raid_index(u64 flags); |
| 3367 | |||
| 3368 | int btrfs_start_nocow_write(struct btrfs_root *root); | ||
| 3369 | void btrfs_end_nocow_write(struct btrfs_root *root); | ||
| 3349 | /* ctree.c */ | 3370 | /* ctree.c */ |
| 3350 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 3371 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
| 3351 | int level, int *slot); | 3372 | int level, int *slot); |
| @@ -3723,7 +3744,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
| 3723 | u32 min_type); | 3744 | u32 min_type); |
| 3724 | 3745 | ||
| 3725 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); | 3746 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); |
| 3726 | int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput); | 3747 | int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput, |
| 3748 | int nr); | ||
| 3727 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, | 3749 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, |
| 3728 | struct extent_state **cached_state); | 3750 | struct extent_state **cached_state); |
| 3729 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 3751 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
| @@ -4005,6 +4027,11 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info, | |||
| 4005 | int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, | 4027 | int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, |
| 4006 | struct btrfs_scrub_progress *progress); | 4028 | struct btrfs_scrub_progress *progress); |
| 4007 | 4029 | ||
| 4030 | /* dev-replace.c */ | ||
| 4031 | void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info); | ||
| 4032 | void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info); | ||
| 4033 | void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info); | ||
| 4034 | |||
| 4008 | /* reada.c */ | 4035 | /* reada.c */ |
| 4009 | struct reada_control { | 4036 | struct reada_control { |
| 4010 | struct btrfs_root *root; /* tree to prefetch */ | 4037 | struct btrfs_root *root; /* tree to prefetch */ |
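The three bio-counter prototypes added above gate bio submission against a running device replace. A hedged usage sketch (the submission call is hypothetical): a writer takes a reference before issuing I/O, and the blocking variant parks it until the replace finishes tearing down the target device.

    btrfs_bio_counter_inc_blocked(fs_info); /* may sleep while replacing */
    ret = issue_bio(fs_info, bio);          /* hypothetical submission */
    if (ret)
            btrfs_bio_counter_dec(fs_info); /* endio would drop it on success */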
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 451b00c86f6c..33e561a84013 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
| @@ -1392,11 +1392,11 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, | |||
| 1392 | return -ENOMEM; | 1392 | return -ENOMEM; |
| 1393 | 1393 | ||
| 1394 | async_work->delayed_root = delayed_root; | 1394 | async_work->delayed_root = delayed_root; |
| 1395 | async_work->work.func = btrfs_async_run_delayed_root; | 1395 | btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, |
| 1396 | async_work->work.flags = 0; | 1396 | NULL, NULL); |
| 1397 | async_work->nr = nr; | 1397 | async_work->nr = nr; |
| 1398 | 1398 | ||
| 1399 | btrfs_queue_worker(&root->fs_info->delayed_workers, &async_work->work); | 1399 | btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work); |
| 1400 | return 0; | 1400 | return 0; |
| 1401 | } | 1401 | } |
| 1402 | 1402 | ||
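This two-call idiom replaces every open-coded work.func/work.flags assignment in the series; passing NULL for the ordered callbacks means the work item needs no completion ordering. In sketch form (worker function name hypothetical):

    btrfs_init_work(&async_work->work, my_async_fn, NULL, NULL);
    btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work);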
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index f3bff89eecf0..31299646024d 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
| @@ -199,44 +199,31 @@ static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root, | |||
| 199 | */ | 199 | */ |
| 200 | static struct btrfs_delayed_ref_head * | 200 | static struct btrfs_delayed_ref_head * |
| 201 | find_ref_head(struct rb_root *root, u64 bytenr, | 201 | find_ref_head(struct rb_root *root, u64 bytenr, |
| 202 | struct btrfs_delayed_ref_head **last, int return_bigger) | 202 | int return_bigger) |
| 203 | { | 203 | { |
| 204 | struct rb_node *n; | 204 | struct rb_node *n; |
| 205 | struct btrfs_delayed_ref_head *entry; | 205 | struct btrfs_delayed_ref_head *entry; |
| 206 | int cmp = 0; | ||
| 207 | 206 | ||
| 208 | again: | ||
| 209 | n = root->rb_node; | 207 | n = root->rb_node; |
| 210 | entry = NULL; | 208 | entry = NULL; |
| 211 | while (n) { | 209 | while (n) { |
| 212 | entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node); | 210 | entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node); |
| 213 | if (last) | ||
| 214 | *last = entry; | ||
| 215 | 211 | ||
| 216 | if (bytenr < entry->node.bytenr) | 212 | if (bytenr < entry->node.bytenr) |
| 217 | cmp = -1; | ||
| 218 | else if (bytenr > entry->node.bytenr) | ||
| 219 | cmp = 1; | ||
| 220 | else | ||
| 221 | cmp = 0; | ||
| 222 | |||
| 223 | if (cmp < 0) | ||
| 224 | n = n->rb_left; | 213 | n = n->rb_left; |
| 225 | else if (cmp > 0) | 214 | else if (bytenr > entry->node.bytenr) |
| 226 | n = n->rb_right; | 215 | n = n->rb_right; |
| 227 | else | 216 | else |
| 228 | return entry; | 217 | return entry; |
| 229 | } | 218 | } |
| 230 | if (entry && return_bigger) { | 219 | if (entry && return_bigger) { |
| 231 | if (cmp > 0) { | 220 | if (bytenr > entry->node.bytenr) { |
| 232 | n = rb_next(&entry->href_node); | 221 | n = rb_next(&entry->href_node); |
| 233 | if (!n) | 222 | if (!n) |
| 234 | n = rb_first(root); | 223 | n = rb_first(root); |
| 235 | entry = rb_entry(n, struct btrfs_delayed_ref_head, | 224 | entry = rb_entry(n, struct btrfs_delayed_ref_head, |
| 236 | href_node); | 225 | href_node); |
| 237 | bytenr = entry->node.bytenr; | 226 | return entry; |
| 238 | return_bigger = 0; | ||
| 239 | goto again; | ||
| 240 | } | 227 | } |
| 241 | return entry; | 228 | return entry; |
| 242 | } | 229 | } |
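With the `last` out-parameter and the retry loop removed, return_bigger reduces to one wrap-around step: if the search settled on a head below bytenr, advance once, wrapping to the smallest head so delayed-ref processing can cycle the whole tree from run_delayed_start. Restated from the hunk (not a drop-in):

    if (entry && return_bigger && bytenr > entry->node.bytenr) {
            n = rb_next(&entry->href_node);
            if (!n)
                    n = rb_first(root); /* wrap to the smallest bytenr */
            entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
    }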
| @@ -415,12 +402,12 @@ btrfs_select_ref_head(struct btrfs_trans_handle *trans) | |||
| 415 | 402 | ||
| 416 | again: | 403 | again: |
| 417 | start = delayed_refs->run_delayed_start; | 404 | start = delayed_refs->run_delayed_start; |
| 418 | head = find_ref_head(&delayed_refs->href_root, start, NULL, 1); | 405 | head = find_ref_head(&delayed_refs->href_root, start, 1); |
| 419 | if (!head && !loop) { | 406 | if (!head && !loop) { |
| 420 | delayed_refs->run_delayed_start = 0; | 407 | delayed_refs->run_delayed_start = 0; |
| 421 | start = 0; | 408 | start = 0; |
| 422 | loop = true; | 409 | loop = true; |
| 423 | head = find_ref_head(&delayed_refs->href_root, start, NULL, 1); | 410 | head = find_ref_head(&delayed_refs->href_root, start, 1); |
| 424 | if (!head) | 411 | if (!head) |
| 425 | return NULL; | 412 | return NULL; |
| 426 | } else if (!head && loop) { | 413 | } else if (!head && loop) { |
| @@ -508,6 +495,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, | |||
| 508 | ref = btrfs_delayed_node_to_head(update); | 495 | ref = btrfs_delayed_node_to_head(update); |
| 509 | BUG_ON(existing_ref->is_data != ref->is_data); | 496 | BUG_ON(existing_ref->is_data != ref->is_data); |
| 510 | 497 | ||
| 498 | spin_lock(&existing_ref->lock); | ||
| 511 | if (ref->must_insert_reserved) { | 499 | if (ref->must_insert_reserved) { |
| 512 | /* if the extent was freed and then | 500 | /* if the extent was freed and then |
| 513 | * reallocated before the delayed ref | 501 | * reallocated before the delayed ref |
| @@ -549,7 +537,6 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, | |||
| 549 | * only need the lock for this case cause we could be processing it | 537 | * only need the lock for this case cause we could be processing it |
| 550 | * currently, for refs we just added we know we're a-ok. | 538 | * currently, for refs we just added we know we're a-ok. |
| 551 | */ | 539 | */ |
| 552 | spin_lock(&existing_ref->lock); | ||
| 553 | existing->ref_mod += update->ref_mod; | 540 | existing->ref_mod += update->ref_mod; |
| 554 | spin_unlock(&existing_ref->lock); | 541 | spin_unlock(&existing_ref->lock); |
| 555 | } | 542 | } |
| @@ -898,7 +885,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr) | |||
| 898 | struct btrfs_delayed_ref_root *delayed_refs; | 885 | struct btrfs_delayed_ref_root *delayed_refs; |
| 899 | 886 | ||
| 900 | delayed_refs = &trans->transaction->delayed_refs; | 887 | delayed_refs = &trans->transaction->delayed_refs; |
| 901 | return find_ref_head(&delayed_refs->href_root, bytenr, NULL, 0); | 888 | return find_ref_head(&delayed_refs->href_root, bytenr, 0); |
| 902 | } | 889 | } |
| 903 | 890 | ||
| 904 | void btrfs_delayed_ref_exit(void) | 891 | void btrfs_delayed_ref_exit(void) |
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 564c92638b20..9f2290509aca 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c | |||
| @@ -431,6 +431,35 @@ leave_no_lock: | |||
| 431 | return ret; | 431 | return ret; |
| 432 | } | 432 | } |
| 433 | 433 | ||
| 434 | /* | ||
| 435 | * Block until all in-flight bios are finished. | ||
| 436 | */ | ||
| 437 | static void btrfs_rm_dev_replace_blocked(struct btrfs_fs_info *fs_info) | ||
| 438 | { | ||
| 439 | s64 writers; | ||
| 440 | DEFINE_WAIT(wait); | ||
| 441 | |||
| 442 | set_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state); | ||
| 443 | do { | ||
| 444 | prepare_to_wait(&fs_info->replace_wait, &wait, | ||
| 445 | TASK_UNINTERRUPTIBLE); | ||
| 446 | writers = percpu_counter_sum(&fs_info->bio_counter); | ||
| 447 | if (writers) | ||
| 448 | schedule(); | ||
| 449 | finish_wait(&fs_info->replace_wait, &wait); | ||
| 450 | } while (writers); | ||
| 451 | } | ||
| 452 | |||
| 453 | /* | ||
| 454 | * The target device has been removed; it is safe to allow new bio requests. | ||
| 455 | */ | ||
| 456 | static void btrfs_rm_dev_replace_unblocked(struct btrfs_fs_info *fs_info) | ||
| 457 | { | ||
| 458 | clear_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state); | ||
| 459 | if (waitqueue_active(&fs_info->replace_wait)) | ||
| 460 | wake_up(&fs_info->replace_wait); | ||
| 461 | } | ||
| 462 | |||
| 434 | static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | 463 | static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, |
| 435 | int scrub_ret) | 464 | int scrub_ret) |
| 436 | { | 465 | { |
| @@ -458,17 +487,11 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
| 458 | src_device = dev_replace->srcdev; | 487 | src_device = dev_replace->srcdev; |
| 459 | btrfs_dev_replace_unlock(dev_replace); | 488 | btrfs_dev_replace_unlock(dev_replace); |
| 460 | 489 | ||
| 461 | /* replace old device with new one in mapping tree */ | ||
| 462 | if (!scrub_ret) | ||
| 463 | btrfs_dev_replace_update_device_in_mapping_tree(fs_info, | ||
| 464 | src_device, | ||
| 465 | tgt_device); | ||
| 466 | |||
| 467 | /* | 490 | /* |
| 468 | * flush all outstanding I/O and inode extent mappings before the | 491 | * flush all outstanding I/O and inode extent mappings before the |
| 469 | * copy operation is declared as being finished | 492 | * copy operation is declared as being finished |
| 470 | */ | 493 | */ |
| 471 | ret = btrfs_start_delalloc_roots(root->fs_info, 0); | 494 | ret = btrfs_start_delalloc_roots(root->fs_info, 0, -1); |
| 472 | if (ret) { | 495 | if (ret) { |
| 473 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | 496 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); |
| 474 | return ret; | 497 | return ret; |
| @@ -484,6 +507,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
| 484 | WARN_ON(ret); | 507 | WARN_ON(ret); |
| 485 | 508 | ||
| 486 | /* keep away write_all_supers() during the finishing procedure */ | 509 | /* keep away write_all_supers() during the finishing procedure */ |
| 510 | mutex_lock(&root->fs_info->chunk_mutex); | ||
| 487 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 511 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
| 488 | btrfs_dev_replace_lock(dev_replace); | 512 | btrfs_dev_replace_lock(dev_replace); |
| 489 | dev_replace->replace_state = | 513 | dev_replace->replace_state = |
| @@ -494,7 +518,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
| 494 | dev_replace->time_stopped = get_seconds(); | 518 | dev_replace->time_stopped = get_seconds(); |
| 495 | dev_replace->item_needs_writeback = 1; | 519 | dev_replace->item_needs_writeback = 1; |
| 496 | 520 | ||
| 497 | if (scrub_ret) { | 521 | /* replace old device with new one in mapping tree */ |
| 522 | if (!scrub_ret) { | ||
| 523 | btrfs_dev_replace_update_device_in_mapping_tree(fs_info, | ||
| 524 | src_device, | ||
| 525 | tgt_device); | ||
| 526 | } else { | ||
| 498 | printk_in_rcu(KERN_ERR | 527 | printk_in_rcu(KERN_ERR |
| 499 | "BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n", | 528 | "BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n", |
| 500 | src_device->missing ? "<missing disk>" : | 529 | src_device->missing ? "<missing disk>" : |
| @@ -503,6 +532,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
| 503 | rcu_str_deref(tgt_device->name), scrub_ret); | 532 | rcu_str_deref(tgt_device->name), scrub_ret); |
| 504 | btrfs_dev_replace_unlock(dev_replace); | 533 | btrfs_dev_replace_unlock(dev_replace); |
| 505 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 534 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
| 535 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
| 506 | if (tgt_device) | 536 | if (tgt_device) |
| 507 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); | 537 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); |
| 508 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | 538 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); |
| @@ -532,8 +562,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
| 532 | fs_info->fs_devices->latest_bdev = tgt_device->bdev; | 562 | fs_info->fs_devices->latest_bdev = tgt_device->bdev; |
| 533 | list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); | 563 | list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); |
| 534 | 564 | ||
| 565 | btrfs_rm_dev_replace_blocked(fs_info); | ||
| 566 | |||
| 535 | btrfs_rm_dev_replace_srcdev(fs_info, src_device); | 567 | btrfs_rm_dev_replace_srcdev(fs_info, src_device); |
| 536 | 568 | ||
| 569 | btrfs_rm_dev_replace_unblocked(fs_info); | ||
| 570 | |||
| 537 | /* | 571 | /* |
| 538 | * this is again a consistent state where no dev_replace procedure | 572 | * this is again a consistent state where no dev_replace procedure |
| 539 | * is running, the target device is part of the filesystem, the | 573 | * is running, the target device is part of the filesystem, the |
| @@ -543,6 +577,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
| 543 | */ | 577 | */ |
| 544 | btrfs_dev_replace_unlock(dev_replace); | 578 | btrfs_dev_replace_unlock(dev_replace); |
| 545 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 579 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
| 580 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
| 546 | 581 | ||
| 547 | /* write back the superblocks */ | 582 | /* write back the superblocks */ |
| 548 | trans = btrfs_start_transaction(root, 0); | 583 | trans = btrfs_start_transaction(root, 0); |
| @@ -862,3 +897,31 @@ void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace) | |||
| 862 | mutex_unlock(&dev_replace->lock_management_lock); | 897 | mutex_unlock(&dev_replace->lock_management_lock); |
| 863 | } | 898 | } |
| 864 | } | 899 | } |
| 900 | |||
| 901 | void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info) | ||
| 902 | { | ||
| 903 | percpu_counter_inc(&fs_info->bio_counter); | ||
| 904 | } | ||
| 905 | |||
| 906 | void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info) | ||
| 907 | { | ||
| 908 | percpu_counter_dec(&fs_info->bio_counter); | ||
| 909 | |||
| 910 | if (waitqueue_active(&fs_info->replace_wait)) | ||
| 911 | wake_up(&fs_info->replace_wait); | ||
| 912 | } | ||
| 913 | |||
| 914 | void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info) | ||
| 915 | { | ||
| 916 | DEFINE_WAIT(wait); | ||
| 917 | again: | ||
| 918 | percpu_counter_inc(&fs_info->bio_counter); | ||
| 919 | if (test_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state)) { | ||
| 920 | btrfs_bio_counter_dec(fs_info); | ||
| 921 | wait_event(fs_info->replace_wait, | ||
| 922 | !test_bit(BTRFS_FS_STATE_DEV_REPLACING, | ||
| 923 | &fs_info->fs_state)); | ||
| 924 | goto again; | ||
| 925 | } | ||
| 926 | |||
| 927 | } | ||
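btrfs_rm_dev_replace_blocked() and btrfs_bio_counter_inc_blocked() together implement a percpu-counter gate. Its general shape, as a hedged standalone sketch (names invented): the exact percpu_counter_sum() is required on the wait side because the fast approximate read can miss per-cpu deltas.

    #include <linux/percpu_counter.h>
    #include <linux/wait.h>

    /* Wait until every in-flight reference is gone. */
    static void gate_wait_idle(struct percpu_counter *c, wait_queue_head_t *wq)
    {
            DEFINE_WAIT(wait);
            s64 v;

            do {
                    prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
                    v = percpu_counter_sum(c);
                    if (v)
                            schedule();
                    finish_wait(wq, &wait);
            } while (v);
    }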
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 81ea55314b1f..bd0f752b797b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -678,32 +678,31 @@ static void end_workqueue_bio(struct bio *bio, int err) | |||
| 678 | 678 | ||
| 679 | fs_info = end_io_wq->info; | 679 | fs_info = end_io_wq->info; |
| 680 | end_io_wq->error = err; | 680 | end_io_wq->error = err; |
| 681 | end_io_wq->work.func = end_workqueue_fn; | 681 | btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL); |
| 682 | end_io_wq->work.flags = 0; | ||
| 683 | 682 | ||
| 684 | if (bio->bi_rw & REQ_WRITE) { | 683 | if (bio->bi_rw & REQ_WRITE) { |
| 685 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) | 684 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) |
| 686 | btrfs_queue_worker(&fs_info->endio_meta_write_workers, | 685 | btrfs_queue_work(fs_info->endio_meta_write_workers, |
| 687 | &end_io_wq->work); | 686 | &end_io_wq->work); |
| 688 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) | 687 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) |
| 689 | btrfs_queue_worker(&fs_info->endio_freespace_worker, | 688 | btrfs_queue_work(fs_info->endio_freespace_worker, |
| 690 | &end_io_wq->work); | 689 | &end_io_wq->work); |
| 691 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) | 690 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) |
| 692 | btrfs_queue_worker(&fs_info->endio_raid56_workers, | 691 | btrfs_queue_work(fs_info->endio_raid56_workers, |
| 693 | &end_io_wq->work); | 692 | &end_io_wq->work); |
| 694 | else | 693 | else |
| 695 | btrfs_queue_worker(&fs_info->endio_write_workers, | 694 | btrfs_queue_work(fs_info->endio_write_workers, |
| 696 | &end_io_wq->work); | 695 | &end_io_wq->work); |
| 697 | } else { | 696 | } else { |
| 698 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) | 697 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) |
| 699 | btrfs_queue_worker(&fs_info->endio_raid56_workers, | 698 | btrfs_queue_work(fs_info->endio_raid56_workers, |
| 700 | &end_io_wq->work); | 699 | &end_io_wq->work); |
| 701 | else if (end_io_wq->metadata) | 700 | else if (end_io_wq->metadata) |
| 702 | btrfs_queue_worker(&fs_info->endio_meta_workers, | 701 | btrfs_queue_work(fs_info->endio_meta_workers, |
| 703 | &end_io_wq->work); | 702 | &end_io_wq->work); |
| 704 | else | 703 | else |
| 705 | btrfs_queue_worker(&fs_info->endio_workers, | 704 | btrfs_queue_work(fs_info->endio_workers, |
| 706 | &end_io_wq->work); | 705 | &end_io_wq->work); |
| 707 | } | 706 | } |
| 708 | } | 707 | } |
| 709 | 708 | ||
| @@ -738,7 +737,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | |||
| 738 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) | 737 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) |
| 739 | { | 738 | { |
| 740 | unsigned long limit = min_t(unsigned long, | 739 | unsigned long limit = min_t(unsigned long, |
| 741 | info->workers.max_workers, | 740 | info->thread_pool_size, |
| 742 | info->fs_devices->open_devices); | 741 | info->fs_devices->open_devices); |
| 743 | return 256 * limit; | 742 | return 256 * limit; |
| 744 | } | 743 | } |
| @@ -811,11 +810,9 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
| 811 | async->submit_bio_start = submit_bio_start; | 810 | async->submit_bio_start = submit_bio_start; |
| 812 | async->submit_bio_done = submit_bio_done; | 811 | async->submit_bio_done = submit_bio_done; |
| 813 | 812 | ||
| 814 | async->work.func = run_one_async_start; | 813 | btrfs_init_work(&async->work, run_one_async_start, |
| 815 | async->work.ordered_func = run_one_async_done; | 814 | run_one_async_done, run_one_async_free); |
| 816 | async->work.ordered_free = run_one_async_free; | ||
| 817 | 815 | ||
| 818 | async->work.flags = 0; | ||
| 819 | async->bio_flags = bio_flags; | 816 | async->bio_flags = bio_flags; |
| 820 | async->bio_offset = bio_offset; | 817 | async->bio_offset = bio_offset; |
| 821 | 818 | ||
| @@ -824,9 +821,9 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
| 824 | atomic_inc(&fs_info->nr_async_submits); | 821 | atomic_inc(&fs_info->nr_async_submits); |
| 825 | 822 | ||
| 826 | if (rw & REQ_SYNC) | 823 | if (rw & REQ_SYNC) |
| 827 | btrfs_set_work_high_prio(&async->work); | 824 | btrfs_set_work_high_priority(&async->work); |
| 828 | 825 | ||
| 829 | btrfs_queue_worker(&fs_info->workers, &async->work); | 826 | btrfs_queue_work(fs_info->workers, &async->work); |
| 830 | 827 | ||
| 831 | while (atomic_read(&fs_info->async_submit_draining) && | 828 | while (atomic_read(&fs_info->async_submit_draining) && |
| 832 | atomic_read(&fs_info->nr_async_submits)) { | 829 | atomic_read(&fs_info->nr_async_submits)) { |
| @@ -1149,6 +1146,32 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 1149 | } | 1146 | } |
| 1150 | } | 1147 | } |
| 1151 | 1148 | ||
| 1149 | static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void) | ||
| 1150 | { | ||
| 1151 | struct btrfs_subvolume_writers *writers; | ||
| 1152 | int ret; | ||
| 1153 | |||
| 1154 | writers = kmalloc(sizeof(*writers), GFP_NOFS); | ||
| 1155 | if (!writers) | ||
| 1156 | return ERR_PTR(-ENOMEM); | ||
| 1157 | |||
| 1158 | ret = percpu_counter_init(&writers->counter, 0); | ||
| 1159 | if (ret < 0) { | ||
| 1160 | kfree(writers); | ||
| 1161 | return ERR_PTR(ret); | ||
| 1162 | } | ||
| 1163 | |||
| 1164 | init_waitqueue_head(&writers->wait); | ||
| 1165 | return writers; | ||
| 1166 | } | ||
| 1167 | |||
| 1168 | static void | ||
| 1169 | btrfs_free_subvolume_writers(struct btrfs_subvolume_writers *writers) | ||
| 1170 | { | ||
| 1171 | percpu_counter_destroy(&writers->counter); | ||
| 1172 | kfree(writers); | ||
| 1173 | } | ||
| 1174 | |||
| 1152 | static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | 1175 | static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, |
| 1153 | u32 stripesize, struct btrfs_root *root, | 1176 | u32 stripesize, struct btrfs_root *root, |
| 1154 | struct btrfs_fs_info *fs_info, | 1177 | struct btrfs_fs_info *fs_info, |
| @@ -1194,16 +1217,22 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 1194 | spin_lock_init(&root->log_extents_lock[1]); | 1217 | spin_lock_init(&root->log_extents_lock[1]); |
| 1195 | mutex_init(&root->objectid_mutex); | 1218 | mutex_init(&root->objectid_mutex); |
| 1196 | mutex_init(&root->log_mutex); | 1219 | mutex_init(&root->log_mutex); |
| 1220 | mutex_init(&root->ordered_extent_mutex); | ||
| 1221 | mutex_init(&root->delalloc_mutex); | ||
| 1197 | init_waitqueue_head(&root->log_writer_wait); | 1222 | init_waitqueue_head(&root->log_writer_wait); |
| 1198 | init_waitqueue_head(&root->log_commit_wait[0]); | 1223 | init_waitqueue_head(&root->log_commit_wait[0]); |
| 1199 | init_waitqueue_head(&root->log_commit_wait[1]); | 1224 | init_waitqueue_head(&root->log_commit_wait[1]); |
| 1225 | INIT_LIST_HEAD(&root->log_ctxs[0]); | ||
| 1226 | INIT_LIST_HEAD(&root->log_ctxs[1]); | ||
| 1200 | atomic_set(&root->log_commit[0], 0); | 1227 | atomic_set(&root->log_commit[0], 0); |
| 1201 | atomic_set(&root->log_commit[1], 0); | 1228 | atomic_set(&root->log_commit[1], 0); |
| 1202 | atomic_set(&root->log_writers, 0); | 1229 | atomic_set(&root->log_writers, 0); |
| 1203 | atomic_set(&root->log_batch, 0); | 1230 | atomic_set(&root->log_batch, 0); |
| 1204 | atomic_set(&root->orphan_inodes, 0); | 1231 | atomic_set(&root->orphan_inodes, 0); |
| 1205 | atomic_set(&root->refs, 1); | 1232 | atomic_set(&root->refs, 1); |
| 1233 | atomic_set(&root->will_be_snapshoted, 0); | ||
| 1206 | root->log_transid = 0; | 1234 | root->log_transid = 0; |
| 1235 | root->log_transid_committed = -1; | ||
| 1207 | root->last_log_commit = 0; | 1236 | root->last_log_commit = 0; |
| 1208 | if (fs_info) | 1237 | if (fs_info) |
| 1209 | extent_io_tree_init(&root->dirty_log_pages, | 1238 | extent_io_tree_init(&root->dirty_log_pages, |
| @@ -1417,6 +1446,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | |||
| 1417 | WARN_ON(root->log_root); | 1446 | WARN_ON(root->log_root); |
| 1418 | root->log_root = log_root; | 1447 | root->log_root = log_root; |
| 1419 | root->log_transid = 0; | 1448 | root->log_transid = 0; |
| 1449 | root->log_transid_committed = -1; | ||
| 1420 | root->last_log_commit = 0; | 1450 | root->last_log_commit = 0; |
| 1421 | return 0; | 1451 | return 0; |
| 1422 | } | 1452 | } |
| @@ -1498,6 +1528,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, | |||
| 1498 | int btrfs_init_fs_root(struct btrfs_root *root) | 1528 | int btrfs_init_fs_root(struct btrfs_root *root) |
| 1499 | { | 1529 | { |
| 1500 | int ret; | 1530 | int ret; |
| 1531 | struct btrfs_subvolume_writers *writers; | ||
| 1501 | 1532 | ||
| 1502 | root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); | 1533 | root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); |
| 1503 | root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), | 1534 | root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), |
| @@ -1507,6 +1538,13 @@ int btrfs_init_fs_root(struct btrfs_root *root) | |||
| 1507 | goto fail; | 1538 | goto fail; |
| 1508 | } | 1539 | } |
| 1509 | 1540 | ||
| 1541 | writers = btrfs_alloc_subvolume_writers(); | ||
| 1542 | if (IS_ERR(writers)) { | ||
| 1543 | ret = PTR_ERR(writers); | ||
| 1544 | goto fail; | ||
| 1545 | } | ||
| 1546 | root->subv_writers = writers; | ||
| 1547 | |||
| 1510 | btrfs_init_free_ino_ctl(root); | 1548 | btrfs_init_free_ino_ctl(root); |
| 1511 | mutex_init(&root->fs_commit_mutex); | 1549 | mutex_init(&root->fs_commit_mutex); |
| 1512 | spin_lock_init(&root->cache_lock); | 1550 | spin_lock_init(&root->cache_lock); |
| @@ -1514,8 +1552,11 @@ int btrfs_init_fs_root(struct btrfs_root *root) | |||
| 1514 | 1552 | ||
| 1515 | ret = get_anon_bdev(&root->anon_dev); | 1553 | ret = get_anon_bdev(&root->anon_dev); |
| 1516 | if (ret) | 1554 | if (ret) |
| 1517 | goto fail; | 1555 | goto free_writers; |
| 1518 | return 0; | 1556 | return 0; |
| 1557 | |||
| 1558 | free_writers: | ||
| 1559 | btrfs_free_subvolume_writers(root->subv_writers); | ||
| 1519 | fail: | 1560 | fail: |
| 1520 | kfree(root->free_ino_ctl); | 1561 | kfree(root->free_ino_ctl); |
| 1521 | kfree(root->free_ino_pinned); | 1562 | kfree(root->free_ino_pinned); |
| @@ -1990,23 +2031,22 @@ static noinline int next_root_backup(struct btrfs_fs_info *info, | |||
| 1990 | /* helper to cleanup workers */ | 2031 | /* helper to cleanup workers */ |
| 1991 | static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) | 2032 | static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) |
| 1992 | { | 2033 | { |
| 1993 | btrfs_stop_workers(&fs_info->generic_worker); | 2034 | btrfs_destroy_workqueue(fs_info->fixup_workers); |
| 1994 | btrfs_stop_workers(&fs_info->fixup_workers); | 2035 | btrfs_destroy_workqueue(fs_info->delalloc_workers); |
| 1995 | btrfs_stop_workers(&fs_info->delalloc_workers); | 2036 | btrfs_destroy_workqueue(fs_info->workers); |
| 1996 | btrfs_stop_workers(&fs_info->workers); | 2037 | btrfs_destroy_workqueue(fs_info->endio_workers); |
| 1997 | btrfs_stop_workers(&fs_info->endio_workers); | 2038 | btrfs_destroy_workqueue(fs_info->endio_meta_workers); |
| 1998 | btrfs_stop_workers(&fs_info->endio_meta_workers); | 2039 | btrfs_destroy_workqueue(fs_info->endio_raid56_workers); |
| 1999 | btrfs_stop_workers(&fs_info->endio_raid56_workers); | 2040 | btrfs_destroy_workqueue(fs_info->rmw_workers); |
| 2000 | btrfs_stop_workers(&fs_info->rmw_workers); | 2041 | btrfs_destroy_workqueue(fs_info->endio_meta_write_workers); |
| 2001 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2042 | btrfs_destroy_workqueue(fs_info->endio_write_workers); |
| 2002 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2043 | btrfs_destroy_workqueue(fs_info->endio_freespace_worker); |
| 2003 | btrfs_stop_workers(&fs_info->endio_freespace_worker); | 2044 | btrfs_destroy_workqueue(fs_info->submit_workers); |
| 2004 | btrfs_stop_workers(&fs_info->submit_workers); | 2045 | btrfs_destroy_workqueue(fs_info->delayed_workers); |
| 2005 | btrfs_stop_workers(&fs_info->delayed_workers); | 2046 | btrfs_destroy_workqueue(fs_info->caching_workers); |
| 2006 | btrfs_stop_workers(&fs_info->caching_workers); | 2047 | btrfs_destroy_workqueue(fs_info->readahead_workers); |
| 2007 | btrfs_stop_workers(&fs_info->readahead_workers); | 2048 | btrfs_destroy_workqueue(fs_info->flush_workers); |
| 2008 | btrfs_stop_workers(&fs_info->flush_workers); | 2049 | btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers); |
| 2009 | btrfs_stop_workers(&fs_info->qgroup_rescan_workers); | ||
| 2010 | } | 2050 | } |
| 2011 | 2051 | ||
| 2012 | static void free_root_extent_buffers(struct btrfs_root *root) | 2052 | static void free_root_extent_buffers(struct btrfs_root *root) |
| @@ -2097,6 +2137,8 @@ int open_ctree(struct super_block *sb, | |||
| 2097 | int err = -EINVAL; | 2137 | int err = -EINVAL; |
| 2098 | int num_backups_tried = 0; | 2138 | int num_backups_tried = 0; |
| 2099 | int backup_index = 0; | 2139 | int backup_index = 0; |
| 2140 | int max_active; | ||
| 2141 | int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND; | ||
| 2100 | bool create_uuid_tree; | 2142 | bool create_uuid_tree; |
| 2101 | bool check_uuid_tree; | 2143 | bool check_uuid_tree; |
| 2102 | 2144 | ||
| @@ -2133,10 +2175,16 @@ int open_ctree(struct super_block *sb, | |||
| 2133 | goto fail_dirty_metadata_bytes; | 2175 | goto fail_dirty_metadata_bytes; |
| 2134 | } | 2176 | } |
| 2135 | 2177 | ||
| 2178 | ret = percpu_counter_init(&fs_info->bio_counter, 0); | ||
| 2179 | if (ret) { | ||
| 2180 | err = ret; | ||
| 2181 | goto fail_delalloc_bytes; | ||
| 2182 | } | ||
| 2183 | |||
| 2136 | fs_info->btree_inode = new_inode(sb); | 2184 | fs_info->btree_inode = new_inode(sb); |
| 2137 | if (!fs_info->btree_inode) { | 2185 | if (!fs_info->btree_inode) { |
| 2138 | err = -ENOMEM; | 2186 | err = -ENOMEM; |
| 2139 | goto fail_delalloc_bytes; | 2187 | goto fail_bio_counter; |
| 2140 | } | 2188 | } |
| 2141 | 2189 | ||
| 2142 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); | 2190 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); |
| @@ -2159,6 +2207,7 @@ int open_ctree(struct super_block *sb, | |||
| 2159 | spin_lock_init(&fs_info->buffer_lock); | 2207 | spin_lock_init(&fs_info->buffer_lock); |
| 2160 | rwlock_init(&fs_info->tree_mod_log_lock); | 2208 | rwlock_init(&fs_info->tree_mod_log_lock); |
| 2161 | mutex_init(&fs_info->reloc_mutex); | 2209 | mutex_init(&fs_info->reloc_mutex); |
| 2210 | mutex_init(&fs_info->delalloc_root_mutex); | ||
| 2162 | seqlock_init(&fs_info->profiles_lock); | 2211 | seqlock_init(&fs_info->profiles_lock); |
| 2163 | 2212 | ||
| 2164 | init_completion(&fs_info->kobj_unregister); | 2213 | init_completion(&fs_info->kobj_unregister); |
| @@ -2211,6 +2260,7 @@ int open_ctree(struct super_block *sb, | |||
| 2211 | atomic_set(&fs_info->scrub_pause_req, 0); | 2260 | atomic_set(&fs_info->scrub_pause_req, 0); |
| 2212 | atomic_set(&fs_info->scrubs_paused, 0); | 2261 | atomic_set(&fs_info->scrubs_paused, 0); |
| 2213 | atomic_set(&fs_info->scrub_cancel_req, 0); | 2262 | atomic_set(&fs_info->scrub_cancel_req, 0); |
| 2263 | init_waitqueue_head(&fs_info->replace_wait); | ||
| 2214 | init_waitqueue_head(&fs_info->scrub_pause_wait); | 2264 | init_waitqueue_head(&fs_info->scrub_pause_wait); |
| 2215 | fs_info->scrub_workers_refcnt = 0; | 2265 | fs_info->scrub_workers_refcnt = 0; |
| 2216 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 2266 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
| @@ -2458,104 +2508,68 @@ int open_ctree(struct super_block *sb, | |||
| 2458 | goto fail_alloc; | 2508 | goto fail_alloc; |
| 2459 | } | 2509 | } |
| 2460 | 2510 | ||
| 2461 | btrfs_init_workers(&fs_info->generic_worker, | 2511 | max_active = fs_info->thread_pool_size; |
| 2462 | "genwork", 1, NULL); | ||
| 2463 | |||
| 2464 | btrfs_init_workers(&fs_info->workers, "worker", | ||
| 2465 | fs_info->thread_pool_size, | ||
| 2466 | &fs_info->generic_worker); | ||
| 2467 | 2512 | ||
| 2468 | btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", | 2513 | fs_info->workers = |
| 2469 | fs_info->thread_pool_size, NULL); | 2514 | btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI, |
| 2515 | max_active, 16); | ||
| 2470 | 2516 | ||
| 2471 | btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc", | 2517 | fs_info->delalloc_workers = |
| 2472 | fs_info->thread_pool_size, NULL); | 2518 | btrfs_alloc_workqueue("delalloc", flags, max_active, 2); |
| 2473 | 2519 | ||
| 2474 | btrfs_init_workers(&fs_info->submit_workers, "submit", | 2520 | fs_info->flush_workers = |
| 2475 | min_t(u64, fs_devices->num_devices, | 2521 | btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0); |
| 2476 | fs_info->thread_pool_size), NULL); | ||
| 2477 | 2522 | ||
| 2478 | btrfs_init_workers(&fs_info->caching_workers, "cache", | 2523 | fs_info->caching_workers = |
| 2479 | fs_info->thread_pool_size, NULL); | 2524 | btrfs_alloc_workqueue("cache", flags, max_active, 0); |
| 2480 | 2525 | ||
| 2481 | /* a higher idle thresh on the submit workers makes it much more | 2526 | /* |
| 2527 | * a higher idle thresh on the submit workers makes it much more | ||
| 2482 | * likely that bios will be sent down in a sane order to the | 2528 | * likely that bios will be sent down in a sane order to the |
| 2483 | * devices | 2529 | * devices |
| 2484 | */ | 2530 | */ |
| 2485 | fs_info->submit_workers.idle_thresh = 64; | 2531 | fs_info->submit_workers = |
| 2486 | 2532 | btrfs_alloc_workqueue("submit", flags, | |
| 2487 | fs_info->workers.idle_thresh = 16; | 2533 | min_t(u64, fs_devices->num_devices, |
| 2488 | fs_info->workers.ordered = 1; | 2534 | max_active), 64); |
| 2489 | 2535 | ||
| 2490 | fs_info->delalloc_workers.idle_thresh = 2; | 2536 | fs_info->fixup_workers = |
| 2491 | fs_info->delalloc_workers.ordered = 1; | 2537 | btrfs_alloc_workqueue("fixup", flags, 1, 0); |
| 2492 | |||
| 2493 | btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1, | ||
| 2494 | &fs_info->generic_worker); | ||
| 2495 | btrfs_init_workers(&fs_info->endio_workers, "endio", | ||
| 2496 | fs_info->thread_pool_size, | ||
| 2497 | &fs_info->generic_worker); | ||
| 2498 | btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta", | ||
| 2499 | fs_info->thread_pool_size, | ||
| 2500 | &fs_info->generic_worker); | ||
| 2501 | btrfs_init_workers(&fs_info->endio_meta_write_workers, | ||
| 2502 | "endio-meta-write", fs_info->thread_pool_size, | ||
| 2503 | &fs_info->generic_worker); | ||
| 2504 | btrfs_init_workers(&fs_info->endio_raid56_workers, | ||
| 2505 | "endio-raid56", fs_info->thread_pool_size, | ||
| 2506 | &fs_info->generic_worker); | ||
| 2507 | btrfs_init_workers(&fs_info->rmw_workers, | ||
| 2508 | "rmw", fs_info->thread_pool_size, | ||
| 2509 | &fs_info->generic_worker); | ||
| 2510 | btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", | ||
| 2511 | fs_info->thread_pool_size, | ||
| 2512 | &fs_info->generic_worker); | ||
| 2513 | btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write", | ||
| 2514 | 1, &fs_info->generic_worker); | ||
| 2515 | btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta", | ||
| 2516 | fs_info->thread_pool_size, | ||
| 2517 | &fs_info->generic_worker); | ||
| 2518 | btrfs_init_workers(&fs_info->readahead_workers, "readahead", | ||
| 2519 | fs_info->thread_pool_size, | ||
| 2520 | &fs_info->generic_worker); | ||
| 2521 | btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1, | ||
| 2522 | &fs_info->generic_worker); | ||
| 2523 | 2538 | ||
| 2524 | /* | 2539 | /* |
| 2525 | * endios are largely parallel and should have a very | 2540 | * endios are largely parallel and should have a very |
| 2526 | * low idle thresh | 2541 | * low idle thresh |
| 2527 | */ | 2542 | */ |
| 2528 | fs_info->endio_workers.idle_thresh = 4; | 2543 | fs_info->endio_workers = |
| 2529 | fs_info->endio_meta_workers.idle_thresh = 4; | 2544 | btrfs_alloc_workqueue("endio", flags, max_active, 4); |
| 2530 | fs_info->endio_raid56_workers.idle_thresh = 4; | 2545 | fs_info->endio_meta_workers = |
| 2531 | fs_info->rmw_workers.idle_thresh = 2; | 2546 | btrfs_alloc_workqueue("endio-meta", flags, max_active, 4); |
| 2532 | 2547 | fs_info->endio_meta_write_workers = | |
| 2533 | fs_info->endio_write_workers.idle_thresh = 2; | 2548 | btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2); |
| 2534 | fs_info->endio_meta_write_workers.idle_thresh = 2; | 2549 | fs_info->endio_raid56_workers = |
| 2535 | fs_info->readahead_workers.idle_thresh = 2; | 2550 | btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4); |
| 2536 | 2551 | fs_info->rmw_workers = | |
| 2537 | /* | 2552 | btrfs_alloc_workqueue("rmw", flags, max_active, 2); |
| 2538 | * btrfs_start_workers can really only fail because of ENOMEM so just | 2553 | fs_info->endio_write_workers = |
| 2539 | * return -ENOMEM if any of these fail. | 2554 | btrfs_alloc_workqueue("endio-write", flags, max_active, 2); |
| 2540 | */ | 2555 | fs_info->endio_freespace_worker = |
| 2541 | ret = btrfs_start_workers(&fs_info->workers); | 2556 | btrfs_alloc_workqueue("freespace-write", flags, max_active, 0); |
| 2542 | ret |= btrfs_start_workers(&fs_info->generic_worker); | 2557 | fs_info->delayed_workers = |
| 2543 | ret |= btrfs_start_workers(&fs_info->submit_workers); | 2558 | btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0); |
| 2544 | ret |= btrfs_start_workers(&fs_info->delalloc_workers); | 2559 | fs_info->readahead_workers = |
| 2545 | ret |= btrfs_start_workers(&fs_info->fixup_workers); | 2560 | btrfs_alloc_workqueue("readahead", flags, max_active, 2); |
| 2546 | ret |= btrfs_start_workers(&fs_info->endio_workers); | 2561 | fs_info->qgroup_rescan_workers = |
| 2547 | ret |= btrfs_start_workers(&fs_info->endio_meta_workers); | 2562 | btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0); |
| 2548 | ret |= btrfs_start_workers(&fs_info->rmw_workers); | 2563 | |
| 2549 | ret |= btrfs_start_workers(&fs_info->endio_raid56_workers); | 2564 | if (!(fs_info->workers && fs_info->delalloc_workers && |
| 2550 | ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers); | 2565 | fs_info->submit_workers && fs_info->flush_workers && |
| 2551 | ret |= btrfs_start_workers(&fs_info->endio_write_workers); | 2566 | fs_info->endio_workers && fs_info->endio_meta_workers && |
| 2552 | ret |= btrfs_start_workers(&fs_info->endio_freespace_worker); | 2567 | fs_info->endio_meta_write_workers && |
| 2553 | ret |= btrfs_start_workers(&fs_info->delayed_workers); | 2568 | fs_info->endio_write_workers && fs_info->endio_raid56_workers && |
| 2554 | ret |= btrfs_start_workers(&fs_info->caching_workers); | 2569 | fs_info->endio_freespace_worker && fs_info->rmw_workers && |
| 2555 | ret |= btrfs_start_workers(&fs_info->readahead_workers); | 2570 | fs_info->caching_workers && fs_info->readahead_workers && |
| 2556 | ret |= btrfs_start_workers(&fs_info->flush_workers); | 2571 | fs_info->fixup_workers && fs_info->delayed_workers && |
| 2557 | ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers); | 2572 | fs_info->qgroup_rescan_workers)) { |
| 2558 | if (ret) { | ||
| 2559 | err = -ENOMEM; | 2573 | err = -ENOMEM; |
| 2560 | goto fail_sb_buffer; | 2574 | goto fail_sb_buffer; |
| 2561 | } | 2575 | } |
| @@ -2963,6 +2977,8 @@ fail_iput: | |||
| 2963 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2977 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
| 2964 | 2978 | ||
| 2965 | iput(fs_info->btree_inode); | 2979 | iput(fs_info->btree_inode); |
| 2980 | fail_bio_counter: | ||
| 2981 | percpu_counter_destroy(&fs_info->bio_counter); | ||
| 2966 | fail_delalloc_bytes: | 2982 | fail_delalloc_bytes: |
| 2967 | percpu_counter_destroy(&fs_info->delalloc_bytes); | 2983 | percpu_counter_destroy(&fs_info->delalloc_bytes); |
| 2968 | fail_dirty_metadata_bytes: | 2984 | fail_dirty_metadata_bytes: |
| @@ -3244,6 +3260,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
| 3244 | /* send down all the barriers */ | 3260 | /* send down all the barriers */ |
| 3245 | head = &info->fs_devices->devices; | 3261 | head = &info->fs_devices->devices; |
| 3246 | list_for_each_entry_rcu(dev, head, dev_list) { | 3262 | list_for_each_entry_rcu(dev, head, dev_list) { |
| 3263 | if (dev->missing) | ||
| 3264 | continue; | ||
| 3247 | if (!dev->bdev) { | 3265 | if (!dev->bdev) { |
| 3248 | errors_send++; | 3266 | errors_send++; |
| 3249 | continue; | 3267 | continue; |
| @@ -3258,6 +3276,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
| 3258 | 3276 | ||
| 3259 | /* wait for all the barriers */ | 3277 | /* wait for all the barriers */ |
| 3260 | list_for_each_entry_rcu(dev, head, dev_list) { | 3278 | list_for_each_entry_rcu(dev, head, dev_list) { |
| 3279 | if (dev->missing) | ||
| 3280 | continue; | ||
| 3261 | if (!dev->bdev) { | 3281 | if (!dev->bdev) { |
| 3262 | errors_wait++; | 3282 | errors_wait++; |
| 3263 | continue; | 3283 | continue; |
| @@ -3477,6 +3497,8 @@ static void free_fs_root(struct btrfs_root *root) | |||
| 3477 | root->orphan_block_rsv = NULL; | 3497 | root->orphan_block_rsv = NULL; |
| 3478 | if (root->anon_dev) | 3498 | if (root->anon_dev) |
| 3479 | free_anon_bdev(root->anon_dev); | 3499 | free_anon_bdev(root->anon_dev); |
| 3500 | if (root->subv_writers) | ||
| 3501 | btrfs_free_subvolume_writers(root->subv_writers); | ||
| 3480 | free_extent_buffer(root->node); | 3502 | free_extent_buffer(root->node); |
| 3481 | free_extent_buffer(root->commit_root); | 3503 | free_extent_buffer(root->commit_root); |
| 3482 | kfree(root->free_ino_ctl); | 3504 | kfree(root->free_ino_ctl); |
| @@ -3610,6 +3632,7 @@ int close_ctree(struct btrfs_root *root) | |||
| 3610 | 3632 | ||
| 3611 | percpu_counter_destroy(&fs_info->dirty_metadata_bytes); | 3633 | percpu_counter_destroy(&fs_info->dirty_metadata_bytes); |
| 3612 | percpu_counter_destroy(&fs_info->delalloc_bytes); | 3634 | percpu_counter_destroy(&fs_info->delalloc_bytes); |
| 3635 | percpu_counter_destroy(&fs_info->bio_counter); | ||
| 3613 | bdi_destroy(&fs_info->bdi); | 3636 | bdi_destroy(&fs_info->bdi); |
| 3614 | cleanup_srcu_struct(&fs_info->subvol_srcu); | 3637 | cleanup_srcu_struct(&fs_info->subvol_srcu); |
| 3615 | 3638 | ||
| @@ -3791,9 +3814,11 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) | |||
| 3791 | list_move_tail(&root->ordered_root, | 3814 | list_move_tail(&root->ordered_root, |
| 3792 | &fs_info->ordered_roots); | 3815 | &fs_info->ordered_roots); |
| 3793 | 3816 | ||
| 3817 | spin_unlock(&fs_info->ordered_root_lock); | ||
| 3794 | btrfs_destroy_ordered_extents(root); | 3818 | btrfs_destroy_ordered_extents(root); |
| 3795 | 3819 | ||
| 3796 | cond_resched_lock(&fs_info->ordered_root_lock); | 3820 | cond_resched(); |
| 3821 | spin_lock(&fs_info->ordered_root_lock); | ||
| 3797 | } | 3822 | } |
| 3798 | spin_unlock(&fs_info->ordered_root_lock); | 3823 | spin_unlock(&fs_info->ordered_root_lock); |
| 3799 | } | 3824 | } |
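The switch from cond_resched_lock() to an explicit unlock/relock cycle lets btrfs_destroy_ordered_extents() run, and possibly sleep, without holding the spinlock. The resulting loop shape, sketched with the names from the hunk (the splice list is assumed from the surrounding function):

    spin_lock(&fs_info->ordered_root_lock);
    while (!list_empty(&splice)) {
            root = list_first_entry(&splice, struct btrfs_root, ordered_root);
            list_move_tail(&root->ordered_root, &fs_info->ordered_roots);

            spin_unlock(&fs_info->ordered_root_lock);
            btrfs_destroy_ordered_extents(root); /* may sleep */
            cond_resched();
            spin_lock(&fs_info->ordered_root_lock);
    }
    spin_unlock(&fs_info->ordered_root_lock);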
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 32312e09f0f5..c6b6a6e3e735 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -549,7 +549,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
| 549 | caching_ctl->block_group = cache; | 549 | caching_ctl->block_group = cache; |
| 550 | caching_ctl->progress = cache->key.objectid; | 550 | caching_ctl->progress = cache->key.objectid; |
| 551 | atomic_set(&caching_ctl->count, 1); | 551 | atomic_set(&caching_ctl->count, 1); |
| 552 | caching_ctl->work.func = caching_thread; | 552 | btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL); |
| 553 | 553 | ||
| 554 | spin_lock(&cache->lock); | 554 | spin_lock(&cache->lock); |
| 555 | /* | 555 | /* |
| @@ -640,7 +640,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
| 640 | 640 | ||
| 641 | btrfs_get_block_group(cache); | 641 | btrfs_get_block_group(cache); |
| 642 | 642 | ||
| 643 | btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work); | 643 | btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work); |
| 644 | 644 | ||
| 645 | return ret; | 645 | return ret; |
| 646 | } | 646 | } |
| @@ -3971,7 +3971,7 @@ static int can_overcommit(struct btrfs_root *root, | |||
| 3971 | } | 3971 | } |
| 3972 | 3972 | ||
| 3973 | static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, | 3973 | static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, |
| 3974 | unsigned long nr_pages) | 3974 | unsigned long nr_pages, int nr_items) |
| 3975 | { | 3975 | { |
| 3976 | struct super_block *sb = root->fs_info->sb; | 3976 | struct super_block *sb = root->fs_info->sb; |
| 3977 | 3977 | ||
| @@ -3986,9 +3986,9 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, | |||
| 3986 | * the filesystem is readonly (all dirty pages are written to | 3986 | * the filesystem is readonly (all dirty pages are written to |
| 3987 | * the disk). | 3987 | * the disk). |
| 3988 | */ | 3988 | */ |
| 3989 | btrfs_start_delalloc_roots(root->fs_info, 0); | 3989 | btrfs_start_delalloc_roots(root->fs_info, 0, nr_items); |
| 3990 | if (!current->journal_info) | 3990 | if (!current->journal_info) |
| 3991 | btrfs_wait_ordered_roots(root->fs_info, -1); | 3991 | btrfs_wait_ordered_roots(root->fs_info, nr_items); |
| 3992 | } | 3992 | } |
| 3993 | } | 3993 | } |
| 3994 | 3994 | ||
| @@ -4045,7 +4045,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
| 4045 | while (delalloc_bytes && loops < 3) { | 4045 | while (delalloc_bytes && loops < 3) { |
| 4046 | max_reclaim = min(delalloc_bytes, to_reclaim); | 4046 | max_reclaim = min(delalloc_bytes, to_reclaim); |
| 4047 | nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; | 4047 | nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; |
| 4048 | btrfs_writeback_inodes_sb_nr(root, nr_pages); | 4048 | btrfs_writeback_inodes_sb_nr(root, nr_pages, items); |
| 4049 | /* | 4049 | /* |
| 4050 | * We need to wait for the async pages to actually start before | 4050 | * We need to wait for the async pages to actually start before |
| 4051 | * we do anything. | 4051 | * we do anything. |
| @@ -4112,13 +4112,9 @@ static int may_commit_transaction(struct btrfs_root *root, | |||
| 4112 | goto commit; | 4112 | goto commit; |
| 4113 | 4113 | ||
| 4114 | /* See if there is enough pinned space to make this reservation */ | 4114 | /* See if there is enough pinned space to make this reservation */ |
| 4115 | spin_lock(&space_info->lock); | ||
| 4116 | if (percpu_counter_compare(&space_info->total_bytes_pinned, | 4115 | if (percpu_counter_compare(&space_info->total_bytes_pinned, |
| 4117 | bytes) >= 0) { | 4116 | bytes) >= 0) |
| 4118 | spin_unlock(&space_info->lock); | ||
| 4119 | goto commit; | 4117 | goto commit; |
| 4120 | } | ||
| 4121 | spin_unlock(&space_info->lock); | ||
| 4122 | 4118 | ||
| 4123 | /* | 4119 | /* |
| 4124 | * See if there is some space in the delayed insertion reservation for | 4120 | * See if there is some space in the delayed insertion reservation for |
| @@ -4127,16 +4123,13 @@ static int may_commit_transaction(struct btrfs_root *root, | |||
| 4127 | if (space_info != delayed_rsv->space_info) | 4123 | if (space_info != delayed_rsv->space_info) |
| 4128 | return -ENOSPC; | 4124 | return -ENOSPC; |
| 4129 | 4125 | ||
| 4130 | spin_lock(&space_info->lock); | ||
| 4131 | spin_lock(&delayed_rsv->lock); | 4126 | spin_lock(&delayed_rsv->lock); |
| 4132 | if (percpu_counter_compare(&space_info->total_bytes_pinned, | 4127 | if (percpu_counter_compare(&space_info->total_bytes_pinned, |
| 4133 | bytes - delayed_rsv->size) >= 0) { | 4128 | bytes - delayed_rsv->size) >= 0) { |
| 4134 | spin_unlock(&delayed_rsv->lock); | 4129 | spin_unlock(&delayed_rsv->lock); |
| 4135 | spin_unlock(&space_info->lock); | ||
| 4136 | return -ENOSPC; | 4130 | return -ENOSPC; |
| 4137 | } | 4131 | } |
| 4138 | spin_unlock(&delayed_rsv->lock); | 4132 | spin_unlock(&delayed_rsv->lock); |
| 4139 | spin_unlock(&space_info->lock); | ||
| 4140 | 4133 | ||
| 4141 | commit: | 4134 | commit: |
| 4142 | trans = btrfs_join_transaction(root); | 4135 | trans = btrfs_join_transaction(root); |
| @@ -4181,7 +4174,7 @@ static int flush_space(struct btrfs_root *root, | |||
| 4181 | break; | 4174 | break; |
| 4182 | case FLUSH_DELALLOC: | 4175 | case FLUSH_DELALLOC: |
| 4183 | case FLUSH_DELALLOC_WAIT: | 4176 | case FLUSH_DELALLOC_WAIT: |
| 4184 | shrink_delalloc(root, num_bytes, orig_bytes, | 4177 | shrink_delalloc(root, num_bytes * 2, orig_bytes, |
| 4185 | state == FLUSH_DELALLOC_WAIT); | 4178 | state == FLUSH_DELALLOC_WAIT); |
| 4186 | break; | 4179 | break; |
| 4187 | case ALLOC_CHUNK: | 4180 | case ALLOC_CHUNK: |
| @@ -8938,3 +8931,38 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | |||
| 8938 | range->len = trimmed; | 8931 | range->len = trimmed; |
| 8939 | return ret; | 8932 | return ret; |
| 8940 | } | 8933 | } |
| 8934 | |||
| 8935 | /* | ||
| 8936 | * btrfs_{start,end}_nocow_write() is similar to mnt_{want,drop}_write(): | ||
| 8937 | * they are used to prevent tasks from writing data into the page cache | ||
| 8938 | * via nocow before the subvolume is snapshotted, so that such data is | ||
| 8939 | * flushed to disk only after the snapshot creation. | ||
| 8940 | */ | ||
| 8941 | void btrfs_end_nocow_write(struct btrfs_root *root) | ||
| 8942 | { | ||
| 8943 | percpu_counter_dec(&root->subv_writers->counter); | ||
| 8944 | /* | ||
| 8945 | * Make sure counter is updated before we wake up | ||
| 8946 | * waiters. | ||
| 8947 | */ | ||
| 8948 | smp_mb(); | ||
| 8949 | if (waitqueue_active(&root->subv_writers->wait)) | ||
| 8950 | wake_up(&root->subv_writers->wait); | ||
| 8951 | } | ||
| 8952 | |||
| 8953 | int btrfs_start_nocow_write(struct btrfs_root *root) | ||
| 8954 | { | ||
| 8955 | if (unlikely(atomic_read(&root->will_be_snapshoted))) | ||
| 8956 | return 0; | ||
| 8957 | |||
| 8958 | percpu_counter_inc(&root->subv_writers->counter); | ||
| 8959 | /* | ||
| 8960 | * Make sure counter is updated before we check for snapshot creation. | ||
| 8961 | */ | ||
| 8962 | smp_mb(); | ||
| 8963 | if (unlikely(atomic_read(&root->will_be_snapshoted))) { | ||
| 8964 | btrfs_end_nocow_write(root); | ||
| 8965 | return 0; | ||
| 8966 | } | ||
| 8967 | return 1; | ||
| 8968 | } | ||
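A plausible caller shape for this pair, hedged because the write path is not in this hunk (check_can_nocow() in file.c below is the in-tree user, and it maps a failed start to -ENOSPC):

    /* Sketch: guard an in-place (nocow) write against a concurrent
     * snapshot. A zero return means fall back to COW. */
    if (!btrfs_start_nocow_write(root))
            return -ENOSPC;

    /* ... write in place; no snapshot can begin until we finish ... */

    btrfs_end_nocow_write(root);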
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 85bbd01f1271..ae69a00387e7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -229,12 +229,14 @@ void free_extent_state(struct extent_state *state) | |||
| 229 | } | 229 | } |
| 230 | } | 230 | } |
| 231 | 231 | ||
| 232 | static struct rb_node *tree_insert(struct rb_root *root, u64 offset, | 232 | static struct rb_node *tree_insert(struct rb_root *root, |
| 233 | struct rb_node *search_start, | ||
| 234 | u64 offset, | ||
| 233 | struct rb_node *node, | 235 | struct rb_node *node, |
| 234 | struct rb_node ***p_in, | 236 | struct rb_node ***p_in, |
| 235 | struct rb_node **parent_in) | 237 | struct rb_node **parent_in) |
| 236 | { | 238 | { |
| 237 | struct rb_node **p = &root->rb_node; | 239 | struct rb_node **p; |
| 238 | struct rb_node *parent = NULL; | 240 | struct rb_node *parent = NULL; |
| 239 | struct tree_entry *entry; | 241 | struct tree_entry *entry; |
| 240 | 242 | ||
| @@ -244,6 +246,7 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, | |||
| 244 | goto do_insert; | 246 | goto do_insert; |
| 245 | } | 247 | } |
| 246 | 248 | ||
| 249 | p = search_start ? &search_start : &root->rb_node; | ||
| 247 | while (*p) { | 250 | while (*p) { |
| 248 | parent = *p; | 251 | parent = *p; |
| 249 | entry = rb_entry(parent, struct tree_entry, rb_node); | 252 | entry = rb_entry(parent, struct tree_entry, rb_node); |
| @@ -430,7 +433,7 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 430 | 433 | ||
| 431 | set_state_bits(tree, state, bits); | 434 | set_state_bits(tree, state, bits); |
| 432 | 435 | ||
| 433 | node = tree_insert(&tree->state, end, &state->rb_node, p, parent); | 436 | node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent); |
| 434 | if (node) { | 437 | if (node) { |
| 435 | struct extent_state *found; | 438 | struct extent_state *found; |
| 436 | found = rb_entry(node, struct extent_state, rb_node); | 439 | found = rb_entry(node, struct extent_state, rb_node); |
| @@ -477,8 +480,8 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | |||
| 477 | prealloc->state = orig->state; | 480 | prealloc->state = orig->state; |
| 478 | orig->start = split; | 481 | orig->start = split; |
| 479 | 482 | ||
| 480 | node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node, | 483 | node = tree_insert(&tree->state, &orig->rb_node, prealloc->end, |
| 481 | NULL, NULL); | 484 | &prealloc->rb_node, NULL, NULL); |
| 482 | if (node) { | 485 | if (node) { |
| 483 | free_extent_state(prealloc); | 486 | free_extent_state(prealloc); |
| 484 | return -EEXIST; | 487 | return -EEXIST; |
| @@ -2757,7 +2760,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset, | |||
| 2757 | 2760 | ||
| 2758 | if (em_cached && *em_cached) { | 2761 | if (em_cached && *em_cached) { |
| 2759 | em = *em_cached; | 2762 | em = *em_cached; |
| 2760 | if (em->in_tree && start >= em->start && | 2763 | if (extent_map_in_tree(em) && start >= em->start && |
| 2761 | start < extent_map_end(em)) { | 2764 | start < extent_map_end(em)) { |
| 2762 | atomic_inc(&em->refs); | 2765 | atomic_inc(&em->refs); |
| 2763 | return em; | 2766 | return em; |
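tree_insert() now takes an optional search_start node so split_state() can begin its descent at the entry being split instead of at the root, which is cheaper for a tree this hot. Seeding the parent walk from a hint looks roughly like this (fragment from inside the insert routine; comparison details abbreviated from the original):

    struct rb_node **p;
    struct rb_node *parent = NULL;

    /* A hint is valid only if the new key must land in its subtree,
     * which holds when splitting an existing entry in place. */
    p = search_start ? &search_start : &root->rb_node;
    while (*p) {
            parent = *p;
            entry = rb_entry(parent, struct tree_entry, rb_node);
            if (offset < entry->start)
                    p = &(*p)->rb_left;
            else if (offset > entry->end)
                    p = &(*p)->rb_right;
            else
                    return parent;  /* overlap, caller sees -EEXIST */
    }
    rb_link_node(node, parent, p);
    rb_insert_color(node, root);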
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 996ad56b57db..1874aee69c86 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
| @@ -51,7 +51,7 @@ struct extent_map *alloc_extent_map(void) | |||
| 51 | em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS); | 51 | em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS); |
| 52 | if (!em) | 52 | if (!em) |
| 53 | return NULL; | 53 | return NULL; |
| 54 | em->in_tree = 0; | 54 | RB_CLEAR_NODE(&em->rb_node); |
| 55 | em->flags = 0; | 55 | em->flags = 0; |
| 56 | em->compress_type = BTRFS_COMPRESS_NONE; | 56 | em->compress_type = BTRFS_COMPRESS_NONE; |
| 57 | em->generation = 0; | 57 | em->generation = 0; |
| @@ -73,7 +73,7 @@ void free_extent_map(struct extent_map *em) | |||
| 73 | return; | 73 | return; |
| 74 | WARN_ON(atomic_read(&em->refs) == 0); | 74 | WARN_ON(atomic_read(&em->refs) == 0); |
| 75 | if (atomic_dec_and_test(&em->refs)) { | 75 | if (atomic_dec_and_test(&em->refs)) { |
| 76 | WARN_ON(em->in_tree); | 76 | WARN_ON(extent_map_in_tree(em)); |
| 77 | WARN_ON(!list_empty(&em->list)); | 77 | WARN_ON(!list_empty(&em->list)); |
| 78 | kmem_cache_free(extent_map_cache, em); | 78 | kmem_cache_free(extent_map_cache, em); |
| 79 | } | 79 | } |
| @@ -99,8 +99,6 @@ static int tree_insert(struct rb_root *root, struct extent_map *em) | |||
| 99 | parent = *p; | 99 | parent = *p; |
| 100 | entry = rb_entry(parent, struct extent_map, rb_node); | 100 | entry = rb_entry(parent, struct extent_map, rb_node); |
| 101 | 101 | ||
| 102 | WARN_ON(!entry->in_tree); | ||
| 103 | |||
| 104 | if (em->start < entry->start) | 102 | if (em->start < entry->start) |
| 105 | p = &(*p)->rb_left; | 103 | p = &(*p)->rb_left; |
| 106 | else if (em->start >= extent_map_end(entry)) | 104 | else if (em->start >= extent_map_end(entry)) |
| @@ -128,7 +126,6 @@ static int tree_insert(struct rb_root *root, struct extent_map *em) | |||
| 128 | if (end > entry->start && em->start < extent_map_end(entry)) | 126 | if (end > entry->start && em->start < extent_map_end(entry)) |
| 129 | return -EEXIST; | 127 | return -EEXIST; |
| 130 | 128 | ||
| 131 | em->in_tree = 1; | ||
| 132 | rb_link_node(&em->rb_node, orig_parent, p); | 129 | rb_link_node(&em->rb_node, orig_parent, p); |
| 133 | rb_insert_color(&em->rb_node, root); | 130 | rb_insert_color(&em->rb_node, root); |
| 134 | return 0; | 131 | return 0; |
| @@ -153,8 +150,6 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, | |||
| 153 | prev = n; | 150 | prev = n; |
| 154 | prev_entry = entry; | 151 | prev_entry = entry; |
| 155 | 152 | ||
| 156 | WARN_ON(!entry->in_tree); | ||
| 157 | |||
| 158 | if (offset < entry->start) | 153 | if (offset < entry->start) |
| 159 | n = n->rb_left; | 154 | n = n->rb_left; |
| 160 | else if (offset >= extent_map_end(entry)) | 155 | else if (offset >= extent_map_end(entry)) |
| @@ -240,12 +235,12 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) | |||
| 240 | em->len += merge->len; | 235 | em->len += merge->len; |
| 241 | em->block_len += merge->block_len; | 236 | em->block_len += merge->block_len; |
| 242 | em->block_start = merge->block_start; | 237 | em->block_start = merge->block_start; |
| 243 | merge->in_tree = 0; | ||
| 244 | em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; | 238 | em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; |
| 245 | em->mod_start = merge->mod_start; | 239 | em->mod_start = merge->mod_start; |
| 246 | em->generation = max(em->generation, merge->generation); | 240 | em->generation = max(em->generation, merge->generation); |
| 247 | 241 | ||
| 248 | rb_erase(&merge->rb_node, &tree->map); | 242 | rb_erase(&merge->rb_node, &tree->map); |
| 243 | RB_CLEAR_NODE(&merge->rb_node); | ||
| 249 | free_extent_map(merge); | 244 | free_extent_map(merge); |
| 250 | } | 245 | } |
| 251 | } | 246 | } |
| @@ -257,7 +252,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) | |||
| 257 | em->len += merge->len; | 252 | em->len += merge->len; |
| 258 | em->block_len += merge->block_len; | 253 | em->block_len += merge->block_len; |
| 259 | rb_erase(&merge->rb_node, &tree->map); | 254 | rb_erase(&merge->rb_node, &tree->map); |
| 260 | merge->in_tree = 0; | 255 | RB_CLEAR_NODE(&merge->rb_node); |
| 261 | em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; | 256 | em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; |
| 262 | em->generation = max(em->generation, merge->generation); | 257 | em->generation = max(em->generation, merge->generation); |
| 263 | free_extent_map(merge); | 258 | free_extent_map(merge); |
| @@ -319,7 +314,21 @@ out: | |||
| 319 | void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em) | 314 | void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em) |
| 320 | { | 315 | { |
| 321 | clear_bit(EXTENT_FLAG_LOGGING, &em->flags); | 316 | clear_bit(EXTENT_FLAG_LOGGING, &em->flags); |
| 322 | if (em->in_tree) | 317 | if (extent_map_in_tree(em)) |
| 318 | try_merge_map(tree, em); | ||
| 319 | } | ||
| 320 | |||
| 321 | static inline void setup_extent_mapping(struct extent_map_tree *tree, | ||
| 322 | struct extent_map *em, | ||
| 323 | int modified) | ||
| 324 | { | ||
| 325 | atomic_inc(&em->refs); | ||
| 326 | em->mod_start = em->start; | ||
| 327 | em->mod_len = em->len; | ||
| 328 | |||
| 329 | if (modified) | ||
| 330 | list_move(&em->list, &tree->modified_extents); | ||
| 331 | else | ||
| 323 | try_merge_map(tree, em); | 332 | try_merge_map(tree, em); |
| 324 | } | 333 | } |
| 325 | 334 | ||
| @@ -342,15 +351,7 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
| 342 | if (ret) | 351 | if (ret) |
| 343 | goto out; | 352 | goto out; |
| 344 | 353 | ||
| 345 | atomic_inc(&em->refs); | 354 | setup_extent_mapping(tree, em, modified); |
| 346 | |||
| 347 | em->mod_start = em->start; | ||
| 348 | em->mod_len = em->len; | ||
| 349 | |||
| 350 | if (modified) | ||
| 351 | list_move(&em->list, &tree->modified_extents); | ||
| 352 | else | ||
| 353 | try_merge_map(tree, em); | ||
| 354 | out: | 355 | out: |
| 355 | return ret; | 356 | return ret; |
| 356 | } | 357 | } |
| @@ -434,6 +435,21 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) | |||
| 434 | rb_erase(&em->rb_node, &tree->map); | 435 | rb_erase(&em->rb_node, &tree->map); |
| 435 | if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) | 436 | if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) |
| 436 | list_del_init(&em->list); | 437 | list_del_init(&em->list); |
| 437 | em->in_tree = 0; | 438 | RB_CLEAR_NODE(&em->rb_node); |
| 438 | return ret; | 439 | return ret; |
| 439 | } | 440 | } |
| 441 | |||
| 442 | void replace_extent_mapping(struct extent_map_tree *tree, | ||
| 443 | struct extent_map *cur, | ||
| 444 | struct extent_map *new, | ||
| 445 | int modified) | ||
| 446 | { | ||
| 447 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags)); | ||
| 448 | ASSERT(extent_map_in_tree(cur)); | ||
| 449 | if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags)) | ||
| 450 | list_del_init(&cur->list); | ||
| 451 | rb_replace_node(&cur->rb_node, &new->rb_node, &tree->map); | ||
| 452 | RB_CLEAR_NODE(&cur->rb_node); | ||
| 453 | |||
| 454 | setup_extent_mapping(tree, new, modified); | ||
| 455 | } | ||
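replace_extent_mapping() exists so that btrfs_drop_extent_cache() (changed below in file.c) can swap a split entry in without a remove/add window during which lookups on the range would miss. The core is rb_replace_node():

    /* Swap 'new' into 'cur's slot in one step under the tree lock,
     * so the range is never absent from the tree. */
    rb_replace_node(&cur->rb_node, &new->rb_node, &tree->map);
    RB_CLEAR_NODE(&cur->rb_node);   /* 'cur' is no longer linked */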
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 93fba716d7f8..e7fd8a56a140 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
| @@ -33,7 +33,6 @@ struct extent_map { | |||
| 33 | unsigned long flags; | 33 | unsigned long flags; |
| 34 | struct block_device *bdev; | 34 | struct block_device *bdev; |
| 35 | atomic_t refs; | 35 | atomic_t refs; |
| 36 | unsigned int in_tree; | ||
| 37 | unsigned int compress_type; | 36 | unsigned int compress_type; |
| 38 | struct list_head list; | 37 | struct list_head list; |
| 39 | }; | 38 | }; |
| @@ -44,6 +43,11 @@ struct extent_map_tree { | |||
| 44 | rwlock_t lock; | 43 | rwlock_t lock; |
| 45 | }; | 44 | }; |
| 46 | 45 | ||
| 46 | static inline int extent_map_in_tree(const struct extent_map *em) | ||
| 47 | { | ||
| 48 | return !RB_EMPTY_NODE(&em->rb_node); | ||
| 49 | } | ||
| 50 | |||
| 47 | static inline u64 extent_map_end(struct extent_map *em) | 51 | static inline u64 extent_map_end(struct extent_map *em) |
| 48 | { | 52 | { |
| 49 | if (em->start + em->len < em->start) | 53 | if (em->start + em->len < em->start) |
| @@ -64,6 +68,10 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | |||
| 64 | int add_extent_mapping(struct extent_map_tree *tree, | 68 | int add_extent_mapping(struct extent_map_tree *tree, |
| 65 | struct extent_map *em, int modified); | 69 | struct extent_map *em, int modified); |
| 66 | int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); | 70 | int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); |
| 71 | void replace_extent_mapping(struct extent_map_tree *tree, | ||
| 72 | struct extent_map *cur, | ||
| 73 | struct extent_map *new, | ||
| 74 | int modified); | ||
| 67 | 75 | ||
| 68 | struct extent_map *alloc_extent_map(void); | 76 | struct extent_map *alloc_extent_map(void); |
| 69 | void free_extent_map(struct extent_map *em); | 77 | void free_extent_map(struct extent_map *em); |
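With the in_tree flag gone, tree membership is encoded in the rb_node itself: RB_CLEAR_NODE() parks the node in a sentinel state and RB_EMPTY_NODE() tests for it. The lifecycle, sketched with locking and error handling omitted:

    struct extent_map *em = alloc_extent_map();    /* node starts cleared */

    ASSERT(!extent_map_in_tree(em));

    add_extent_mapping(tree, em, 0);               /* rb_link + rb_insert_color */
    ASSERT(extent_map_in_tree(em));

    remove_extent_mapping(tree, em);               /* rb_erase + RB_CLEAR_NODE */
    ASSERT(!extent_map_in_tree(em));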
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0165b8672f09..e1ffb1e22898 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -591,7 +591,6 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 591 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | 591 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 592 | clear_bit(EXTENT_FLAG_LOGGING, &flags); | 592 | clear_bit(EXTENT_FLAG_LOGGING, &flags); |
| 593 | modified = !list_empty(&em->list); | 593 | modified = !list_empty(&em->list); |
| 594 | remove_extent_mapping(em_tree, em); | ||
| 595 | if (no_splits) | 594 | if (no_splits) |
| 596 | goto next; | 595 | goto next; |
| 597 | 596 | ||
| @@ -622,8 +621,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 622 | split->bdev = em->bdev; | 621 | split->bdev = em->bdev; |
| 623 | split->flags = flags; | 622 | split->flags = flags; |
| 624 | split->compress_type = em->compress_type; | 623 | split->compress_type = em->compress_type; |
| 625 | ret = add_extent_mapping(em_tree, split, modified); | 624 | replace_extent_mapping(em_tree, em, split, modified); |
| 626 | BUG_ON(ret); /* Logic error */ | ||
| 627 | free_extent_map(split); | 625 | free_extent_map(split); |
| 628 | split = split2; | 626 | split = split2; |
| 629 | split2 = NULL; | 627 | split2 = NULL; |
| @@ -661,12 +659,20 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 661 | split->orig_block_len = 0; | 659 | split->orig_block_len = 0; |
| 662 | } | 660 | } |
| 663 | 661 | ||
| 664 | ret = add_extent_mapping(em_tree, split, modified); | 662 | if (extent_map_in_tree(em)) { |
| 665 | BUG_ON(ret); /* Logic error */ | 663 | replace_extent_mapping(em_tree, em, split, |
| 664 | modified); | ||
| 665 | } else { | ||
| 666 | ret = add_extent_mapping(em_tree, split, | ||
| 667 | modified); | ||
| 668 | ASSERT(ret == 0); /* Logic error */ | ||
| 669 | } | ||
| 666 | free_extent_map(split); | 670 | free_extent_map(split); |
| 667 | split = NULL; | 671 | split = NULL; |
| 668 | } | 672 | } |
| 669 | next: | 673 | next: |
| 674 | if (extent_map_in_tree(em)) | ||
| 675 | remove_extent_mapping(em_tree, em); | ||
| 670 | write_unlock(&em_tree->lock); | 676 | write_unlock(&em_tree->lock); |
| 671 | 677 | ||
| 672 | /* once for us */ | 678 | /* once for us */ |
| @@ -720,7 +726,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
| 720 | if (drop_cache) | 726 | if (drop_cache) |
| 721 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | 727 | btrfs_drop_extent_cache(inode, start, end - 1, 0); |
| 722 | 728 | ||
| 723 | if (start >= BTRFS_I(inode)->disk_i_size) | 729 | if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent) |
| 724 | modify_tree = 0; | 730 | modify_tree = 0; |
| 725 | 731 | ||
| 726 | while (1) { | 732 | while (1) { |
| @@ -798,7 +804,10 @@ next_slot: | |||
| 798 | */ | 804 | */ |
| 799 | if (start > key.offset && end < extent_end) { | 805 | if (start > key.offset && end < extent_end) { |
| 800 | BUG_ON(del_nr > 0); | 806 | BUG_ON(del_nr > 0); |
| 801 | BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); | 807 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
| 808 | ret = -EINVAL; | ||
| 809 | break; | ||
| 810 | } | ||
| 802 | 811 | ||
| 803 | memcpy(&new_key, &key, sizeof(new_key)); | 812 | memcpy(&new_key, &key, sizeof(new_key)); |
| 804 | new_key.offset = start; | 813 | new_key.offset = start; |
| @@ -841,7 +850,10 @@ next_slot: | |||
| 841 | * | -------- extent -------- | | 850 | * | -------- extent -------- | |
| 842 | */ | 851 | */ |
| 843 | if (start <= key.offset && end < extent_end) { | 852 | if (start <= key.offset && end < extent_end) { |
| 844 | BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); | 853 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
| 854 | ret = -EINVAL; | ||
| 855 | break; | ||
| 856 | } | ||
| 845 | 857 | ||
| 846 | memcpy(&new_key, &key, sizeof(new_key)); | 858 | memcpy(&new_key, &key, sizeof(new_key)); |
| 847 | new_key.offset = end; | 859 | new_key.offset = end; |
| @@ -864,7 +876,10 @@ next_slot: | |||
| 864 | */ | 876 | */ |
| 865 | if (start > key.offset && end >= extent_end) { | 877 | if (start > key.offset && end >= extent_end) { |
| 866 | BUG_ON(del_nr > 0); | 878 | BUG_ON(del_nr > 0); |
| 867 | BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); | 879 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
| 880 | ret = -EINVAL; | ||
| 881 | break; | ||
| 882 | } | ||
| 868 | 883 | ||
| 869 | btrfs_set_file_extent_num_bytes(leaf, fi, | 884 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 870 | start - key.offset); | 885 | start - key.offset); |
| @@ -938,34 +953,42 @@ next_slot: | |||
| 938 | * Set path->slots[0] to first slot, so that after the delete | 953 | * Set path->slots[0] to first slot, so that after the delete |
| 939 | * if items are moved off from our leaf to its immediate left or | 954 | * if items are moved off from our leaf to its immediate left or |
| 940 | * right neighbor leaves, we end up with a correct and adjusted | 955 | * right neighbor leaves, we end up with a correct and adjusted |
| 941 | * path->slots[0] for our insertion. | 956 | * path->slots[0] for our insertion (if replace_extent != 0). |
| 942 | */ | 957 | */ |
| 943 | path->slots[0] = del_slot; | 958 | path->slots[0] = del_slot; |
| 944 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); | 959 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); |
| 945 | if (ret) | 960 | if (ret) |
| 946 | btrfs_abort_transaction(trans, root, ret); | 961 | btrfs_abort_transaction(trans, root, ret); |
| 962 | } | ||
| 947 | 963 | ||
| 948 | leaf = path->nodes[0]; | 964 | leaf = path->nodes[0]; |
| 949 | /* | 965 | /* |
| 950 | * leaf eb has flag EXTENT_BUFFER_STALE if it was deleted (that | 966 | * If btrfs_del_items() was called, it might have deleted a leaf, in |
| 951 | * is, its contents got pushed to its neighbors), in which case | 967 | * which case it unlocked our path, so check path->locks[0] matches a |
| 952 | * it means path->locks[0] == 0 | 968 | * write lock. |
| 953 | */ | 969 | */ |
| 954 | if (!ret && replace_extent && leafs_visited == 1 && | 970 | if (!ret && replace_extent && leafs_visited == 1 && |
| 955 | path->locks[0] && | 971 | (path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING || |
| 956 | btrfs_leaf_free_space(root, leaf) >= | 972 | path->locks[0] == BTRFS_WRITE_LOCK) && |
| 957 | sizeof(struct btrfs_item) + extent_item_size) { | 973 | btrfs_leaf_free_space(root, leaf) >= |
| 958 | 974 | sizeof(struct btrfs_item) + extent_item_size) { | |
| 959 | key.objectid = ino; | 975 | |
| 960 | key.type = BTRFS_EXTENT_DATA_KEY; | 976 | key.objectid = ino; |
| 961 | key.offset = start; | 977 | key.type = BTRFS_EXTENT_DATA_KEY; |
| 962 | setup_items_for_insert(root, path, &key, | 978 | key.offset = start; |
| 963 | &extent_item_size, | 979 | if (!del_nr && path->slots[0] < btrfs_header_nritems(leaf)) { |
| 964 | extent_item_size, | 980 | struct btrfs_key slot_key; |
| 965 | sizeof(struct btrfs_item) + | 981 | |
| 966 | extent_item_size, 1); | 982 | btrfs_item_key_to_cpu(leaf, &slot_key, path->slots[0]); |
| 967 | *key_inserted = 1; | 983 | if (btrfs_comp_cpu_keys(&key, &slot_key) > 0) |
| 984 | path->slots[0]++; | ||
| 968 | } | 985 | } |
| 986 | setup_items_for_insert(root, path, &key, | ||
| 987 | &extent_item_size, | ||
| 988 | extent_item_size, | ||
| 989 | sizeof(struct btrfs_item) + | ||
| 990 | extent_item_size, 1); | ||
| 991 | *key_inserted = 1; | ||
| 969 | } | 992 | } |
| 970 | 993 | ||
| 971 | if (!replace_extent || !(*key_inserted)) | 994 | if (!replace_extent || !(*key_inserted)) |
| @@ -1346,11 +1369,11 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, | |||
| 1346 | struct btrfs_ordered_extent *ordered; | 1369 | struct btrfs_ordered_extent *ordered; |
| 1347 | lock_extent_bits(&BTRFS_I(inode)->io_tree, | 1370 | lock_extent_bits(&BTRFS_I(inode)->io_tree, |
| 1348 | start_pos, last_pos, 0, cached_state); | 1371 | start_pos, last_pos, 0, cached_state); |
| 1349 | ordered = btrfs_lookup_first_ordered_extent(inode, last_pos); | 1372 | ordered = btrfs_lookup_ordered_range(inode, start_pos, |
| 1373 | last_pos - start_pos + 1); | ||
| 1350 | if (ordered && | 1374 | if (ordered && |
| 1351 | ordered->file_offset + ordered->len > start_pos && | 1375 | ordered->file_offset + ordered->len > start_pos && |
| 1352 | ordered->file_offset <= last_pos) { | 1376 | ordered->file_offset <= last_pos) { |
| 1353 | btrfs_put_ordered_extent(ordered); | ||
| 1354 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | 1377 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, |
| 1355 | start_pos, last_pos, | 1378 | start_pos, last_pos, |
| 1356 | cached_state, GFP_NOFS); | 1379 | cached_state, GFP_NOFS); |
| @@ -1358,12 +1381,9 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, | |||
| 1358 | unlock_page(pages[i]); | 1381 | unlock_page(pages[i]); |
| 1359 | page_cache_release(pages[i]); | 1382 | page_cache_release(pages[i]); |
| 1360 | } | 1383 | } |
| 1361 | ret = btrfs_wait_ordered_range(inode, start_pos, | 1384 | btrfs_start_ordered_extent(inode, ordered, 1); |
| 1362 | last_pos - start_pos + 1); | 1385 | btrfs_put_ordered_extent(ordered); |
| 1363 | if (ret) | 1386 | return -EAGAIN; |
| 1364 | return ret; | ||
| 1365 | else | ||
| 1366 | return -EAGAIN; | ||
| 1367 | } | 1387 | } |
| 1368 | if (ordered) | 1388 | if (ordered) |
| 1369 | btrfs_put_ordered_extent(ordered); | 1389 | btrfs_put_ordered_extent(ordered); |
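Rather than waiting for the whole ordered range after the pages have been released, the function now starts just the colliding ordered extent and returns -EAGAIN, leaving the retry to the caller. A hedged sketch of that contract (the buffered-write caller is outside this hunk, and the full argument list is inferred from the hunk header, so treat it as illustrative):

    again:
            /* (re)grab and lock the target pages here */
            ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
                                                  pos, &lockstart, &lockend,
                                                  &cached_state);
            if (ret == -EAGAIN)
                    goto again;     /* ordered extent was kicked; pages were
                                       unlocked and released for us */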
| @@ -1396,8 +1416,12 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, | |||
| 1396 | u64 num_bytes; | 1416 | u64 num_bytes; |
| 1397 | int ret; | 1417 | int ret; |
| 1398 | 1418 | ||
| 1419 | ret = btrfs_start_nocow_write(root); | ||
| 1420 | if (!ret) | ||
| 1421 | return -ENOSPC; | ||
| 1422 | |||
| 1399 | lockstart = round_down(pos, root->sectorsize); | 1423 | lockstart = round_down(pos, root->sectorsize); |
| 1400 | lockend = lockstart + round_up(*write_bytes, root->sectorsize) - 1; | 1424 | lockend = round_up(pos + *write_bytes, root->sectorsize) - 1; |
| 1401 | 1425 | ||
| 1402 | while (1) { | 1426 | while (1) { |
| 1403 | lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); | 1427 | lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); |
| @@ -1415,12 +1439,10 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, | |||
| 1415 | ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); | 1439 | ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); |
| 1416 | if (ret <= 0) { | 1440 | if (ret <= 0) { |
| 1417 | ret = 0; | 1441 | ret = 0; |
| 1442 | btrfs_end_nocow_write(root); | ||
| 1418 | } else { | 1443 | } else { |
| 1419 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, | 1444 | *write_bytes = min_t(size_t, *write_bytes, |
| 1420 | EXTENT_DIRTY | EXTENT_DELALLOC | | 1445 | num_bytes - pos + lockstart); |
| 1421 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, | ||
| 1422 | NULL, GFP_NOFS); | ||
| 1423 | *write_bytes = min_t(size_t, *write_bytes, num_bytes); | ||
| 1424 | } | 1446 | } |
| 1425 | 1447 | ||
| 1426 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); | 1448 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); |
| @@ -1510,6 +1532,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
| 1510 | if (!only_release_metadata) | 1532 | if (!only_release_metadata) |
| 1511 | btrfs_free_reserved_data_space(inode, | 1533 | btrfs_free_reserved_data_space(inode, |
| 1512 | reserve_bytes); | 1534 | reserve_bytes); |
| 1535 | else | ||
| 1536 | btrfs_end_nocow_write(root); | ||
| 1513 | break; | 1537 | break; |
| 1514 | } | 1538 | } |
| 1515 | 1539 | ||
| @@ -1598,6 +1622,9 @@ again: | |||
| 1598 | } | 1622 | } |
| 1599 | 1623 | ||
| 1600 | release_bytes = 0; | 1624 | release_bytes = 0; |
| 1625 | if (only_release_metadata) | ||
| 1626 | btrfs_end_nocow_write(root); | ||
| 1627 | |||
| 1601 | if (only_release_metadata && copied > 0) { | 1628 | if (only_release_metadata && copied > 0) { |
| 1602 | u64 lockstart = round_down(pos, root->sectorsize); | 1629 | u64 lockstart = round_down(pos, root->sectorsize); |
| 1603 | u64 lockend = lockstart + | 1630 | u64 lockend = lockstart + |
| @@ -1624,10 +1651,12 @@ again: | |||
| 1624 | kfree(pages); | 1651 | kfree(pages); |
| 1625 | 1652 | ||
| 1626 | if (release_bytes) { | 1653 | if (release_bytes) { |
| 1627 | if (only_release_metadata) | 1654 | if (only_release_metadata) { |
| 1655 | btrfs_end_nocow_write(root); | ||
| 1628 | btrfs_delalloc_release_metadata(inode, release_bytes); | 1656 | btrfs_delalloc_release_metadata(inode, release_bytes); |
| 1629 | else | 1657 | } else { |
| 1630 | btrfs_delalloc_release_space(inode, release_bytes); | 1658 | btrfs_delalloc_release_space(inode, release_bytes); |
| 1659 | } | ||
| 1631 | } | 1660 | } |
| 1632 | 1661 | ||
| 1633 | return num_written ? num_written : ret; | 1662 | return num_written ? num_written : ret; |
| @@ -1797,7 +1826,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1797 | BTRFS_I(inode)->last_sub_trans = root->log_transid; | 1826 | BTRFS_I(inode)->last_sub_trans = root->log_transid; |
| 1798 | if (num_written > 0) { | 1827 | if (num_written > 0) { |
| 1799 | err = generic_write_sync(file, pos, num_written); | 1828 | err = generic_write_sync(file, pos, num_written); |
| 1800 | if (err < 0 && num_written > 0) | 1829 | if (err < 0) |
| 1801 | num_written = err; | 1830 | num_written = err; |
| 1802 | } | 1831 | } |
| 1803 | 1832 | ||
| @@ -1856,8 +1885,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 1856 | struct dentry *dentry = file->f_path.dentry; | 1885 | struct dentry *dentry = file->f_path.dentry; |
| 1857 | struct inode *inode = dentry->d_inode; | 1886 | struct inode *inode = dentry->d_inode; |
| 1858 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1887 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1859 | int ret = 0; | ||
| 1860 | struct btrfs_trans_handle *trans; | 1888 | struct btrfs_trans_handle *trans; |
| 1889 | struct btrfs_log_ctx ctx; | ||
| 1890 | int ret = 0; | ||
| 1861 | bool full_sync = 0; | 1891 | bool full_sync = 0; |
| 1862 | 1892 | ||
| 1863 | trace_btrfs_sync_file(file, datasync); | 1893 | trace_btrfs_sync_file(file, datasync); |
| @@ -1951,7 +1981,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 1951 | } | 1981 | } |
| 1952 | trans->sync = true; | 1982 | trans->sync = true; |
| 1953 | 1983 | ||
| 1954 | ret = btrfs_log_dentry_safe(trans, root, dentry); | 1984 | btrfs_init_log_ctx(&ctx); |
| 1985 | |||
| 1986 | ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx); | ||
| 1955 | if (ret < 0) { | 1987 | if (ret < 0) { |
| 1956 | /* Fallthrough and commit/free transaction. */ | 1988 | /* Fallthrough and commit/free transaction. */ |
| 1957 | ret = 1; | 1989 | ret = 1; |
| @@ -1971,7 +2003,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 1971 | 2003 | ||
| 1972 | if (ret != BTRFS_NO_LOG_SYNC) { | 2004 | if (ret != BTRFS_NO_LOG_SYNC) { |
| 1973 | if (!ret) { | 2005 | if (!ret) { |
| 1974 | ret = btrfs_sync_log(trans, root); | 2006 | ret = btrfs_sync_log(trans, root, &ctx); |
| 1975 | if (!ret) { | 2007 | if (!ret) { |
| 1976 | ret = btrfs_end_transaction(trans, root); | 2008 | ret = btrfs_end_transaction(trans, root); |
| 1977 | goto out; | 2009 | goto out; |
| @@ -2157,6 +2189,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2157 | bool same_page = ((offset >> PAGE_CACHE_SHIFT) == | 2189 | bool same_page = ((offset >> PAGE_CACHE_SHIFT) == |
| 2158 | ((offset + len - 1) >> PAGE_CACHE_SHIFT)); | 2190 | ((offset + len - 1) >> PAGE_CACHE_SHIFT)); |
| 2159 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); | 2191 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); |
| 2192 | u64 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE); | ||
| 2160 | 2193 | ||
| 2161 | ret = btrfs_wait_ordered_range(inode, offset, len); | 2194 | ret = btrfs_wait_ordered_range(inode, offset, len); |
| 2162 | if (ret) | 2195 | if (ret) |
| @@ -2172,14 +2205,14 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2172 | * entire page. | 2205 | * entire page. |
| 2173 | */ | 2206 | */ |
| 2174 | if (same_page && len < PAGE_CACHE_SIZE) { | 2207 | if (same_page && len < PAGE_CACHE_SIZE) { |
| 2175 | if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE)) | 2208 | if (offset < ino_size) |
| 2176 | ret = btrfs_truncate_page(inode, offset, len, 0); | 2209 | ret = btrfs_truncate_page(inode, offset, len, 0); |
| 2177 | mutex_unlock(&inode->i_mutex); | 2210 | mutex_unlock(&inode->i_mutex); |
| 2178 | return ret; | 2211 | return ret; |
| 2179 | } | 2212 | } |
| 2180 | 2213 | ||
| 2181 | /* zero back part of the first page */ | 2214 | /* zero back part of the first page */ |
| 2182 | if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE)) { | 2215 | if (offset < ino_size) { |
| 2183 | ret = btrfs_truncate_page(inode, offset, 0, 0); | 2216 | ret = btrfs_truncate_page(inode, offset, 0, 0); |
| 2184 | if (ret) { | 2217 | if (ret) { |
| 2185 | mutex_unlock(&inode->i_mutex); | 2218 | mutex_unlock(&inode->i_mutex); |
| @@ -2188,7 +2221,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2188 | } | 2221 | } |
| 2189 | 2222 | ||
| 2190 | /* zero the front end of the last page */ | 2223 | /* zero the front end of the last page */ |
| 2191 | if (offset + len < round_up(inode->i_size, PAGE_CACHE_SIZE)) { | 2224 | if (offset + len < ino_size) { |
| 2192 | ret = btrfs_truncate_page(inode, offset + len, 0, 1); | 2225 | ret = btrfs_truncate_page(inode, offset + len, 0, 1); |
| 2193 | if (ret) { | 2226 | if (ret) { |
| 2194 | mutex_unlock(&inode->i_mutex); | 2227 | mutex_unlock(&inode->i_mutex); |
| @@ -2277,10 +2310,13 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2277 | 2310 | ||
| 2278 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 2311 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
| 2279 | 2312 | ||
| 2280 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | 2313 | if (cur_offset < ino_size) { |
| 2281 | if (ret) { | 2314 | ret = fill_holes(trans, inode, path, cur_offset, |
| 2282 | err = ret; | 2315 | drop_end); |
| 2283 | break; | 2316 | if (ret) { |
| 2317 | err = ret; | ||
| 2318 | break; | ||
| 2319 | } | ||
| 2284 | } | 2320 | } |
| 2285 | 2321 | ||
| 2286 | cur_offset = drop_end; | 2322 | cur_offset = drop_end; |
| @@ -2313,10 +2349,12 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2313 | } | 2349 | } |
| 2314 | 2350 | ||
| 2315 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 2351 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
| 2316 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | 2352 | if (cur_offset < ino_size) { |
| 2317 | if (ret) { | 2353 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); |
| 2318 | err = ret; | 2354 | if (ret) { |
| 2319 | goto out_trans; | 2355 | err = ret; |
| 2356 | goto out_trans; | ||
| 2357 | } | ||
| 2320 | } | 2358 | } |
| 2321 | 2359 | ||
| 2322 | out_trans: | 2360 | out_trans: |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d3d44486290b..06e9a4152b14 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -864,7 +864,8 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 864 | 864 | ||
| 865 | if (btrfs_is_free_space_inode(inode)) { | 865 | if (btrfs_is_free_space_inode(inode)) { |
| 866 | WARN_ON_ONCE(1); | 866 | WARN_ON_ONCE(1); |
| 867 | return -EINVAL; | 867 | ret = -EINVAL; |
| 868 | goto out_unlock; | ||
| 868 | } | 869 | } |
| 869 | 870 | ||
| 870 | num_bytes = ALIGN(end - start + 1, blocksize); | 871 | num_bytes = ALIGN(end - start + 1, blocksize); |
| @@ -1075,17 +1076,15 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
| 1075 | async_cow->end = cur_end; | 1076 | async_cow->end = cur_end; |
| 1076 | INIT_LIST_HEAD(&async_cow->extents); | 1077 | INIT_LIST_HEAD(&async_cow->extents); |
| 1077 | 1078 | ||
| 1078 | async_cow->work.func = async_cow_start; | 1079 | btrfs_init_work(&async_cow->work, async_cow_start, |
| 1079 | async_cow->work.ordered_func = async_cow_submit; | 1080 | async_cow_submit, async_cow_free); |
| 1080 | async_cow->work.ordered_free = async_cow_free; | ||
| 1081 | async_cow->work.flags = 0; | ||
| 1082 | 1081 | ||
| 1083 | nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> | 1082 | nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> |
| 1084 | PAGE_CACHE_SHIFT; | 1083 | PAGE_CACHE_SHIFT; |
| 1085 | atomic_add(nr_pages, &root->fs_info->async_delalloc_pages); | 1084 | atomic_add(nr_pages, &root->fs_info->async_delalloc_pages); |
| 1086 | 1085 | ||
| 1087 | btrfs_queue_worker(&root->fs_info->delalloc_workers, | 1086 | btrfs_queue_work(root->fs_info->delalloc_workers, |
| 1088 | &async_cow->work); | 1087 | &async_cow->work); |
| 1089 | 1088 | ||
| 1090 | if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) { | 1089 | if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) { |
| 1091 | wait_event(root->fs_info->async_submit_wait, | 1090 | wait_event(root->fs_info->async_submit_wait, |
| @@ -1843,9 +1842,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) | |||
| 1843 | 1842 | ||
| 1844 | SetPageChecked(page); | 1843 | SetPageChecked(page); |
| 1845 | page_cache_get(page); | 1844 | page_cache_get(page); |
| 1846 | fixup->work.func = btrfs_writepage_fixup_worker; | 1845 | btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); |
| 1847 | fixup->page = page; | 1846 | fixup->page = page; |
| 1848 | btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work); | 1847 | btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work); |
| 1849 | return -EBUSY; | 1848 | return -EBUSY; |
| 1850 | } | 1849 | } |
| 1851 | 1850 | ||
| @@ -2239,6 +2238,11 @@ static noinline int relink_extent_backref(struct btrfs_path *path, | |||
| 2239 | return PTR_ERR(root); | 2238 | return PTR_ERR(root); |
| 2240 | } | 2239 | } |
| 2241 | 2240 | ||
| 2241 | if (btrfs_root_readonly(root)) { | ||
| 2242 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
| 2243 | return 0; | ||
| 2244 | } | ||
| 2245 | |||
| 2242 | /* step 2: get inode */ | 2246 | /* step 2: get inode */ |
| 2243 | key.objectid = backref->inum; | 2247 | key.objectid = backref->inum; |
| 2244 | key.type = BTRFS_INODE_ITEM_KEY; | 2248 | key.type = BTRFS_INODE_ITEM_KEY; |
| @@ -2759,7 +2763,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
| 2759 | struct inode *inode = page->mapping->host; | 2763 | struct inode *inode = page->mapping->host; |
| 2760 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2764 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 2761 | struct btrfs_ordered_extent *ordered_extent = NULL; | 2765 | struct btrfs_ordered_extent *ordered_extent = NULL; |
| 2762 | struct btrfs_workers *workers; | 2766 | struct btrfs_workqueue *workers; |
| 2763 | 2767 | ||
| 2764 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); | 2768 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); |
| 2765 | 2769 | ||
| @@ -2768,14 +2772,13 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
| 2768 | end - start + 1, uptodate)) | 2772 | end - start + 1, uptodate)) |
| 2769 | return 0; | 2773 | return 0; |
| 2770 | 2774 | ||
| 2771 | ordered_extent->work.func = finish_ordered_fn; | 2775 | btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL); |
| 2772 | ordered_extent->work.flags = 0; | ||
| 2773 | 2776 | ||
| 2774 | if (btrfs_is_free_space_inode(inode)) | 2777 | if (btrfs_is_free_space_inode(inode)) |
| 2775 | workers = &root->fs_info->endio_freespace_worker; | 2778 | workers = root->fs_info->endio_freespace_worker; |
| 2776 | else | 2779 | else |
| 2777 | workers = &root->fs_info->endio_write_workers; | 2780 | workers = root->fs_info->endio_write_workers; |
| 2778 | btrfs_queue_worker(workers, &ordered_extent->work); | 2781 | btrfs_queue_work(workers, &ordered_extent->work); |
| 2779 | 2782 | ||
| 2780 | return 0; | 2783 | return 0; |
| 2781 | } | 2784 | } |
| @@ -4593,7 +4596,7 @@ static void evict_inode_truncate_pages(struct inode *inode) | |||
| 4593 | struct rb_node *node; | 4596 | struct rb_node *node; |
| 4594 | 4597 | ||
| 4595 | ASSERT(inode->i_state & I_FREEING); | 4598 | ASSERT(inode->i_state & I_FREEING); |
| 4596 | truncate_inode_pages(&inode->i_data, 0); | 4599 | truncate_inode_pages_final(&inode->i_data); |
| 4597 | 4600 | ||
| 4598 | write_lock(&map_tree->lock); | 4601 | write_lock(&map_tree->lock); |
| 4599 | while (!RB_EMPTY_ROOT(&map_tree->map)) { | 4602 | while (!RB_EMPTY_ROOT(&map_tree->map)) { |
| @@ -4924,7 +4927,8 @@ void btrfs_invalidate_inodes(struct btrfs_root *root) | |||
| 4924 | struct inode *inode; | 4927 | struct inode *inode; |
| 4925 | u64 objectid = 0; | 4928 | u64 objectid = 0; |
| 4926 | 4929 | ||
| 4927 | WARN_ON(btrfs_root_refs(&root->root_item) != 0); | 4930 | if (!test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) |
| 4931 | WARN_ON(btrfs_root_refs(&root->root_item) != 0); | ||
| 4928 | 4932 | ||
| 4929 | spin_lock(&root->inode_lock); | 4933 | spin_lock(&root->inode_lock); |
| 4930 | again: | 4934 | again: |
| @@ -5799,6 +5803,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
| 5799 | } | 5803 | } |
| 5800 | out_unlock: | 5804 | out_unlock: |
| 5801 | btrfs_end_transaction(trans, root); | 5805 | btrfs_end_transaction(trans, root); |
| 5806 | btrfs_balance_delayed_items(root); | ||
| 5802 | btrfs_btree_balance_dirty(root); | 5807 | btrfs_btree_balance_dirty(root); |
| 5803 | if (drop_inode) { | 5808 | if (drop_inode) { |
| 5804 | inode_dec_link_count(inode); | 5809 | inode_dec_link_count(inode); |
| @@ -5872,6 +5877,7 @@ out_unlock: | |||
| 5872 | inode_dec_link_count(inode); | 5877 | inode_dec_link_count(inode); |
| 5873 | iput(inode); | 5878 | iput(inode); |
| 5874 | } | 5879 | } |
| 5880 | btrfs_balance_delayed_items(root); | ||
| 5875 | btrfs_btree_balance_dirty(root); | 5881 | btrfs_btree_balance_dirty(root); |
| 5876 | return err; | 5882 | return err; |
| 5877 | } | 5883 | } |
| @@ -5930,6 +5936,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 5930 | } | 5936 | } |
| 5931 | 5937 | ||
| 5932 | btrfs_end_transaction(trans, root); | 5938 | btrfs_end_transaction(trans, root); |
| 5939 | btrfs_balance_delayed_items(root); | ||
| 5933 | fail: | 5940 | fail: |
| 5934 | if (drop_inode) { | 5941 | if (drop_inode) { |
| 5935 | inode_dec_link_count(inode); | 5942 | inode_dec_link_count(inode); |
| @@ -5996,6 +6003,7 @@ out_fail: | |||
| 5996 | btrfs_end_transaction(trans, root); | 6003 | btrfs_end_transaction(trans, root); |
| 5997 | if (drop_on_err) | 6004 | if (drop_on_err) |
| 5998 | iput(inode); | 6005 | iput(inode); |
| 6006 | btrfs_balance_delayed_items(root); | ||
| 5999 | btrfs_btree_balance_dirty(root); | 6007 | btrfs_btree_balance_dirty(root); |
| 6000 | return err; | 6008 | return err; |
| 6001 | } | 6009 | } |
| @@ -6550,6 +6558,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, | |||
| 6550 | int ret; | 6558 | int ret; |
| 6551 | struct extent_buffer *leaf; | 6559 | struct extent_buffer *leaf; |
| 6552 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6560 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 6561 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 6553 | struct btrfs_file_extent_item *fi; | 6562 | struct btrfs_file_extent_item *fi; |
| 6554 | struct btrfs_key key; | 6563 | struct btrfs_key key; |
| 6555 | u64 disk_bytenr; | 6564 | u64 disk_bytenr; |
| @@ -6626,6 +6635,20 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, | |||
| 6626 | 6635 | ||
| 6627 | if (btrfs_extent_readonly(root, disk_bytenr)) | 6636 | if (btrfs_extent_readonly(root, disk_bytenr)) |
| 6628 | goto out; | 6637 | goto out; |
| 6638 | |||
| 6639 | num_bytes = min(offset + *len, extent_end) - offset; | ||
| 6640 | if (!nocow && found_type == BTRFS_FILE_EXTENT_PREALLOC) { | ||
| 6641 | u64 range_end; | ||
| 6642 | |||
| 6643 | range_end = round_up(offset + num_bytes, root->sectorsize) - 1; | ||
| 6644 | ret = test_range_bit(io_tree, offset, range_end, | ||
| 6645 | EXTENT_DELALLOC, 0, NULL); | ||
| 6646 | if (ret) { | ||
| 6647 | ret = -EAGAIN; | ||
| 6648 | goto out; | ||
| 6649 | } | ||
| 6650 | } | ||
| 6651 | |||
| 6629 | btrfs_release_path(path); | 6652 | btrfs_release_path(path); |
| 6630 | 6653 | ||
| 6631 | /* | 6654 | /* |
| @@ -6654,7 +6677,6 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, | |||
| 6654 | */ | 6677 | */ |
| 6655 | disk_bytenr += backref_offset; | 6678 | disk_bytenr += backref_offset; |
| 6656 | disk_bytenr += offset - key.offset; | 6679 | disk_bytenr += offset - key.offset; |
| 6657 | num_bytes = min(offset + *len, extent_end) - offset; | ||
| 6658 | if (csum_exist_in_range(root, disk_bytenr, num_bytes)) | 6680 | if (csum_exist_in_range(root, disk_bytenr, num_bytes)) |
| 6659 | goto out; | 6681 | goto out; |
| 6660 | /* | 6682 | /* |
| @@ -7024,10 +7046,9 @@ again: | |||
| 7024 | if (!ret) | 7046 | if (!ret) |
| 7025 | goto out_test; | 7047 | goto out_test; |
| 7026 | 7048 | ||
| 7027 | ordered->work.func = finish_ordered_fn; | 7049 | btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL); |
| 7028 | ordered->work.flags = 0; | 7050 | btrfs_queue_work(root->fs_info->endio_write_workers, |
| 7029 | btrfs_queue_worker(&root->fs_info->endio_write_workers, | 7051 | &ordered->work); |
| 7030 | &ordered->work); | ||
| 7031 | out_test: | 7052 | out_test: |
| 7032 | /* | 7053 | /* |
| 7033 | * our bio might span multiple ordered extents. If we haven't | 7054 | * our bio might span multiple ordered extents. If we haven't |
| @@ -7404,15 +7425,15 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
| 7404 | smp_mb__after_atomic_inc(); | 7425 | smp_mb__after_atomic_inc(); |
| 7405 | 7426 | ||
| 7406 | /* | 7427 | /* |
| 7407 | * The generic stuff only does filemap_write_and_wait_range, which isn't | 7428 | * The generic stuff only does filemap_write_and_wait_range, which |
| 7408 | * enough if we've written compressed pages to this area, so we need to | 7429 | * isn't enough if we've written compressed pages to this area, so |
| 7409 | * call btrfs_wait_ordered_range to make absolutely sure that any | 7430 | * we need to flush the dirty pages again to make absolutely sure |
| 7410 | * outstanding dirty pages are on disk. | 7431 | * that any outstanding dirty pages are on disk. |
| 7411 | */ | 7432 | */ |
| 7412 | count = iov_length(iov, nr_segs); | 7433 | count = iov_length(iov, nr_segs); |
| 7413 | ret = btrfs_wait_ordered_range(inode, offset, count); | 7434 | if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, |
| 7414 | if (ret) | 7435 | &BTRFS_I(inode)->runtime_flags)) |
| 7415 | return ret; | 7436 | filemap_fdatawrite_range(inode->i_mapping, offset, count); |
| 7416 | 7437 | ||
| 7417 | if (rw & WRITE) { | 7438 | if (rw & WRITE) { |
| 7418 | /* | 7439 | /* |
| @@ -8404,7 +8425,7 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, | |||
| 8404 | work->inode = inode; | 8425 | work->inode = inode; |
| 8405 | work->wait = wait; | 8426 | work->wait = wait; |
| 8406 | work->delay_iput = delay_iput; | 8427 | work->delay_iput = delay_iput; |
| 8407 | work->work.func = btrfs_run_delalloc_work; | 8428 | btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL); |
| 8408 | 8429 | ||
| 8409 | return work; | 8430 | return work; |
| 8410 | } | 8431 | } |
| @@ -8419,7 +8440,8 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work) | |||
| 8419 | * some fairly slow code that needs optimization. This walks the list | 8440 | * some fairly slow code that needs optimization. This walks the list |
| 8420 | * of all the inodes with pending delalloc and forces them to disk. | 8441 | * of all the inodes with pending delalloc and forces them to disk. |
| 8421 | */ | 8442 | */ |
| 8422 | static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | 8443 | static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput, |
| 8444 | int nr) | ||
| 8423 | { | 8445 | { |
| 8424 | struct btrfs_inode *binode; | 8446 | struct btrfs_inode *binode; |
| 8425 | struct inode *inode; | 8447 | struct inode *inode; |
| @@ -8431,6 +8453,7 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 8431 | INIT_LIST_HEAD(&works); | 8453 | INIT_LIST_HEAD(&works); |
| 8432 | INIT_LIST_HEAD(&splice); | 8454 | INIT_LIST_HEAD(&splice); |
| 8433 | 8455 | ||
| 8456 | mutex_lock(&root->delalloc_mutex); | ||
| 8434 | spin_lock(&root->delalloc_lock); | 8457 | spin_lock(&root->delalloc_lock); |
| 8435 | list_splice_init(&root->delalloc_inodes, &splice); | 8458 | list_splice_init(&root->delalloc_inodes, &splice); |
| 8436 | while (!list_empty(&splice)) { | 8459 | while (!list_empty(&splice)) { |
| @@ -8453,12 +8476,14 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 8453 | else | 8476 | else |
| 8454 | iput(inode); | 8477 | iput(inode); |
| 8455 | ret = -ENOMEM; | 8478 | ret = -ENOMEM; |
| 8456 | goto out; | 8479 | break; |
| 8457 | } | 8480 | } |
| 8458 | list_add_tail(&work->list, &works); | 8481 | list_add_tail(&work->list, &works); |
| 8459 | btrfs_queue_worker(&root->fs_info->flush_workers, | 8482 | btrfs_queue_work(root->fs_info->flush_workers, |
| 8460 | &work->work); | 8483 | &work->work); |
| 8461 | 8484 | ret++; | |
| 8485 | if (nr != -1 && ret >= nr) | ||
| 8486 | break; | ||
| 8462 | cond_resched(); | 8487 | cond_resched(); |
| 8463 | spin_lock(&root->delalloc_lock); | 8488 | spin_lock(&root->delalloc_lock); |
| 8464 | } | 8489 | } |
| @@ -8468,18 +8493,13 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 8468 | list_del_init(&work->list); | 8493 | list_del_init(&work->list); |
| 8469 | btrfs_wait_and_free_delalloc_work(work); | 8494 | btrfs_wait_and_free_delalloc_work(work); |
| 8470 | } | 8495 | } |
| 8471 | return 0; | ||
| 8472 | out: | ||
| 8473 | list_for_each_entry_safe(work, next, &works, list) { | ||
| 8474 | list_del_init(&work->list); | ||
| 8475 | btrfs_wait_and_free_delalloc_work(work); | ||
| 8476 | } | ||
| 8477 | 8496 | ||
| 8478 | if (!list_empty_careful(&splice)) { | 8497 | if (!list_empty_careful(&splice)) { |
| 8479 | spin_lock(&root->delalloc_lock); | 8498 | spin_lock(&root->delalloc_lock); |
| 8480 | list_splice_tail(&splice, &root->delalloc_inodes); | 8499 | list_splice_tail(&splice, &root->delalloc_inodes); |
| 8481 | spin_unlock(&root->delalloc_lock); | 8500 | spin_unlock(&root->delalloc_lock); |
| 8482 | } | 8501 | } |
| 8502 | mutex_unlock(&root->delalloc_mutex); | ||
| 8483 | return ret; | 8503 | return ret; |
| 8484 | } | 8504 | } |
| 8485 | 8505 | ||
| @@ -8490,7 +8510,9 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 8490 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) | 8510 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) |
| 8491 | return -EROFS; | 8511 | return -EROFS; |
| 8492 | 8512 | ||
| 8493 | ret = __start_delalloc_inodes(root, delay_iput); | 8513 | ret = __start_delalloc_inodes(root, delay_iput, -1); |
| 8514 | if (ret > 0) | ||
| 8515 | ret = 0; | ||
| 8494 | /* | 8516 | /* |
| 8495 | * the filemap_flush will queue IO into the worker threads, but | 8517 | * the filemap_flush will queue IO into the worker threads, but |
| 8496 | * we have to make sure the IO is actually started and that | 8518 | * we have to make sure the IO is actually started and that |
| @@ -8507,7 +8529,8 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 8507 | return ret; | 8529 | return ret; |
| 8508 | } | 8530 | } |
| 8509 | 8531 | ||
| 8510 | int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) | 8532 | int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput, |
| 8533 | int nr) | ||
| 8511 | { | 8534 | { |
| 8512 | struct btrfs_root *root; | 8535 | struct btrfs_root *root; |
| 8513 | struct list_head splice; | 8536 | struct list_head splice; |
| @@ -8518,9 +8541,10 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) | |||
| 8518 | 8541 | ||
| 8519 | INIT_LIST_HEAD(&splice); | 8542 | INIT_LIST_HEAD(&splice); |
| 8520 | 8543 | ||
| 8544 | mutex_lock(&fs_info->delalloc_root_mutex); | ||
| 8521 | spin_lock(&fs_info->delalloc_root_lock); | 8545 | spin_lock(&fs_info->delalloc_root_lock); |
| 8522 | list_splice_init(&fs_info->delalloc_roots, &splice); | 8546 | list_splice_init(&fs_info->delalloc_roots, &splice); |
| 8523 | while (!list_empty(&splice)) { | 8547 | while (!list_empty(&splice) && nr) { |
| 8524 | root = list_first_entry(&splice, struct btrfs_root, | 8548 | root = list_first_entry(&splice, struct btrfs_root, |
| 8525 | delalloc_root); | 8549 | delalloc_root); |
| 8526 | root = btrfs_grab_fs_root(root); | 8550 | root = btrfs_grab_fs_root(root); |
| @@ -8529,15 +8553,20 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) | |||
| 8529 | &fs_info->delalloc_roots); | 8553 | &fs_info->delalloc_roots); |
| 8530 | spin_unlock(&fs_info->delalloc_root_lock); | 8554 | spin_unlock(&fs_info->delalloc_root_lock); |
| 8531 | 8555 | ||
| 8532 | ret = __start_delalloc_inodes(root, delay_iput); | 8556 | ret = __start_delalloc_inodes(root, delay_iput, nr); |
| 8533 | btrfs_put_fs_root(root); | 8557 | btrfs_put_fs_root(root); |
| 8534 | if (ret) | 8558 | if (ret < 0) |
| 8535 | goto out; | 8559 | goto out; |
| 8536 | 8560 | ||
| 8561 | if (nr != -1) { | ||
| 8562 | nr -= ret; | ||
| 8563 | WARN_ON(nr < 0); | ||
| 8564 | } | ||
| 8537 | spin_lock(&fs_info->delalloc_root_lock); | 8565 | spin_lock(&fs_info->delalloc_root_lock); |
| 8538 | } | 8566 | } |
| 8539 | spin_unlock(&fs_info->delalloc_root_lock); | 8567 | spin_unlock(&fs_info->delalloc_root_lock); |
| 8540 | 8568 | ||
| 8569 | ret = 0; | ||
| 8541 | atomic_inc(&fs_info->async_submit_draining); | 8570 | atomic_inc(&fs_info->async_submit_draining); |
| 8542 | while (atomic_read(&fs_info->nr_async_submits) || | 8571 | while (atomic_read(&fs_info->nr_async_submits) || |
| 8543 | atomic_read(&fs_info->async_delalloc_pages)) { | 8572 | atomic_read(&fs_info->async_delalloc_pages)) { |
| @@ -8546,13 +8575,13 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) | |||
| 8546 | atomic_read(&fs_info->async_delalloc_pages) == 0)); | 8575 | atomic_read(&fs_info->async_delalloc_pages) == 0)); |
| 8547 | } | 8576 | } |
| 8548 | atomic_dec(&fs_info->async_submit_draining); | 8577 | atomic_dec(&fs_info->async_submit_draining); |
| 8549 | return 0; | ||
| 8550 | out: | 8578 | out: |
| 8551 | if (!list_empty_careful(&splice)) { | 8579 | if (!list_empty_careful(&splice)) { |
| 8552 | spin_lock(&fs_info->delalloc_root_lock); | 8580 | spin_lock(&fs_info->delalloc_root_lock); |
| 8553 | list_splice_tail(&splice, &fs_info->delalloc_roots); | 8581 | list_splice_tail(&splice, &fs_info->delalloc_roots); |
| 8554 | spin_unlock(&fs_info->delalloc_root_lock); | 8582 | spin_unlock(&fs_info->delalloc_root_lock); |
| 8555 | } | 8583 | } |
| 8584 | mutex_unlock(&fs_info->delalloc_root_mutex); | ||
| 8556 | return ret; | 8585 | return ret; |
| 8557 | } | 8586 | } |
| 8558 | 8587 | ||
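The hunk above threads an nr budget through btrfs_start_delalloc_roots(): roots are spliced off the shared list, each flush reports how many inodes it started, and that count is subtracted from the budget until it reaches zero, with -1 acting as the "unlimited" sentinel. The following is a minimal userspace sketch of that budgeted-drain pattern only; the names are hypothetical and the locking around the list is omitted.

#include <stdio.h>

struct root { int id; struct root *next; };

/* Stand-in for __start_delalloc_inodes(); reports work started. */
static int flush(struct root *r)
{
    printf("flushing root %d\n", r->id);
    return 1;
}

/* Drain up to 'nr' roots from 'list'; nr == -1 means no limit,
 * mirroring the patched btrfs_start_delalloc_roots(). */
static int drain_budgeted(struct root **list, int nr)
{
    int started = 0;

    while (*list && nr) {
        struct root *r = *list;
        int ret;

        *list = r->next;
        ret = flush(r);
        started += ret;
        if (nr != -1)
            nr -= ret;  /* never subtract from the -1 sentinel */
    }
    return started;
}

int main(void)
{
    struct root c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
    struct root *list = &a;

    printf("started %d\n", drain_budgeted(&list, 2));
    return 0;
}

With a budget of 2, only the first two roots are flushed and the third stays on the list, which is exactly the behavior the shrinker-style callers of this function want.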
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a6d8efa46bfe..0401397b5c92 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -59,6 +59,32 @@ | |||
| 59 | #include "props.h" | 59 | #include "props.h" |
| 60 | #include "sysfs.h" | 60 | #include "sysfs.h" |
| 61 | 61 | ||
| 62 | #ifdef CONFIG_64BIT | ||
| 63 | /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI | ||
| 64 | * structures are incorrect, as the timespec structure from userspace | ||
| 65 | * is 4 bytes too small. We define these alternatives here to teach | ||
| 66 | * the kernel about the 32-bit struct packing. | ||
| 67 | */ | ||
| 68 | struct btrfs_ioctl_timespec_32 { | ||
| 69 | __u64 sec; | ||
| 70 | __u32 nsec; | ||
| 71 | } __attribute__ ((__packed__)); | ||
| 72 | |||
| 73 | struct btrfs_ioctl_received_subvol_args_32 { | ||
| 74 | char uuid[BTRFS_UUID_SIZE]; /* in */ | ||
| 75 | __u64 stransid; /* in */ | ||
| 76 | __u64 rtransid; /* out */ | ||
| 77 | struct btrfs_ioctl_timespec_32 stime; /* in */ | ||
| 78 | struct btrfs_ioctl_timespec_32 rtime; /* out */ | ||
| 79 | __u64 flags; /* in */ | ||
| 80 | __u64 reserved[16]; /* in */ | ||
| 81 | } __attribute__ ((__packed__)); | ||
| 82 | |||
| 83 | #define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \ | ||
| 84 | struct btrfs_ioctl_received_subvol_args_32) | ||
| 85 | #endif | ||
| 86 | |||
| 87 | |||
| 62 | static int btrfs_clone(struct inode *src, struct inode *inode, | 88 | static int btrfs_clone(struct inode *src, struct inode *inode, |
| 63 | u64 off, u64 olen, u64 olen_aligned, u64 destoff); | 89 | u64 off, u64 olen, u64 olen_aligned, u64 destoff); |
| 64 | 90 | ||
| @@ -585,6 +611,23 @@ fail: | |||
| 585 | return ret; | 611 | return ret; |
| 586 | } | 612 | } |
| 587 | 613 | ||
| 614 | static void btrfs_wait_nocow_write(struct btrfs_root *root) | ||
| 615 | { | ||
| 616 | s64 writers; | ||
| 617 | DEFINE_WAIT(wait); | ||
| 618 | |||
| 619 | do { | ||
| 620 | prepare_to_wait(&root->subv_writers->wait, &wait, | ||
| 621 | TASK_UNINTERRUPTIBLE); | ||
| 622 | |||
| 623 | writers = percpu_counter_sum(&root->subv_writers->counter); | ||
| 624 | if (writers) | ||
| 625 | schedule(); | ||
| 626 | |||
| 627 | finish_wait(&root->subv_writers->wait, &wait); | ||
| 628 | } while (writers); | ||
| 629 | } | ||
| 630 | |||
| 588 | static int create_snapshot(struct btrfs_root *root, struct inode *dir, | 631 | static int create_snapshot(struct btrfs_root *root, struct inode *dir, |
| 589 | struct dentry *dentry, char *name, int namelen, | 632 | struct dentry *dentry, char *name, int namelen, |
| 590 | u64 *async_transid, bool readonly, | 633 | u64 *async_transid, bool readonly, |
| @@ -598,15 +641,21 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
| 598 | if (!root->ref_cows) | 641 | if (!root->ref_cows) |
| 599 | return -EINVAL; | 642 | return -EINVAL; |
| 600 | 643 | ||
| 644 | atomic_inc(&root->will_be_snapshoted); | ||
| 645 | smp_mb__after_atomic_inc(); | ||
| 646 | btrfs_wait_nocow_write(root); | ||
| 647 | |||
| 601 | ret = btrfs_start_delalloc_inodes(root, 0); | 648 | ret = btrfs_start_delalloc_inodes(root, 0); |
| 602 | if (ret) | 649 | if (ret) |
| 603 | return ret; | 650 | goto out; |
| 604 | 651 | ||
| 605 | btrfs_wait_ordered_extents(root, -1); | 652 | btrfs_wait_ordered_extents(root, -1); |
| 606 | 653 | ||
| 607 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); | 654 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); |
| 608 | if (!pending_snapshot) | 655 | if (!pending_snapshot) { |
| 609 | return -ENOMEM; | 656 | ret = -ENOMEM; |
| 657 | goto out; | ||
| 658 | } | ||
| 610 | 659 | ||
| 611 | btrfs_init_block_rsv(&pending_snapshot->block_rsv, | 660 | btrfs_init_block_rsv(&pending_snapshot->block_rsv, |
| 612 | BTRFS_BLOCK_RSV_TEMP); | 661 | BTRFS_BLOCK_RSV_TEMP); |
| @@ -623,7 +672,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
| 623 | &pending_snapshot->qgroup_reserved, | 672 | &pending_snapshot->qgroup_reserved, |
| 624 | false); | 673 | false); |
| 625 | if (ret) | 674 | if (ret) |
| 626 | goto out; | 675 | goto free; |
| 627 | 676 | ||
| 628 | pending_snapshot->dentry = dentry; | 677 | pending_snapshot->dentry = dentry; |
| 629 | pending_snapshot->root = root; | 678 | pending_snapshot->root = root; |
| @@ -674,8 +723,10 @@ fail: | |||
| 674 | btrfs_subvolume_release_metadata(BTRFS_I(dir)->root, | 723 | btrfs_subvolume_release_metadata(BTRFS_I(dir)->root, |
| 675 | &pending_snapshot->block_rsv, | 724 | &pending_snapshot->block_rsv, |
| 676 | pending_snapshot->qgroup_reserved); | 725 | pending_snapshot->qgroup_reserved); |
| 677 | out: | 726 | free: |
| 678 | kfree(pending_snapshot); | 727 | kfree(pending_snapshot); |
| 728 | out: | ||
| 729 | atomic_dec(&root->will_be_snapshoted); | ||
| 679 | return ret; | 730 | return ret; |
| 680 | } | 731 | } |
| 681 | 732 | ||
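create_snapshot() now raises will_be_snapshoted, issues a memory barrier, and parks in btrfs_wait_nocow_write() until the count of in-flight nocow writers drains to zero. Below is a rough userspace analogue of that flag-then-drain handshake, using a pthread condition variable where the kernel uses a percpu_counter plus a waitqueue; all names are illustrative.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t drained = PTHREAD_COND_INITIALIZER;
static int will_be_snapshoted;  /* ~ root->will_be_snapshoted */
static int nocow_writers;       /* ~ percpu_counter_sum(&...->counter) */

/* Snapshot side: raise the flag, then sleep until writers drain. */
static void wait_nocow_write(void)
{
    pthread_mutex_lock(&lock);
    will_be_snapshoted = 1;
    while (nocow_writers > 0)
        pthread_cond_wait(&drained, &lock);
    pthread_mutex_unlock(&lock);
}

/* Writer side: the last writer out wakes any waiting snapshot. */
static void nocow_write_end(void)
{
    pthread_mutex_lock(&lock);
    if (--nocow_writers == 0 && will_be_snapshoted)
        pthread_cond_broadcast(&drained);
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    nocow_writers = 1;
    nocow_write_end();   /* last writer leaves... */
    wait_nocow_write();  /* ...so the snapshot proceeds immediately */
    puts("safe to snapshot");
    return 0;
}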
| @@ -884,12 +935,14 @@ static int find_new_extents(struct btrfs_root *root, | |||
| 884 | min_key.type = BTRFS_EXTENT_DATA_KEY; | 935 | min_key.type = BTRFS_EXTENT_DATA_KEY; |
| 885 | min_key.offset = *off; | 936 | min_key.offset = *off; |
| 886 | 937 | ||
| 887 | path->keep_locks = 1; | ||
| 888 | |||
| 889 | while (1) { | 938 | while (1) { |
| 939 | path->keep_locks = 1; | ||
| 890 | ret = btrfs_search_forward(root, &min_key, path, newer_than); | 940 | ret = btrfs_search_forward(root, &min_key, path, newer_than); |
| 891 | if (ret != 0) | 941 | if (ret != 0) |
| 892 | goto none; | 942 | goto none; |
| 943 | path->keep_locks = 0; | ||
| 944 | btrfs_unlock_up_safe(path, 1); | ||
| 945 | process_slot: | ||
| 893 | if (min_key.objectid != ino) | 946 | if (min_key.objectid != ino) |
| 894 | goto none; | 947 | goto none; |
| 895 | if (min_key.type != BTRFS_EXTENT_DATA_KEY) | 948 | if (min_key.type != BTRFS_EXTENT_DATA_KEY) |
| @@ -908,6 +961,12 @@ static int find_new_extents(struct btrfs_root *root, | |||
| 908 | return 0; | 961 | return 0; |
| 909 | } | 962 | } |
| 910 | 963 | ||
| 964 | path->slots[0]++; | ||
| 965 | if (path->slots[0] < btrfs_header_nritems(leaf)) { | ||
| 966 | btrfs_item_key_to_cpu(leaf, &min_key, path->slots[0]); | ||
| 967 | goto process_slot; | ||
| 968 | } | ||
| 969 | |||
| 911 | if (min_key.offset == (u64)-1) | 970 | if (min_key.offset == (u64)-1) |
| 912 | goto none; | 971 | goto none; |
| 913 | 972 | ||
| @@ -935,10 +994,13 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) | |||
| 935 | read_unlock(&em_tree->lock); | 994 | read_unlock(&em_tree->lock); |
| 936 | 995 | ||
| 937 | if (!em) { | 996 | if (!em) { |
| 997 | struct extent_state *cached = NULL; | ||
| 998 | u64 end = start + len - 1; | ||
| 999 | |||
| 938 | /* get the big lock and read metadata off disk */ | 1000 | /* get the big lock and read metadata off disk */ |
| 939 | lock_extent(io_tree, start, start + len - 1); | 1001 | lock_extent_bits(io_tree, start, end, 0, &cached); |
| 940 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); | 1002 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); |
| 941 | unlock_extent(io_tree, start, start + len - 1); | 1003 | unlock_extent_cached(io_tree, start, end, &cached, GFP_NOFS); |
| 942 | 1004 | ||
| 943 | if (IS_ERR(em)) | 1005 | if (IS_ERR(em)) |
| 944 | return NULL; | 1006 | return NULL; |
| @@ -957,7 +1019,8 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) | |||
| 957 | return false; | 1019 | return false; |
| 958 | 1020 | ||
| 959 | next = defrag_lookup_extent(inode, em->start + em->len); | 1021 | next = defrag_lookup_extent(inode, em->start + em->len); |
| 960 | if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) | 1022 | if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE || |
| 1023 | (em->block_start + em->block_len == next->block_start)) | ||
| 961 | ret = false; | 1024 | ret = false; |
| 962 | 1025 | ||
| 963 | free_extent_map(next); | 1026 | free_extent_map(next); |
| @@ -1076,10 +1139,12 @@ again: | |||
| 1076 | page_start = page_offset(page); | 1139 | page_start = page_offset(page); |
| 1077 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 1140 | page_end = page_start + PAGE_CACHE_SIZE - 1; |
| 1078 | while (1) { | 1141 | while (1) { |
| 1079 | lock_extent(tree, page_start, page_end); | 1142 | lock_extent_bits(tree, page_start, page_end, |
| 1143 | 0, &cached_state); | ||
| 1080 | ordered = btrfs_lookup_ordered_extent(inode, | 1144 | ordered = btrfs_lookup_ordered_extent(inode, |
| 1081 | page_start); | 1145 | page_start); |
| 1082 | unlock_extent(tree, page_start, page_end); | 1146 | unlock_extent_cached(tree, page_start, page_end, |
| 1147 | &cached_state, GFP_NOFS); | ||
| 1083 | if (!ordered) | 1148 | if (!ordered) |
| 1084 | break; | 1149 | break; |
| 1085 | 1150 | ||
| @@ -1356,8 +1421,12 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
| 1356 | } | 1421 | } |
| 1357 | } | 1422 | } |
| 1358 | 1423 | ||
| 1359 | if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) | 1424 | if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) { |
| 1360 | filemap_flush(inode->i_mapping); | 1425 | filemap_flush(inode->i_mapping); |
| 1426 | if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, | ||
| 1427 | &BTRFS_I(inode)->runtime_flags)) | ||
| 1428 | filemap_flush(inode->i_mapping); | ||
| 1429 | } | ||
| 1361 | 1430 | ||
| 1362 | if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { | 1431 | if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { |
| 1363 | /* the filemap_flush will queue IO into the worker threads, but | 1432 | /* the filemap_flush will queue IO into the worker threads, but |
| @@ -1573,7 +1642,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
| 1573 | if (src_inode->i_sb != file_inode(file)->i_sb) { | 1642 | if (src_inode->i_sb != file_inode(file)->i_sb) { |
| 1574 | btrfs_info(BTRFS_I(src_inode)->root->fs_info, | 1643 | btrfs_info(BTRFS_I(src_inode)->root->fs_info, |
| 1575 | "Snapshot src from another FS"); | 1644 | "Snapshot src from another FS"); |
| 1576 | ret = -EINVAL; | 1645 | ret = -EXDEV; |
| 1577 | } else if (!inode_owner_or_capable(src_inode)) { | 1646 | } else if (!inode_owner_or_capable(src_inode)) { |
| 1578 | /* | 1647 | /* |
| 1579 | * Subvolume creation is not restricted, but snapshots | 1648 | * Subvolume creation is not restricted, but snapshots |
| @@ -1797,7 +1866,9 @@ static noinline int may_destroy_subvol(struct btrfs_root *root) | |||
| 1797 | if (di && !IS_ERR(di)) { | 1866 | if (di && !IS_ERR(di)) { |
| 1798 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); | 1867 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); |
| 1799 | if (key.objectid == root->root_key.objectid) { | 1868 | if (key.objectid == root->root_key.objectid) { |
| 1800 | ret = -ENOTEMPTY; | 1869 | ret = -EPERM; |
| 1870 | btrfs_err(root->fs_info, "deleting default subvolume " | ||
| 1871 | "%llu is not allowed", key.objectid); | ||
| 1801 | goto out; | 1872 | goto out; |
| 1802 | } | 1873 | } |
| 1803 | btrfs_release_path(path); | 1874 | btrfs_release_path(path); |
| @@ -2994,8 +3065,9 @@ process_slot: | |||
| 2994 | new_key.offset + datal, | 3065 | new_key.offset + datal, |
| 2995 | 1); | 3066 | 1); |
| 2996 | if (ret) { | 3067 | if (ret) { |
| 2997 | btrfs_abort_transaction(trans, root, | 3068 | if (ret != -EINVAL) |
| 2998 | ret); | 3069 | btrfs_abort_transaction(trans, |
| 3070 | root, ret); | ||
| 2999 | btrfs_end_transaction(trans, root); | 3071 | btrfs_end_transaction(trans, root); |
| 3000 | goto out; | 3072 | goto out; |
| 3001 | } | 3073 | } |
| @@ -3153,8 +3225,9 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 3153 | * decompress into destination's address_space (the file offset | 3225 | * decompress into destination's address_space (the file offset |
| 3154 | * may change, so source mapping won't do), then recompress (or | 3226 | * may change, so source mapping won't do), then recompress (or |
| 3155 | * otherwise reinsert) a subrange. | 3227 | * otherwise reinsert) a subrange. |
| 3156 | * - allow ranges within the same file to be cloned (provided | 3228 | * |
| 3157 | * they don't overlap)? | 3229 | * - split destination inode's inline extents. The inline extents can |
| 3230 | * be either compressed or non-compressed. | ||
| 3158 | */ | 3231 | */ |
| 3159 | 3232 | ||
| 3160 | /* the destination must be opened for writing */ | 3233 | /* the destination must be opened for writing */ |
| @@ -4353,10 +4426,9 @@ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg) | |||
| 4353 | return btrfs_qgroup_wait_for_completion(root->fs_info); | 4426 | return btrfs_qgroup_wait_for_completion(root->fs_info); |
| 4354 | } | 4427 | } |
| 4355 | 4428 | ||
| 4356 | static long btrfs_ioctl_set_received_subvol(struct file *file, | 4429 | static long _btrfs_ioctl_set_received_subvol(struct file *file, |
| 4357 | void __user *arg) | 4430 | struct btrfs_ioctl_received_subvol_args *sa) |
| 4358 | { | 4431 | { |
| 4359 | struct btrfs_ioctl_received_subvol_args *sa = NULL; | ||
| 4360 | struct inode *inode = file_inode(file); | 4432 | struct inode *inode = file_inode(file); |
| 4361 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4433 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 4362 | struct btrfs_root_item *root_item = &root->root_item; | 4434 | struct btrfs_root_item *root_item = &root->root_item; |
| @@ -4384,13 +4456,6 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, | |||
| 4384 | goto out; | 4456 | goto out; |
| 4385 | } | 4457 | } |
| 4386 | 4458 | ||
| 4387 | sa = memdup_user(arg, sizeof(*sa)); | ||
| 4388 | if (IS_ERR(sa)) { | ||
| 4389 | ret = PTR_ERR(sa); | ||
| 4390 | sa = NULL; | ||
| 4391 | goto out; | ||
| 4392 | } | ||
| 4393 | |||
| 4394 | /* | 4459 | /* |
| 4395 | * 1 - root item | 4460 | * 1 - root item |
| 4396 | * 2 - uuid items (received uuid + subvol uuid) | 4461 | * 2 - uuid items (received uuid + subvol uuid) |
| @@ -4444,14 +4509,91 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, | |||
| 4444 | goto out; | 4509 | goto out; |
| 4445 | } | 4510 | } |
| 4446 | 4511 | ||
| 4512 | out: | ||
| 4513 | up_write(&root->fs_info->subvol_sem); | ||
| 4514 | mnt_drop_write_file(file); | ||
| 4515 | return ret; | ||
| 4516 | } | ||
| 4517 | |||
| 4518 | #ifdef CONFIG_64BIT | ||
| 4519 | static long btrfs_ioctl_set_received_subvol_32(struct file *file, | ||
| 4520 | void __user *arg) | ||
| 4521 | { | ||
| 4522 | struct btrfs_ioctl_received_subvol_args_32 *args32 = NULL; | ||
| 4523 | struct btrfs_ioctl_received_subvol_args *args64 = NULL; | ||
| 4524 | int ret = 0; | ||
| 4525 | |||
| 4526 | args32 = memdup_user(arg, sizeof(*args32)); | ||
| 4527 | if (IS_ERR(args32)) { | ||
| 4528 | ret = PTR_ERR(args32); | ||
| 4529 | args32 = NULL; | ||
| 4530 | goto out; | ||
| 4531 | } | ||
| 4532 | |||
| 4533 | args64 = kmalloc(sizeof(*args64), GFP_NOFS); | ||
| 4534 | if (!args64) { | ||
| 4535 | /* kmalloc() returns NULL on failure, not an ERR_PTR */ | ||
| 4536 | ret = -ENOMEM; | ||
| 4537 | goto out; | ||
| 4538 | } | ||
| 4539 | |||
| 4540 | memcpy(args64->uuid, args32->uuid, BTRFS_UUID_SIZE); | ||
| 4541 | args64->stransid = args32->stransid; | ||
| 4542 | args64->rtransid = args32->rtransid; | ||
| 4543 | args64->stime.sec = args32->stime.sec; | ||
| 4544 | args64->stime.nsec = args32->stime.nsec; | ||
| 4545 | args64->rtime.sec = args32->rtime.sec; | ||
| 4546 | args64->rtime.nsec = args32->rtime.nsec; | ||
| 4547 | args64->flags = args32->flags; | ||
| 4548 | |||
| 4549 | ret = _btrfs_ioctl_set_received_subvol(file, args64); | ||
| 4550 | if (ret) | ||
| 4551 | goto out; | ||
| 4552 | |||
| 4553 | memcpy(args32->uuid, args64->uuid, BTRFS_UUID_SIZE); | ||
| 4554 | args32->stransid = args64->stransid; | ||
| 4555 | args32->rtransid = args64->rtransid; | ||
| 4556 | args32->stime.sec = args64->stime.sec; | ||
| 4557 | args32->stime.nsec = args64->stime.nsec; | ||
| 4558 | args32->rtime.sec = args64->rtime.sec; | ||
| 4559 | args32->rtime.nsec = args64->rtime.nsec; | ||
| 4560 | args32->flags = args64->flags; | ||
| 4561 | |||
| 4562 | ret = copy_to_user(arg, args32, sizeof(*args32)); | ||
| 4563 | if (ret) | ||
| 4564 | ret = -EFAULT; | ||
| 4565 | |||
| 4566 | out: | ||
| 4567 | kfree(args32); | ||
| 4568 | kfree(args64); | ||
| 4569 | return ret; | ||
| 4570 | } | ||
| 4571 | #endif | ||
| 4572 | |||
| 4573 | static long btrfs_ioctl_set_received_subvol(struct file *file, | ||
| 4574 | void __user *arg) | ||
| 4575 | { | ||
| 4576 | struct btrfs_ioctl_received_subvol_args *sa = NULL; | ||
| 4577 | int ret = 0; | ||
| 4578 | |||
| 4579 | sa = memdup_user(arg, sizeof(*sa)); | ||
| 4580 | if (IS_ERR(sa)) { | ||
| 4581 | ret = PTR_ERR(sa); | ||
| 4582 | sa = NULL; | ||
| 4583 | goto out; | ||
| 4584 | } | ||
| 4585 | |||
| 4586 | ret = _btrfs_ioctl_set_received_subvol(file, sa); | ||
| 4587 | |||
| 4588 | if (ret) | ||
| 4589 | goto out; | ||
| 4590 | |||
| 4447 | ret = copy_to_user(arg, sa, sizeof(*sa)); | 4591 | ret = copy_to_user(arg, sa, sizeof(*sa)); |
| 4448 | if (ret) | 4592 | if (ret) |
| 4449 | ret = -EFAULT; | 4593 | ret = -EFAULT; |
| 4450 | 4594 | ||
| 4451 | out: | 4595 | out: |
| 4452 | kfree(sa); | 4596 | kfree(sa); |
| 4453 | up_write(&root->fs_info->subvol_sem); | ||
| 4454 | mnt_drop_write_file(file); | ||
| 4455 | return ret; | 4597 | return ret; |
| 4456 | } | 4598 | } |
| 4457 | 4599 | ||
| @@ -4746,7 +4888,7 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
| 4746 | case BTRFS_IOC_SYNC: { | 4888 | case BTRFS_IOC_SYNC: { |
| 4747 | int ret; | 4889 | int ret; |
| 4748 | 4890 | ||
| 4749 | ret = btrfs_start_delalloc_roots(root->fs_info, 0); | 4891 | ret = btrfs_start_delalloc_roots(root->fs_info, 0, -1); |
| 4750 | if (ret) | 4892 | if (ret) |
| 4751 | return ret; | 4893 | return ret; |
| 4752 | ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); | 4894 | ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); |
| @@ -4770,6 +4912,10 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
| 4770 | return btrfs_ioctl_balance_progress(root, argp); | 4912 | return btrfs_ioctl_balance_progress(root, argp); |
| 4771 | case BTRFS_IOC_SET_RECEIVED_SUBVOL: | 4913 | case BTRFS_IOC_SET_RECEIVED_SUBVOL: |
| 4772 | return btrfs_ioctl_set_received_subvol(file, argp); | 4914 | return btrfs_ioctl_set_received_subvol(file, argp); |
| 4915 | #ifdef CONFIG_64BIT | ||
| 4916 | case BTRFS_IOC_SET_RECEIVED_SUBVOL_32: | ||
| 4917 | return btrfs_ioctl_set_received_subvol_32(file, argp); | ||
| 4918 | #endif | ||
| 4773 | case BTRFS_IOC_SEND: | 4919 | case BTRFS_IOC_SEND: |
| 4774 | return btrfs_ioctl_send(file, argp); | 4920 | return btrfs_ioctl_send(file, argp); |
| 4775 | case BTRFS_IOC_GET_DEV_STATS: | 4921 | case BTRFS_IOC_GET_DEV_STATS: |
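The new BTRFS_IOC_SET_RECEIVED_SUBVOL_32 path exists because an unpacked struct holding a __u64 followed by a __u32 is padded to 16 bytes under a 64-bit kernel but occupies 12 bytes in a 32-bit process, so the ioctl payload sizes disagree. A small standalone check of that layout difference; the field names follow the diff, the rest is illustrative.

#include <stdint.h>
#include <stdio.h>

struct ts_64 { uint64_t sec; uint32_t nsec; };  /* padded on 64-bit ABIs */
struct ts_32 { uint64_t sec; uint32_t nsec; } __attribute__((__packed__));

int main(void)
{
    /* On x86_64 this prints 16 vs 12 -- the "4 bytes too small"
     * mismatch the comment in the diff describes. The compat handler
     * bridges it by copying field-by-field between the two layouts in
     * both directions, as btrfs_ioctl_set_received_subvol_32() does
     * around _btrfs_ioctl_set_received_subvol(). */
    printf("native: %zu bytes, packed 32-bit layout: %zu bytes\n",
           sizeof(struct ts_64), sizeof(struct ts_32));
    return 0;
}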
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index b16450b840e7..a94b05f72869 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
| @@ -349,10 +349,13 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode, | |||
| 349 | if (!uptodate) | 349 | if (!uptodate) |
| 350 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); | 350 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); |
| 351 | 351 | ||
| 352 | if (entry->bytes_left == 0) | 352 | if (entry->bytes_left == 0) { |
| 353 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 353 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
| 354 | else | 354 | if (waitqueue_active(&entry->wait)) |
| 355 | wake_up(&entry->wait); | ||
| 356 | } else { | ||
| 355 | ret = 1; | 357 | ret = 1; |
| 358 | } | ||
| 356 | out: | 359 | out: |
| 357 | if (!ret && cached && entry) { | 360 | if (!ret && cached && entry) { |
| 358 | *cached = entry; | 361 | *cached = entry; |
| @@ -410,10 +413,13 @@ have_entry: | |||
| 410 | if (!uptodate) | 413 | if (!uptodate) |
| 411 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); | 414 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); |
| 412 | 415 | ||
| 413 | if (entry->bytes_left == 0) | 416 | if (entry->bytes_left == 0) { |
| 414 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 417 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
| 415 | else | 418 | if (waitqueue_active(&entry->wait)) |
| 419 | wake_up(&entry->wait); | ||
| 420 | } else { | ||
| 416 | ret = 1; | 421 | ret = 1; |
| 422 | } | ||
| 417 | out: | 423 | out: |
| 418 | if (!ret && cached && entry) { | 424 | if (!ret && cached && entry) { |
| 419 | *cached = entry; | 425 | *cached = entry; |
| @@ -424,27 +430,48 @@ out: | |||
| 424 | } | 430 | } |
| 425 | 431 | ||
| 426 | /* Needs to either be called under a log transaction or the log_mutex */ | 432 | /* Needs to either be called under a log transaction or the log_mutex */ |
| 427 | void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode) | 433 | void btrfs_get_logged_extents(struct inode *inode, |
| 434 | struct list_head *logged_list) | ||
| 428 | { | 435 | { |
| 429 | struct btrfs_ordered_inode_tree *tree; | 436 | struct btrfs_ordered_inode_tree *tree; |
| 430 | struct btrfs_ordered_extent *ordered; | 437 | struct btrfs_ordered_extent *ordered; |
| 431 | struct rb_node *n; | 438 | struct rb_node *n; |
| 432 | int index = log->log_transid % 2; | ||
| 433 | 439 | ||
| 434 | tree = &BTRFS_I(inode)->ordered_tree; | 440 | tree = &BTRFS_I(inode)->ordered_tree; |
| 435 | spin_lock_irq(&tree->lock); | 441 | spin_lock_irq(&tree->lock); |
| 436 | for (n = rb_first(&tree->tree); n; n = rb_next(n)) { | 442 | for (n = rb_first(&tree->tree); n; n = rb_next(n)) { |
| 437 | ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node); | 443 | ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node); |
| 438 | spin_lock(&log->log_extents_lock[index]); | 444 | if (!list_empty(&ordered->log_list)) |
| 439 | if (list_empty(&ordered->log_list)) { | 445 | continue; |
| 440 | list_add_tail(&ordered->log_list, &log->logged_list[index]); | 446 | list_add_tail(&ordered->log_list, logged_list); |
| 441 | atomic_inc(&ordered->refs); | 447 | atomic_inc(&ordered->refs); |
| 442 | } | ||
| 443 | spin_unlock(&log->log_extents_lock[index]); | ||
| 444 | } | 448 | } |
| 445 | spin_unlock_irq(&tree->lock); | 449 | spin_unlock_irq(&tree->lock); |
| 446 | } | 450 | } |
| 447 | 451 | ||
| 452 | void btrfs_put_logged_extents(struct list_head *logged_list) | ||
| 453 | { | ||
| 454 | struct btrfs_ordered_extent *ordered; | ||
| 455 | |||
| 456 | while (!list_empty(logged_list)) { | ||
| 457 | ordered = list_first_entry(logged_list, | ||
| 458 | struct btrfs_ordered_extent, | ||
| 459 | log_list); | ||
| 460 | list_del_init(&ordered->log_list); | ||
| 461 | btrfs_put_ordered_extent(ordered); | ||
| 462 | } | ||
| 463 | } | ||
| 464 | |||
| 465 | void btrfs_submit_logged_extents(struct list_head *logged_list, | ||
| 466 | struct btrfs_root *log) | ||
| 467 | { | ||
| 468 | int index = log->log_transid % 2; | ||
| 469 | |||
| 470 | spin_lock_irq(&log->log_extents_lock[index]); | ||
| 471 | list_splice_tail(logged_list, &log->logged_list[index]); | ||
| 472 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
| 473 | } | ||
| 474 | |||
| 448 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) | 475 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) |
| 449 | { | 476 | { |
| 450 | struct btrfs_ordered_extent *ordered; | 477 | struct btrfs_ordered_extent *ordered; |
| @@ -577,7 +604,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) | |||
| 577 | INIT_LIST_HEAD(&splice); | 604 | INIT_LIST_HEAD(&splice); |
| 578 | INIT_LIST_HEAD(&works); | 605 | INIT_LIST_HEAD(&works); |
| 579 | 606 | ||
| 580 | mutex_lock(&root->fs_info->ordered_operations_mutex); | 607 | mutex_lock(&root->ordered_extent_mutex); |
| 581 | spin_lock(&root->ordered_extent_lock); | 608 | spin_lock(&root->ordered_extent_lock); |
| 582 | list_splice_init(&root->ordered_extents, &splice); | 609 | list_splice_init(&root->ordered_extents, &splice); |
| 583 | while (!list_empty(&splice) && nr) { | 610 | while (!list_empty(&splice) && nr) { |
| @@ -588,10 +615,11 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) | |||
| 588 | atomic_inc(&ordered->refs); | 615 | atomic_inc(&ordered->refs); |
| 589 | spin_unlock(&root->ordered_extent_lock); | 616 | spin_unlock(&root->ordered_extent_lock); |
| 590 | 617 | ||
| 591 | ordered->flush_work.func = btrfs_run_ordered_extent_work; | 618 | btrfs_init_work(&ordered->flush_work, |
| 619 | btrfs_run_ordered_extent_work, NULL, NULL); | ||
| 592 | list_add_tail(&ordered->work_list, &works); | 620 | list_add_tail(&ordered->work_list, &works); |
| 593 | btrfs_queue_worker(&root->fs_info->flush_workers, | 621 | btrfs_queue_work(root->fs_info->flush_workers, |
| 594 | &ordered->flush_work); | 622 | &ordered->flush_work); |
| 595 | 623 | ||
| 596 | cond_resched(); | 624 | cond_resched(); |
| 597 | spin_lock(&root->ordered_extent_lock); | 625 | spin_lock(&root->ordered_extent_lock); |
| @@ -608,7 +636,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) | |||
| 608 | btrfs_put_ordered_extent(ordered); | 636 | btrfs_put_ordered_extent(ordered); |
| 609 | cond_resched(); | 637 | cond_resched(); |
| 610 | } | 638 | } |
| 611 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | 639 | mutex_unlock(&root->ordered_extent_mutex); |
| 612 | 640 | ||
| 613 | return count; | 641 | return count; |
| 614 | } | 642 | } |
| @@ -621,6 +649,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) | |||
| 621 | 649 | ||
| 622 | INIT_LIST_HEAD(&splice); | 650 | INIT_LIST_HEAD(&splice); |
| 623 | 651 | ||
| 652 | mutex_lock(&fs_info->ordered_operations_mutex); | ||
| 624 | spin_lock(&fs_info->ordered_root_lock); | 653 | spin_lock(&fs_info->ordered_root_lock); |
| 625 | list_splice_init(&fs_info->ordered_roots, &splice); | 654 | list_splice_init(&fs_info->ordered_roots, &splice); |
| 626 | while (!list_empty(&splice) && nr) { | 655 | while (!list_empty(&splice) && nr) { |
| @@ -643,6 +672,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) | |||
| 643 | } | 672 | } |
| 644 | list_splice_tail(&splice, &fs_info->ordered_roots); | 673 | list_splice_tail(&splice, &fs_info->ordered_roots); |
| 645 | spin_unlock(&fs_info->ordered_root_lock); | 674 | spin_unlock(&fs_info->ordered_root_lock); |
| 675 | mutex_unlock(&fs_info->ordered_operations_mutex); | ||
| 646 | } | 676 | } |
| 647 | 677 | ||
| 648 | /* | 678 | /* |
| @@ -704,8 +734,8 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, | |||
| 704 | goto out; | 734 | goto out; |
| 705 | } | 735 | } |
| 706 | list_add_tail(&work->list, &works); | 736 | list_add_tail(&work->list, &works); |
| 707 | btrfs_queue_worker(&root->fs_info->flush_workers, | 737 | btrfs_queue_work(root->fs_info->flush_workers, |
| 708 | &work->work); | 738 | &work->work); |
| 709 | 739 | ||
| 710 | cond_resched(); | 740 | cond_resched(); |
| 711 | spin_lock(&root->fs_info->ordered_root_lock); | 741 | spin_lock(&root->fs_info->ordered_root_lock); |
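Both ordered-data hunks above add a wake_up() when an ordered extent's remaining byte count hits zero, guarded by waitqueue_active() so the call is skipped when nobody sleeps on the queue. The sketch below models that guard in userspace with an explicit waiter count where the kernel's waitqueue keeps its own bookkeeping; treat it as an analogue, not the kernel mechanism.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int waiters;   /* ~ what waitqueue_active() tests */
static int io_done;   /* ~ BTRFS_ORDERED_IO_DONE */

static void wait_io_done(void)
{
    pthread_mutex_lock(&lock);
    waiters++;
    while (!io_done)
        pthread_cond_wait(&cond, &lock);
    waiters--;
    pthread_mutex_unlock(&lock);
}

static void complete_io(void)
{
    pthread_mutex_lock(&lock);
    io_done = 1;
    if (waiters > 0)                   /* ~ if (waitqueue_active(...)) */
        pthread_cond_broadcast(&cond); /* ~ wake_up(&entry->wait) */
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    complete_io();
    wait_io_done(); /* returns at once: io_done was already set */
    puts("ordered extent finished");
    return 0;
}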
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 9b0450f7ac20..246897058efb 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
| @@ -197,7 +197,11 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | |||
| 197 | struct inode *inode); | 197 | struct inode *inode); |
| 198 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); | 198 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); |
| 199 | void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); | 199 | void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); |
| 200 | void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode); | 200 | void btrfs_get_logged_extents(struct inode *inode, |
| 201 | struct list_head *logged_list); | ||
| 202 | void btrfs_put_logged_extents(struct list_head *logged_list); | ||
| 203 | void btrfs_submit_logged_extents(struct list_head *logged_list, | ||
| 204 | struct btrfs_root *log); | ||
| 201 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); | 205 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); |
| 202 | void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); | 206 | void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); |
| 203 | int __init ordered_data_init(void); | 207 | int __init ordered_data_init(void); |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 472302a2d745..2cf905877aaf 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
| @@ -1509,8 +1509,8 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans, | |||
| 1509 | ret = qgroup_rescan_init(fs_info, 0, 1); | 1509 | ret = qgroup_rescan_init(fs_info, 0, 1); |
| 1510 | if (!ret) { | 1510 | if (!ret) { |
| 1511 | qgroup_rescan_zero_tracking(fs_info); | 1511 | qgroup_rescan_zero_tracking(fs_info); |
| 1512 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, | 1512 | btrfs_queue_work(fs_info->qgroup_rescan_workers, |
| 1513 | &fs_info->qgroup_rescan_work); | 1513 | &fs_info->qgroup_rescan_work); |
| 1514 | } | 1514 | } |
| 1515 | ret = 0; | 1515 | ret = 0; |
| 1516 | } | 1516 | } |
| @@ -2095,7 +2095,8 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, | |||
| 2095 | 2095 | ||
| 2096 | memset(&fs_info->qgroup_rescan_work, 0, | 2096 | memset(&fs_info->qgroup_rescan_work, 0, |
| 2097 | sizeof(fs_info->qgroup_rescan_work)); | 2097 | sizeof(fs_info->qgroup_rescan_work)); |
| 2098 | fs_info->qgroup_rescan_work.func = btrfs_qgroup_rescan_worker; | 2098 | btrfs_init_work(&fs_info->qgroup_rescan_work, |
| 2099 | btrfs_qgroup_rescan_worker, NULL, NULL); | ||
| 2099 | 2100 | ||
| 2100 | if (ret) { | 2101 | if (ret) { |
| 2101 | err: | 2102 | err: |
| @@ -2158,8 +2159,8 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) | |||
| 2158 | 2159 | ||
| 2159 | qgroup_rescan_zero_tracking(fs_info); | 2160 | qgroup_rescan_zero_tracking(fs_info); |
| 2160 | 2161 | ||
| 2161 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, | 2162 | btrfs_queue_work(fs_info->qgroup_rescan_workers, |
| 2162 | &fs_info->qgroup_rescan_work); | 2163 | &fs_info->qgroup_rescan_work); |
| 2163 | 2164 | ||
| 2164 | return 0; | 2165 | return 0; |
| 2165 | } | 2166 | } |
| @@ -2190,6 +2191,6 @@ void | |||
| 2190 | btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) | 2191 | btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) |
| 2191 | { | 2192 | { |
| 2192 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) | 2193 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) |
| 2193 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, | 2194 | btrfs_queue_work(fs_info->qgroup_rescan_workers, |
| 2194 | &fs_info->qgroup_rescan_work); | 2195 | &fs_info->qgroup_rescan_work); |
| 2195 | } | 2196 | } |
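All the btrfs_queue_worker() to btrfs_queue_work() conversions in these hunks share one shape: instead of poking ->func on the work item directly, callers go through an init helper that also takes ordered-completion callbacks (NULL when unused). A toy userspace model of that API shape follows; the names mirror the diff, but the synchronous "queue" stands in for a real worker thread.

#include <stdio.h>

struct work {
    void (*func)(struct work *);
    void (*ordered_func)(struct work *);
    void (*ordered_free)(struct work *);
};

static void init_work(struct work *w, void (*func)(struct work *),
                      void (*ordered_func)(struct work *),
                      void (*ordered_free)(struct work *))
{
    w->func = func;
    w->ordered_func = ordered_func;
    w->ordered_free = ordered_free;
}

static void queue_work_item(struct work *w)
{
    w->func(w); /* a real workqueue defers this to a worker thread */
    if (w->ordered_func)
        w->ordered_func(w);
    if (w->ordered_free)
        w->ordered_free(w);
}

static void rescan(struct work *w)
{
    (void)w;
    puts("qgroup rescan running");
}

int main(void)
{
    struct work w;

    init_work(&w, rescan, NULL, NULL); /* ~ btrfs_init_work(..., NULL, NULL) */
    queue_work_item(&w);               /* ~ btrfs_queue_work(wq, &w) */
    return 0;
}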
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 9af0b25d991a..4055291a523e 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c | |||
| @@ -1416,20 +1416,18 @@ cleanup: | |||
| 1416 | 1416 | ||
| 1417 | static void async_rmw_stripe(struct btrfs_raid_bio *rbio) | 1417 | static void async_rmw_stripe(struct btrfs_raid_bio *rbio) |
| 1418 | { | 1418 | { |
| 1419 | rbio->work.flags = 0; | 1419 | btrfs_init_work(&rbio->work, rmw_work, NULL, NULL); |
| 1420 | rbio->work.func = rmw_work; | ||
| 1421 | 1420 | ||
| 1422 | btrfs_queue_worker(&rbio->fs_info->rmw_workers, | 1421 | btrfs_queue_work(rbio->fs_info->rmw_workers, |
| 1423 | &rbio->work); | 1422 | &rbio->work); |
| 1424 | } | 1423 | } |
| 1425 | 1424 | ||
| 1426 | static void async_read_rebuild(struct btrfs_raid_bio *rbio) | 1425 | static void async_read_rebuild(struct btrfs_raid_bio *rbio) |
| 1427 | { | 1426 | { |
| 1428 | rbio->work.flags = 0; | 1427 | btrfs_init_work(&rbio->work, read_rebuild_work, NULL, NULL); |
| 1429 | rbio->work.func = read_rebuild_work; | ||
| 1430 | 1428 | ||
| 1431 | btrfs_queue_worker(&rbio->fs_info->rmw_workers, | 1429 | btrfs_queue_work(rbio->fs_info->rmw_workers, |
| 1432 | &rbio->work); | 1430 | &rbio->work); |
| 1433 | } | 1431 | } |
| 1434 | 1432 | ||
| 1435 | /* | 1433 | /* |
| @@ -1667,10 +1665,9 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule) | |||
| 1667 | plug = container_of(cb, struct btrfs_plug_cb, cb); | 1665 | plug = container_of(cb, struct btrfs_plug_cb, cb); |
| 1668 | 1666 | ||
| 1669 | if (from_schedule) { | 1667 | if (from_schedule) { |
| 1670 | plug->work.flags = 0; | 1668 | btrfs_init_work(&plug->work, unplug_work, NULL, NULL); |
| 1671 | plug->work.func = unplug_work; | 1669 | btrfs_queue_work(plug->info->rmw_workers, |
| 1672 | btrfs_queue_worker(&plug->info->rmw_workers, | 1670 | &plug->work); |
| 1673 | &plug->work); | ||
| 1674 | return; | 1671 | return; |
| 1675 | } | 1672 | } |
| 1676 | run_plug(plug); | 1673 | run_plug(plug); |
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 31c797c48c3e..30947f923620 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
| @@ -793,10 +793,10 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info) | |||
| 793 | /* FIXME we cannot handle this properly right now */ | 793 | /* FIXME we cannot handle this properly right now */ |
| 794 | BUG(); | 794 | BUG(); |
| 795 | } | 795 | } |
| 796 | rmw->work.func = reada_start_machine_worker; | 796 | btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL); |
| 797 | rmw->fs_info = fs_info; | 797 | rmw->fs_info = fs_info; |
| 798 | 798 | ||
| 799 | btrfs_queue_worker(&fs_info->readahead_workers, &rmw->work); | 799 | btrfs_queue_work(fs_info->readahead_workers, &rmw->work); |
| 800 | } | 800 | } |
| 801 | 801 | ||
| 802 | #ifdef DEBUG | 802 | #ifdef DEBUG |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 07b3b36f40ee..def428a25b2a 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
| @@ -4248,7 +4248,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 4248 | btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu", | 4248 | btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu", |
| 4249 | rc->block_group->key.objectid, rc->block_group->flags); | 4249 | rc->block_group->key.objectid, rc->block_group->flags); |
| 4250 | 4250 | ||
| 4251 | ret = btrfs_start_delalloc_roots(fs_info, 0); | 4251 | ret = btrfs_start_delalloc_roots(fs_info, 0, -1); |
| 4252 | if (ret < 0) { | 4252 | if (ret < 0) { |
| 4253 | err = ret; | 4253 | err = ret; |
| 4254 | goto out; | 4254 | goto out; |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 1389b69059de..38bb47e7d6b1 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | * Boston, MA 021110-1307, USA. | 16 | * Boston, MA 021110-1307, USA. |
| 17 | */ | 17 | */ |
| 18 | 18 | ||
| 19 | #include <linux/err.h> | ||
| 19 | #include <linux/uuid.h> | 20 | #include <linux/uuid.h> |
| 20 | #include "ctree.h" | 21 | #include "ctree.h" |
| 21 | #include "transaction.h" | 22 | #include "transaction.h" |
| @@ -271,7 +272,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
| 271 | key.offset++; | 272 | key.offset++; |
| 272 | 273 | ||
| 273 | root = btrfs_read_fs_root(tree_root, &root_key); | 274 | root = btrfs_read_fs_root(tree_root, &root_key); |
| 274 | err = PTR_RET(root); | 275 | err = PTR_ERR_OR_ZERO(root); |
| 275 | if (err && err != -ENOENT) { | 276 | if (err && err != -ENOENT) { |
| 276 | break; | 277 | break; |
| 277 | } else if (err == -ENOENT) { | 278 | } else if (err == -ENOENT) { |
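PTR_RET() was renamed PTR_ERR_OR_ZERO(); both collapse an ERR_PTR-style return into "0 on success, -errno on failure". Here is a userspace model of that encoding, assuming (as the kernel's version does) that errno values map into the last page of the address space.

#include <stdio.h>

#define MAX_ERRNO 4095

static void *err_ptr(long err) { return (void *)err; }

static int is_err(const void *p)
{
    return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}

static long ptr_err_or_zero(const void *p)
{
    return is_err(p) ? (long)p : 0;
}

int main(void)
{
    void *ok = &(int){0};
    void *bad = err_ptr(-2 /* -ENOENT */);

    /* prints "0 -2": a valid pointer yields 0, an encoded error
     * yields its errno, which is all the rename above relies on */
    printf("%ld %ld\n", ptr_err_or_zero(ok), ptr_err_or_zero(bad));
    return 0;
}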
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index efba5d1282ee..93e6d7172844 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
| @@ -315,6 +315,16 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx) | |||
| 315 | atomic_inc(&fs_info->scrubs_running); | 315 | atomic_inc(&fs_info->scrubs_running); |
| 316 | atomic_inc(&fs_info->scrubs_paused); | 316 | atomic_inc(&fs_info->scrubs_paused); |
| 317 | mutex_unlock(&fs_info->scrub_lock); | 317 | mutex_unlock(&fs_info->scrub_lock); |
| 318 | |||
| 319 | /* | ||
| 320 | * Checking the @scrubs_running == @scrubs_paused condition | ||
| 321 | * inside wait_event() is not an atomic operation, which | ||
| 322 | * means we may inc/dec @scrubs_running/paused at any time. | ||
| 323 | * Wake up @scrub_pause_wait as often as we can so that a | ||
| 324 | * blocked transaction commit waits as little as possible. | ||
| 325 | */ | ||
| 326 | wake_up(&fs_info->scrub_pause_wait); | ||
| 327 | |||
| 318 | atomic_inc(&sctx->workers_pending); | 328 | atomic_inc(&sctx->workers_pending); |
| 319 | } | 329 | } |
| 320 | 330 | ||
| @@ -418,7 +428,8 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) | |||
| 418 | sbio->index = i; | 428 | sbio->index = i; |
| 419 | sbio->sctx = sctx; | 429 | sbio->sctx = sctx; |
| 420 | sbio->page_count = 0; | 430 | sbio->page_count = 0; |
| 421 | sbio->work.func = scrub_bio_end_io_worker; | 431 | btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, |
| 432 | NULL, NULL); | ||
| 422 | 433 | ||
| 423 | if (i != SCRUB_BIOS_PER_SCTX - 1) | 434 | if (i != SCRUB_BIOS_PER_SCTX - 1) |
| 424 | sctx->bios[i]->next_free = i + 1; | 435 | sctx->bios[i]->next_free = i + 1; |
| @@ -987,9 +998,10 @@ nodatasum_case: | |||
| 987 | fixup_nodatasum->root = fs_info->extent_root; | 998 | fixup_nodatasum->root = fs_info->extent_root; |
| 988 | fixup_nodatasum->mirror_num = failed_mirror_index + 1; | 999 | fixup_nodatasum->mirror_num = failed_mirror_index + 1; |
| 989 | scrub_pending_trans_workers_inc(sctx); | 1000 | scrub_pending_trans_workers_inc(sctx); |
| 990 | fixup_nodatasum->work.func = scrub_fixup_nodatasum; | 1001 | btrfs_init_work(&fixup_nodatasum->work, scrub_fixup_nodatasum, |
| 991 | btrfs_queue_worker(&fs_info->scrub_workers, | 1002 | NULL, NULL); |
| 992 | &fixup_nodatasum->work); | 1003 | btrfs_queue_work(fs_info->scrub_workers, |
| 1004 | &fixup_nodatasum->work); | ||
| 993 | goto out; | 1005 | goto out; |
| 994 | } | 1006 | } |
| 995 | 1007 | ||
| @@ -1603,8 +1615,8 @@ static void scrub_wr_bio_end_io(struct bio *bio, int err) | |||
| 1603 | sbio->err = err; | 1615 | sbio->err = err; |
| 1604 | sbio->bio = bio; | 1616 | sbio->bio = bio; |
| 1605 | 1617 | ||
| 1606 | sbio->work.func = scrub_wr_bio_end_io_worker; | 1618 | btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL); |
| 1607 | btrfs_queue_worker(&fs_info->scrub_wr_completion_workers, &sbio->work); | 1619 | btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work); |
| 1608 | } | 1620 | } |
| 1609 | 1621 | ||
| 1610 | static void scrub_wr_bio_end_io_worker(struct btrfs_work *work) | 1622 | static void scrub_wr_bio_end_io_worker(struct btrfs_work *work) |
| @@ -2072,7 +2084,7 @@ static void scrub_bio_end_io(struct bio *bio, int err) | |||
| 2072 | sbio->err = err; | 2084 | sbio->err = err; |
| 2073 | sbio->bio = bio; | 2085 | sbio->bio = bio; |
| 2074 | 2086 | ||
| 2075 | btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); | 2087 | btrfs_queue_work(fs_info->scrub_workers, &sbio->work); |
| 2076 | } | 2088 | } |
| 2077 | 2089 | ||
| 2078 | static void scrub_bio_end_io_worker(struct btrfs_work *work) | 2090 | static void scrub_bio_end_io_worker(struct btrfs_work *work) |
| @@ -2686,10 +2698,23 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, | |||
| 2686 | 2698 | ||
| 2687 | wait_event(sctx->list_wait, | 2699 | wait_event(sctx->list_wait, |
| 2688 | atomic_read(&sctx->bios_in_flight) == 0); | 2700 | atomic_read(&sctx->bios_in_flight) == 0); |
| 2689 | atomic_set(&sctx->wr_ctx.flush_all_writes, 0); | 2701 | atomic_inc(&fs_info->scrubs_paused); |
| 2702 | wake_up(&fs_info->scrub_pause_wait); | ||
| 2703 | |||
| 2704 | /* | ||
| 2705 | * This must happen before we decrease @scrub_paused, to | ||
| 2706 | * make sure we don't block a transaction commit while we | ||
| 2707 | * are waiting for the pending workers to finish. | ||
| 2708 | */ | ||
| 2690 | wait_event(sctx->list_wait, | 2709 | wait_event(sctx->list_wait, |
| 2691 | atomic_read(&sctx->workers_pending) == 0); | 2710 | atomic_read(&sctx->workers_pending) == 0); |
| 2692 | scrub_blocked_if_needed(fs_info); | 2711 | atomic_set(&sctx->wr_ctx.flush_all_writes, 0); |
| 2712 | |||
| 2713 | mutex_lock(&fs_info->scrub_lock); | ||
| 2714 | __scrub_blocked_if_needed(fs_info); | ||
| 2715 | atomic_dec(&fs_info->scrubs_paused); | ||
| 2716 | mutex_unlock(&fs_info->scrub_lock); | ||
| 2717 | wake_up(&fs_info->scrub_pause_wait); | ||
| 2693 | 2718 | ||
| 2694 | btrfs_put_block_group(cache); | 2719 | btrfs_put_block_group(cache); |
| 2695 | if (ret) | 2720 | if (ret) |
| @@ -2757,33 +2782,35 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, | |||
| 2757 | int is_dev_replace) | 2782 | int is_dev_replace) |
| 2758 | { | 2783 | { |
| 2759 | int ret = 0; | 2784 | int ret = 0; |
| 2785 | int flags = WQ_FREEZABLE | WQ_UNBOUND; | ||
| 2786 | int max_active = fs_info->thread_pool_size; | ||
| 2760 | 2787 | ||
| 2761 | if (fs_info->scrub_workers_refcnt == 0) { | 2788 | if (fs_info->scrub_workers_refcnt == 0) { |
| 2762 | if (is_dev_replace) | 2789 | if (is_dev_replace) |
| 2763 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1, | 2790 | fs_info->scrub_workers = |
| 2764 | &fs_info->generic_worker); | 2791 | btrfs_alloc_workqueue("btrfs-scrub", flags, |
| 2792 | 1, 4); | ||
| 2765 | else | 2793 | else |
| 2766 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", | 2794 | fs_info->scrub_workers = |
| 2767 | fs_info->thread_pool_size, | 2795 | btrfs_alloc_workqueue("btrfs-scrub", flags, |
| 2768 | &fs_info->generic_worker); | 2796 | max_active, 4); |
| 2769 | fs_info->scrub_workers.idle_thresh = 4; | 2797 | if (!fs_info->scrub_workers) { |
| 2770 | ret = btrfs_start_workers(&fs_info->scrub_workers); | 2798 | ret = -ENOMEM; |
| 2771 | if (ret) | ||
| 2772 | goto out; | 2799 | goto out; |
| 2773 | btrfs_init_workers(&fs_info->scrub_wr_completion_workers, | 2800 | } |
| 2774 | "scrubwrc", | 2801 | fs_info->scrub_wr_completion_workers = |
| 2775 | fs_info->thread_pool_size, | 2802 | btrfs_alloc_workqueue("btrfs-scrubwrc", flags, |
| 2776 | &fs_info->generic_worker); | 2803 | max_active, 2); |
| 2777 | fs_info->scrub_wr_completion_workers.idle_thresh = 2; | 2804 | if (!fs_info->scrub_wr_completion_workers) { |
| 2778 | ret = btrfs_start_workers( | 2805 | ret = -ENOMEM; |
| 2779 | &fs_info->scrub_wr_completion_workers); | ||
| 2780 | if (ret) | ||
| 2781 | goto out; | 2806 | goto out; |
| 2782 | btrfs_init_workers(&fs_info->scrub_nocow_workers, "scrubnc", 1, | 2807 | } |
| 2783 | &fs_info->generic_worker); | 2808 | fs_info->scrub_nocow_workers = |
| 2784 | ret = btrfs_start_workers(&fs_info->scrub_nocow_workers); | 2809 | btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0); |
| 2785 | if (ret) | 2810 | if (!fs_info->scrub_nocow_workers) { |
| 2811 | ret = -ENOMEM; | ||
| 2786 | goto out; | 2812 | goto out; |
| 2813 | } | ||
| 2787 | } | 2814 | } |
| 2788 | ++fs_info->scrub_workers_refcnt; | 2815 | ++fs_info->scrub_workers_refcnt; |
| 2789 | out: | 2816 | out: |
| @@ -2793,9 +2820,9 @@ out: | |||
| 2793 | static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) | 2820 | static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) |
| 2794 | { | 2821 | { |
| 2795 | if (--fs_info->scrub_workers_refcnt == 0) { | 2822 | if (--fs_info->scrub_workers_refcnt == 0) { |
| 2796 | btrfs_stop_workers(&fs_info->scrub_workers); | 2823 | btrfs_destroy_workqueue(fs_info->scrub_workers); |
| 2797 | btrfs_stop_workers(&fs_info->scrub_wr_completion_workers); | 2824 | btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); |
| 2798 | btrfs_stop_workers(&fs_info->scrub_nocow_workers); | 2825 | btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); |
| 2799 | } | 2826 | } |
| 2800 | WARN_ON(fs_info->scrub_workers_refcnt < 0); | 2827 | WARN_ON(fs_info->scrub_workers_refcnt < 0); |
| 2801 | } | 2828 | } |
| @@ -3106,10 +3133,10 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
| 3106 | nocow_ctx->len = len; | 3133 | nocow_ctx->len = len; |
| 3107 | nocow_ctx->mirror_num = mirror_num; | 3134 | nocow_ctx->mirror_num = mirror_num; |
| 3108 | nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; | 3135 | nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; |
| 3109 | nocow_ctx->work.func = copy_nocow_pages_worker; | 3136 | btrfs_init_work(&nocow_ctx->work, copy_nocow_pages_worker, NULL, NULL); |
| 3110 | INIT_LIST_HEAD(&nocow_ctx->inodes); | 3137 | INIT_LIST_HEAD(&nocow_ctx->inodes); |
| 3111 | btrfs_queue_worker(&fs_info->scrub_nocow_workers, | 3138 | btrfs_queue_work(fs_info->scrub_nocow_workers, |
| 3112 | &nocow_ctx->work); | 3139 | &nocow_ctx->work); |
| 3113 | 3140 | ||
| 3114 | return 0; | 3141 | return 0; |
| 3115 | } | 3142 | } |
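scrub_workers_get() now allocates three btrfs_workqueues up front and bails out with -ENOMEM if any allocation fails, instead of starting the old homegrown worker pools. A stripped-down sketch of that allocate-or-unwind shape, with malloc standing in for btrfs_alloc_workqueue(); the flags/max_active/thresh arguments from the diff have no analogue here.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct ctx { void *scrub, *wr_completion, *nocow; };

static int workers_get(struct ctx *c)
{
    memset(c, 0, sizeof(*c)); /* so the unwind below is always safe */
    c->scrub = malloc(64);
    if (!c->scrub)
        goto fail;
    c->wr_completion = malloc(64);
    if (!c->wr_completion)
        goto fail;
    c->nocow = malloc(64);
    if (!c->nocow)
        goto fail;
    return 0;
fail:
    /* free(NULL) is a no-op, so a partial failure unwinds cleanly */
    free(c->scrub);
    free(c->wr_completion);
    free(c->nocow);
    return -1; /* ~ -ENOMEM */
}

int main(void)
{
    struct ctx c;

    if (workers_get(&c) == 0) {
        puts("all three workqueues allocated");
        free(c.scrub);
        free(c.wr_completion);
        free(c.nocow);
    }
    return 0;
}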
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 9dde9717c1b9..9b6da9d55f9a 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
| @@ -51,15 +51,18 @@ struct fs_path { | |||
| 51 | struct { | 51 | struct { |
| 52 | char *start; | 52 | char *start; |
| 53 | char *end; | 53 | char *end; |
| 54 | char *prepared; | ||
| 55 | 54 | ||
| 56 | char *buf; | 55 | char *buf; |
| 57 | int buf_len; | 56 | unsigned short buf_len:15; |
| 58 | unsigned int reversed:1; | 57 | unsigned short reversed:1; |
| 59 | unsigned int virtual_mem:1; | ||
| 60 | char inline_buf[]; | 58 | char inline_buf[]; |
| 61 | }; | 59 | }; |
| 62 | char pad[PAGE_SIZE]; | 60 | /* |
| 61 | * The average path length does not exceed 200 bytes, so we get | ||
| 62 | * better packing in the slab and a higher chance to satisfy | ||
| 63 | * an allocation later during send. | ||
| 64 | */ | ||
| 65 | char pad[256]; | ||
| 63 | }; | 66 | }; |
| 64 | }; | 67 | }; |
| 65 | #define FS_PATH_INLINE_SIZE \ | 68 | #define FS_PATH_INLINE_SIZE \ |
| @@ -109,6 +112,7 @@ struct send_ctx { | |||
| 109 | int cur_inode_deleted; | 112 | int cur_inode_deleted; |
| 110 | u64 cur_inode_size; | 113 | u64 cur_inode_size; |
| 111 | u64 cur_inode_mode; | 114 | u64 cur_inode_mode; |
| 115 | u64 cur_inode_rdev; | ||
| 112 | u64 cur_inode_last_extent; | 116 | u64 cur_inode_last_extent; |
| 113 | 117 | ||
| 114 | u64 send_progress; | 118 | u64 send_progress; |
| @@ -120,6 +124,8 @@ struct send_ctx { | |||
| 120 | struct list_head name_cache_list; | 124 | struct list_head name_cache_list; |
| 121 | int name_cache_size; | 125 | int name_cache_size; |
| 122 | 126 | ||
| 127 | struct file_ra_state ra; | ||
| 128 | |||
| 123 | char *read_buf; | 129 | char *read_buf; |
| 124 | 130 | ||
| 125 | /* | 131 | /* |
| @@ -175,6 +181,47 @@ struct send_ctx { | |||
| 175 | * own move/rename can be performed. | 181 | * own move/rename can be performed. |
| 176 | */ | 182 | */ |
| 177 | struct rb_root waiting_dir_moves; | 183 | struct rb_root waiting_dir_moves; |
| 184 | |||
| 185 | /* | ||
| 186 | * A directory that is going to be rm'ed might have a child directory | ||
| 187 | * which is in the pending directory moves index above. In this case, | ||
| 188 | * the directory can only be removed after the move/rename of its child | ||
| 189 | * is performed. Example: | ||
| 190 | * | ||
| 191 | * Parent snapshot: | ||
| 192 | * | ||
| 193 | * . (ino 256) | ||
| 194 | * |-- a/ (ino 257) | ||
| 195 | * |-- b/ (ino 258) | ||
| 196 | * |-- c/ (ino 259) | ||
| 197 | * | |-- x/ (ino 260) | ||
| 198 | * | | ||
| 199 | * |-- y/ (ino 261) | ||
| 200 | * | ||
| 201 | * Send snapshot: | ||
| 202 | * | ||
| 203 | * . (ino 256) | ||
| 204 | * |-- a/ (ino 257) | ||
| 205 | * |-- b/ (ino 258) | ||
| 206 | * |-- YY/ (ino 261) | ||
| 207 | * |-- x/ (ino 260) | ||
| 208 | * | ||
| 209 | * Sequence of steps that lead to the send snapshot: | ||
| 210 | * rm -f /a/b/c/foo.txt | ||
| 211 | * mv /a/b/y /a/b/YY | ||
| 212 | * mv /a/b/c/x /a/b/YY | ||
| 213 | * rmdir /a/b/c | ||
| 214 | * | ||
| 215 | * When the child is processed, its move/rename is delayed until its | ||
| 216 | * parent is processed (as explained above), but all other operations | ||
| 217 | * like update utimes, chown, chgrp, etc, are performed and the paths | ||
| 218 | * that it uses for those operations must use the orphanized name of | ||
| 219 | * its parent (the directory we're going to rm later), so we need to | ||
| 220 | * memorize that name. | ||
| 221 | * | ||
| 222 | * Indexed by the inode number of the directory to be deleted. | ||
| 223 | */ | ||
| 224 | struct rb_root orphan_dirs; | ||
| 178 | }; | 225 | }; |
| 179 | 226 | ||
| 180 | struct pending_dir_move { | 227 | struct pending_dir_move { |
| @@ -189,6 +236,18 @@ struct pending_dir_move { | |||
| 189 | struct waiting_dir_move { | 236 | struct waiting_dir_move { |
| 190 | struct rb_node node; | 237 | struct rb_node node; |
| 191 | u64 ino; | 238 | u64 ino; |
| 239 | /* | ||
| 240 | * Some directory might not have been removable because it was | ||
| 241 | * waiting for this directory inode to be moved first. Therefore, | ||
| 242 | * after this directory is moved, we can try to rmdir the inode rmdir_ino. | ||
| 243 | */ | ||
| 244 | u64 rmdir_ino; | ||
| 245 | }; | ||
| 246 | |||
| 247 | struct orphan_dir_info { | ||
| 248 | struct rb_node node; | ||
| 249 | u64 ino; | ||
| 250 | u64 gen; | ||
| 192 | }; | 251 | }; |
| 193 | 252 | ||
| 194 | struct name_cache_entry { | 253 | struct name_cache_entry { |
| @@ -214,6 +273,11 @@ struct name_cache_entry { | |||
| 214 | 273 | ||
| 215 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); | 274 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); |
| 216 | 275 | ||
| 276 | static struct waiting_dir_move * | ||
| 277 | get_waiting_dir_move(struct send_ctx *sctx, u64 ino); | ||
| 278 | |||
| 279 | static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino); | ||
| 280 | |||
| 217 | static int need_send_hole(struct send_ctx *sctx) | 281 | static int need_send_hole(struct send_ctx *sctx) |
| 218 | { | 282 | { |
| 219 | return (sctx->parent_root && !sctx->cur_inode_new && | 283 | return (sctx->parent_root && !sctx->cur_inode_new && |
| @@ -242,7 +306,6 @@ static struct fs_path *fs_path_alloc(void) | |||
| 242 | if (!p) | 306 | if (!p) |
| 243 | return NULL; | 307 | return NULL; |
| 244 | p->reversed = 0; | 308 | p->reversed = 0; |
| 245 | p->virtual_mem = 0; | ||
| 246 | p->buf = p->inline_buf; | 309 | p->buf = p->inline_buf; |
| 247 | p->buf_len = FS_PATH_INLINE_SIZE; | 310 | p->buf_len = FS_PATH_INLINE_SIZE; |
| 248 | fs_path_reset(p); | 311 | fs_path_reset(p); |
| @@ -265,12 +328,8 @@ static void fs_path_free(struct fs_path *p) | |||
| 265 | { | 328 | { |
| 266 | if (!p) | 329 | if (!p) |
| 267 | return; | 330 | return; |
| 268 | if (p->buf != p->inline_buf) { | 331 | if (p->buf != p->inline_buf) |
| 269 | if (p->virtual_mem) | 332 | kfree(p->buf); |
| 270 | vfree(p->buf); | ||
| 271 | else | ||
| 272 | kfree(p->buf); | ||
| 273 | } | ||
| 274 | kfree(p); | 333 | kfree(p); |
| 275 | } | 334 | } |
| 276 | 335 | ||
| @@ -292,40 +351,23 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) | |||
| 292 | 351 | ||
| 293 | path_len = p->end - p->start; | 352 | path_len = p->end - p->start; |
| 294 | old_buf_len = p->buf_len; | 353 | old_buf_len = p->buf_len; |
| 295 | len = PAGE_ALIGN(len); | 354 | |
| 296 | 355 | /* | |
| 297 | if (p->buf == p->inline_buf) { | 356 | * First time the inline_buf does not suffice. |
| 298 | tmp_buf = kmalloc(len, GFP_NOFS | __GFP_NOWARN); | 357 | */ |
| 299 | if (!tmp_buf) { | 358 | if (p->buf == p->inline_buf) |
| 300 | tmp_buf = vmalloc(len); | 359 | tmp_buf = kmalloc(len, GFP_NOFS); |
| 301 | if (!tmp_buf) | 360 | else |
| 302 | return -ENOMEM; | 361 | tmp_buf = krealloc(p->buf, len, GFP_NOFS); |
| 303 | p->virtual_mem = 1; | 362 | if (!tmp_buf) |
| 304 | } | 363 | return -ENOMEM; |
| 305 | memcpy(tmp_buf, p->buf, p->buf_len); | 364 | p->buf = tmp_buf; |
| 306 | p->buf = tmp_buf; | 365 | /* |
| 307 | p->buf_len = len; | 366 | * The real size of the buffer is bigger, this will let the fast path |
| 308 | } else { | 367 | * happen most of the time |
| 309 | if (p->virtual_mem) { | 368 | */ |
| 310 | tmp_buf = vmalloc(len); | 369 | p->buf_len = ksize(p->buf); |
| 311 | if (!tmp_buf) | 370 | |
| 312 | return -ENOMEM; | ||
| 313 | memcpy(tmp_buf, p->buf, p->buf_len); | ||
| 314 | vfree(p->buf); | ||
| 315 | } else { | ||
| 316 | tmp_buf = krealloc(p->buf, len, GFP_NOFS); | ||
| 317 | if (!tmp_buf) { | ||
| 318 | tmp_buf = vmalloc(len); | ||
| 319 | if (!tmp_buf) | ||
| 320 | return -ENOMEM; | ||
| 321 | memcpy(tmp_buf, p->buf, p->buf_len); | ||
| 322 | kfree(p->buf); | ||
| 323 | p->virtual_mem = 1; | ||
| 324 | } | ||
| 325 | } | ||
| 326 | p->buf = tmp_buf; | ||
| 327 | p->buf_len = len; | ||
| 328 | } | ||
| 329 | if (p->reversed) { | 371 | if (p->reversed) { |
| 330 | tmp_buf = p->buf + old_buf_len - path_len - 1; | 372 | tmp_buf = p->buf + old_buf_len - path_len - 1; |
| 331 | p->end = p->buf + p->buf_len - 1; | 373 | p->end = p->buf + p->buf_len - 1; |
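The fs_path_ensure_buf() rewrite above drops the vmalloc fallback entirely and, after growing with krealloc(), records ksize() -- the slab's real allocation size -- so later appends hit the fast path without reallocating. Below is a userspace analogue using glibc's malloc_usable_size(); the inline-buffer handling from the diff is omitted for brevity, and the struct names are illustrative.

#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>

struct buf { char *p; size_t len; };

static int ensure_buf(struct buf *b, size_t want)
{
    char *tmp;

    if (b->len >= want)
        return 0;               /* fast path: capacity already recorded */
    tmp = realloc(b->p, want);  /* ~ krealloc(p->buf, len, GFP_NOFS) */
    if (!tmp)
        return -1;
    b->p = tmp;
    b->len = malloc_usable_size(b->p); /* ~ p->buf_len = ksize(p->buf) */
    return 0;
}

int main(void)
{
    struct buf b = { NULL, 0 };

    if (ensure_buf(&b, 10) == 0)
        printf("asked for 10, usable %zu\n", b.len);
    free(b.p);
    return 0;
}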
| @@ -338,7 +380,8 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) | |||
| 338 | return 0; | 380 | return 0; |
| 339 | } | 381 | } |
| 340 | 382 | ||
| 341 | static int fs_path_prepare_for_add(struct fs_path *p, int name_len) | 383 | static int fs_path_prepare_for_add(struct fs_path *p, int name_len, |
| 384 | char **prepared) | ||
| 342 | { | 385 | { |
| 343 | int ret; | 386 | int ret; |
| 344 | int new_len; | 387 | int new_len; |
| @@ -354,11 +397,11 @@ static int fs_path_prepare_for_add(struct fs_path *p, int name_len) | |||
| 354 | if (p->start != p->end) | 397 | if (p->start != p->end) |
| 355 | *--p->start = '/'; | 398 | *--p->start = '/'; |
| 356 | p->start -= name_len; | 399 | p->start -= name_len; |
| 357 | p->prepared = p->start; | 400 | *prepared = p->start; |
| 358 | } else { | 401 | } else { |
| 359 | if (p->start != p->end) | 402 | if (p->start != p->end) |
| 360 | *p->end++ = '/'; | 403 | *p->end++ = '/'; |
| 361 | p->prepared = p->end; | 404 | *prepared = p->end; |
| 362 | p->end += name_len; | 405 | p->end += name_len; |
| 363 | *p->end = 0; | 406 | *p->end = 0; |
| 364 | } | 407 | } |
| @@ -370,12 +413,12 @@ out: | |||
| 370 | static int fs_path_add(struct fs_path *p, const char *name, int name_len) | 413 | static int fs_path_add(struct fs_path *p, const char *name, int name_len) |
| 371 | { | 414 | { |
| 372 | int ret; | 415 | int ret; |
| 416 | char *prepared; | ||
| 373 | 417 | ||
| 374 | ret = fs_path_prepare_for_add(p, name_len); | 418 | ret = fs_path_prepare_for_add(p, name_len, &prepared); |
| 375 | if (ret < 0) | 419 | if (ret < 0) |
| 376 | goto out; | 420 | goto out; |
| 377 | memcpy(p->prepared, name, name_len); | 421 | memcpy(prepared, name, name_len); |
| 378 | p->prepared = NULL; | ||
| 379 | 422 | ||
| 380 | out: | 423 | out: |
| 381 | return ret; | 424 | return ret; |
| @@ -384,12 +427,12 @@ out: | |||
| 384 | static int fs_path_add_path(struct fs_path *p, struct fs_path *p2) | 427 | static int fs_path_add_path(struct fs_path *p, struct fs_path *p2) |
| 385 | { | 428 | { |
| 386 | int ret; | 429 | int ret; |
| 430 | char *prepared; | ||
| 387 | 431 | ||
| 388 | ret = fs_path_prepare_for_add(p, p2->end - p2->start); | 432 | ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared); |
| 389 | if (ret < 0) | 433 | if (ret < 0) |
| 390 | goto out; | 434 | goto out; |
| 391 | memcpy(p->prepared, p2->start, p2->end - p2->start); | 435 | memcpy(prepared, p2->start, p2->end - p2->start); |
| 392 | p->prepared = NULL; | ||
| 393 | 436 | ||
| 394 | out: | 437 | out: |
| 395 | return ret; | 438 | return ret; |
| @@ -400,13 +443,13 @@ static int fs_path_add_from_extent_buffer(struct fs_path *p, | |||
| 400 | unsigned long off, int len) | 443 | unsigned long off, int len) |
| 401 | { | 444 | { |
| 402 | int ret; | 445 | int ret; |
| 446 | char *prepared; | ||
| 403 | 447 | ||
| 404 | ret = fs_path_prepare_for_add(p, len); | 448 | ret = fs_path_prepare_for_add(p, len, &prepared); |
| 405 | if (ret < 0) | 449 | if (ret < 0) |
| 406 | goto out; | 450 | goto out; |
| 407 | 451 | ||
| 408 | read_extent_buffer(eb, p->prepared, off, len); | 452 | read_extent_buffer(eb, prepared, off, len); |
| 409 | p->prepared = NULL; | ||
| 410 | 453 | ||
| 411 | out: | 454 | out: |
| 412 | return ret; | 455 | return ret; |
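The fs_path hunks above also move the 'prepared' scratch cursor from a struct field to an out parameter: the pointer now lives only for the duration of the call that fills it, and struct fs_path loses a field. A stripped-down model of the new calling convention, with a fixed buffer and no growth or reversed mode; all names here are illustrative.

#include <stdio.h>
#include <string.h>

struct path { char buf[64]; char *end; };

static int prepare_for_add(struct path *p, int name_len, char **prepared)
{
    if ((p->end - p->buf) + name_len + 2 > (int)sizeof(p->buf))
        return -1;              /* no room; the real code grows the buffer */
    if (p->end != p->buf)
        *p->end++ = '/';
    *prepared = p->end;         /* caller copies the name here */
    p->end += name_len;
    *p->end = 0;
    return 0;
}

static int path_add(struct path *p, const char *name)
{
    char *prepared;
    int len = (int)strlen(name);

    if (prepare_for_add(p, len, &prepared) < 0)
        return -1;
    memcpy(prepared, name, len);
    return 0;
}

int main(void)
{
    struct path p;

    p.buf[0] = 0;
    p.end = p.buf;
    path_add(&p, "a");
    path_add(&p, "b");
    printf("%s\n", p.buf); /* prints "a/b" */
    return 0;
}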
| @@ -915,9 +958,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
| 915 | struct btrfs_dir_item *di; | 958 | struct btrfs_dir_item *di; |
| 916 | struct btrfs_key di_key; | 959 | struct btrfs_key di_key; |
| 917 | char *buf = NULL; | 960 | char *buf = NULL; |
| 918 | char *buf2 = NULL; | 961 | const int buf_len = PATH_MAX; |
| 919 | int buf_len; | ||
| 920 | int buf_virtual = 0; | ||
| 921 | u32 name_len; | 962 | u32 name_len; |
| 922 | u32 data_len; | 963 | u32 data_len; |
| 923 | u32 cur; | 964 | u32 cur; |
| @@ -927,7 +968,6 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
| 927 | int num; | 968 | int num; |
| 928 | u8 type; | 969 | u8 type; |
| 929 | 970 | ||
| 930 | buf_len = PAGE_SIZE; | ||
| 931 | buf = kmalloc(buf_len, GFP_NOFS); | 971 | buf = kmalloc(buf_len, GFP_NOFS); |
| 932 | if (!buf) { | 972 | if (!buf) { |
| 933 | ret = -ENOMEM; | 973 | ret = -ENOMEM; |
| @@ -949,30 +989,12 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
| 949 | type = btrfs_dir_type(eb, di); | 989 | type = btrfs_dir_type(eb, di); |
| 950 | btrfs_dir_item_key_to_cpu(eb, di, &di_key); | 990 | btrfs_dir_item_key_to_cpu(eb, di, &di_key); |
| 951 | 991 | ||
| 992 | /* | ||
| 993 | * Path too long | ||
| 994 | */ | ||
| 952 | if (name_len + data_len > buf_len) { | 995 | if (name_len + data_len > buf_len) { |
| 953 | buf_len = PAGE_ALIGN(name_len + data_len); | 996 | ret = -ENAMETOOLONG; |
| 954 | if (buf_virtual) { | 997 | goto out; |
| 955 | buf2 = vmalloc(buf_len); | ||
| 956 | if (!buf2) { | ||
| 957 | ret = -ENOMEM; | ||
| 958 | goto out; | ||
| 959 | } | ||
| 960 | vfree(buf); | ||
| 961 | } else { | ||
| 962 | buf2 = krealloc(buf, buf_len, GFP_NOFS); | ||
| 963 | if (!buf2) { | ||
| 964 | buf2 = vmalloc(buf_len); | ||
| 965 | if (!buf2) { | ||
| 966 | ret = -ENOMEM; | ||
| 967 | goto out; | ||
| 968 | } | ||
| 969 | kfree(buf); | ||
| 970 | buf_virtual = 1; | ||
| 971 | } | ||
| 972 | } | ||
| 973 | |||
| 974 | buf = buf2; | ||
| 975 | buf2 = NULL; | ||
| 976 | } | 998 | } |
| 977 | 999 | ||
| 978 | read_extent_buffer(eb, buf, (unsigned long)(di + 1), | 1000 | read_extent_buffer(eb, buf, (unsigned long)(di + 1), |
| @@ -995,10 +1017,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
| 995 | } | 1017 | } |
| 996 | 1018 | ||
| 997 | out: | 1019 | out: |
| 998 | if (buf_virtual) | 1020 | kfree(buf); |
| 999 | vfree(buf); | ||
| 1000 | else | ||
| 1001 | kfree(buf); | ||
| 1002 | return ret; | 1021 | return ret; |
| 1003 | } | 1022 | } |
| 1004 | 1023 | ||
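This hunk drops the krealloc/vmalloc growth dance in iterate_dir_item in favour of a single PATH_MAX allocation and an explicit -ENAMETOOLONG for anything larger. A hedged userspace analog of the simplified shape (the helper and buffer size are stand-ins):

    #include <errno.h>
    #include <limits.h>
    #include <stdlib.h>
    #include <string.h>

    /* Copy one dir item's name+data into a fixed-size scratch buffer;
     * anything that cannot fit is an error rather than a realloc. */
    static int copy_dir_item(char *dst, size_t buf_len,
                             const char *name, size_t name_len,
                             const char *data, size_t data_len)
    {
        if (name_len + data_len > buf_len)
            return -ENAMETOOLONG;
        memcpy(dst, name, name_len);
        memcpy(dst + name_len, data, data_len);
        return 0;
    }

    int main(void)
    {
        char *buf = malloc(PATH_MAX);   /* one allocation, freed once */
        int ret = buf ? copy_dir_item(buf, PATH_MAX, "name", 4, "data", 4)
                      : -ENOMEM;
        free(buf);
        return ret ? 1 : 0;
    }

One allocation and one free also removes the buf_virtual bookkeeping that previously had to remember which allocator owned the buffer.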
| @@ -1292,8 +1311,6 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
| 1292 | extent_item_pos = logical - found_key.objectid; | 1311 | extent_item_pos = logical - found_key.objectid; |
| 1293 | else | 1312 | else |
| 1294 | extent_item_pos = 0; | 1313 | extent_item_pos = 0; |
| 1295 | |||
| 1296 | extent_item_pos = logical - found_key.objectid; | ||
| 1297 | ret = iterate_extent_inodes(sctx->send_root->fs_info, | 1314 | ret = iterate_extent_inodes(sctx->send_root->fs_info, |
| 1298 | found_key.objectid, extent_item_pos, 1, | 1315 | found_key.objectid, extent_item_pos, 1, |
| 1299 | __iterate_backrefs, backref_ctx); | 1316 | __iterate_backrefs, backref_ctx); |
| @@ -1418,11 +1435,7 @@ static int gen_unique_name(struct send_ctx *sctx, | |||
| 1418 | while (1) { | 1435 | while (1) { |
| 1419 | len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu", | 1436 | len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu", |
| 1420 | ino, gen, idx); | 1437 | ino, gen, idx); |
| 1421 | if (len >= sizeof(tmp)) { | 1438 | ASSERT(len < sizeof(tmp)); |
| 1422 | /* should really not happen */ | ||
| 1423 | ret = -EOVERFLOW; | ||
| 1424 | goto out; | ||
| 1425 | } | ||
| 1426 | 1439 | ||
| 1427 | di = btrfs_lookup_dir_item(NULL, sctx->send_root, | 1440 | di = btrfs_lookup_dir_item(NULL, sctx->send_root, |
| 1428 | path, BTRFS_FIRST_FREE_OBJECTID, | 1441 | path, BTRFS_FIRST_FREE_OBJECTID, |
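snprintf returns the length the formatted string would have had, so len >= sizeof(tmp) is the truncation test; because tmp in gen_unique_name is sized for three u64 values, the patch demotes the unreachable -EOVERFLOW branch to an ASSERT. A small standalone check of that invariant (the 64-byte size is my worst-case arithmetic, not the kernel's constant):

    #include <assert.h>
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* "o" + 3 x u64 (max 20 digits each) + 2 dashes + NUL = 64 */
        char tmp[64];
        uint64_t ino = UINT64_MAX, gen = UINT64_MAX, idx = UINT64_MAX;

        int len = snprintf(tmp, sizeof(tmp),
                           "o%" PRIu64 "-%" PRIu64 "-%" PRIu64,
                           ino, gen, idx);
        assert(len > 0 && (size_t)len < sizeof(tmp)); /* never truncated */
        puts(tmp);
        return 0;
    }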
| @@ -1898,13 +1911,20 @@ static void name_cache_delete(struct send_ctx *sctx, | |||
| 1898 | 1911 | ||
| 1899 | nce_head = radix_tree_lookup(&sctx->name_cache, | 1912 | nce_head = radix_tree_lookup(&sctx->name_cache, |
| 1900 | (unsigned long)nce->ino); | 1913 | (unsigned long)nce->ino); |
| 1901 | BUG_ON(!nce_head); | 1914 | if (!nce_head) { |
| 1915 | btrfs_err(sctx->send_root->fs_info, | ||
| 1916 | "name_cache_delete lookup failed ino %llu cache size %d, leaking memory", | ||
| 1917 | nce->ino, sctx->name_cache_size); | ||
| 1918 | } | ||
| 1902 | 1919 | ||
| 1903 | list_del(&nce->radix_list); | 1920 | list_del(&nce->radix_list); |
| 1904 | list_del(&nce->list); | 1921 | list_del(&nce->list); |
| 1905 | sctx->name_cache_size--; | 1922 | sctx->name_cache_size--; |
| 1906 | 1923 | ||
| 1907 | if (list_empty(nce_head)) { | 1924 | /* |
| 1925 | * We may not get to the final release of nce_head if the lookup fails | ||
| 1926 | */ | ||
| 1927 | if (nce_head && list_empty(nce_head)) { | ||
| 1908 | radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino); | 1928 | radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino); |
| 1909 | kfree(nce_head); | 1929 | kfree(nce_head); |
| 1910 | } | 1930 | } |
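Here a BUG_ON(!nce_head) becomes an error message plus null-guarded cleanup: a radix-tree lookup failure now leaks one list head instead of halting the machine. A userspace sketch of the same log-and-degrade shape (the types and the failing lookup are hypothetical):

    #include <stdio.h>
    #include <stdlib.h>

    struct head { int refs; };

    /* Hypothetical lookup result that may be NULL; on failure we warn
     * and skip the final free instead of aborting, accepting a
     * bounded leak. */
    static void cache_delete(struct head *h, unsigned long ino)
    {
        if (!h) {
            fprintf(stderr, "lookup failed ino %lu, leaking memory\n", ino);
            /* fall through: per-entry teardown still happens */
        }

        /* ... unlink the entry from its lists here ... */

        if (h && --h->refs == 0)
            free(h);
    }

    int main(void)
    {
        struct head *h = malloc(sizeof(*h));
        h->refs = 1;
        cache_delete(h, 42);    /* normal path frees        */
        cache_delete(NULL, 43); /* degraded path only warns */
        return 0;
    }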
| @@ -1977,7 +1997,6 @@ static void name_cache_free(struct send_ctx *sctx) | |||
| 1977 | */ | 1997 | */ |
| 1978 | static int __get_cur_name_and_parent(struct send_ctx *sctx, | 1998 | static int __get_cur_name_and_parent(struct send_ctx *sctx, |
| 1979 | u64 ino, u64 gen, | 1999 | u64 ino, u64 gen, |
| 1980 | int skip_name_cache, | ||
| 1981 | u64 *parent_ino, | 2000 | u64 *parent_ino, |
| 1982 | u64 *parent_gen, | 2001 | u64 *parent_gen, |
| 1983 | struct fs_path *dest) | 2002 | struct fs_path *dest) |
| @@ -1987,8 +2006,6 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
| 1987 | struct btrfs_path *path = NULL; | 2006 | struct btrfs_path *path = NULL; |
| 1988 | struct name_cache_entry *nce = NULL; | 2007 | struct name_cache_entry *nce = NULL; |
| 1989 | 2008 | ||
| 1990 | if (skip_name_cache) | ||
| 1991 | goto get_ref; | ||
| 1992 | /* | 2009 | /* |
| 1993 | * First check if we already did a call to this function with the same | 2010 | * First check if we already did a call to this function with the same |
| 1994 | * ino/gen. If yes, check if the cache entry is still up-to-date. If yes | 2011 | * ino/gen. If yes, check if the cache entry is still up-to-date. If yes |
| @@ -2033,12 +2050,11 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
| 2033 | goto out_cache; | 2050 | goto out_cache; |
| 2034 | } | 2051 | } |
| 2035 | 2052 | ||
| 2036 | get_ref: | ||
| 2037 | /* | 2053 | /* |
| 2038 | * Depending on whether the inode was already processed or not, use | 2054 | * Depending on whether the inode was already processed or not, use |
| 2039 | * send_root or parent_root for ref lookup. | 2055 | * send_root or parent_root for ref lookup. |
| 2040 | */ | 2056 | */ |
| 2041 | if (ino < sctx->send_progress && !skip_name_cache) | 2057 | if (ino < sctx->send_progress) |
| 2042 | ret = get_first_ref(sctx->send_root, ino, | 2058 | ret = get_first_ref(sctx->send_root, ino, |
| 2043 | parent_ino, parent_gen, dest); | 2059 | parent_ino, parent_gen, dest); |
| 2044 | else | 2060 | else |
| @@ -2062,8 +2078,6 @@ get_ref: | |||
| 2062 | goto out; | 2078 | goto out; |
| 2063 | ret = 1; | 2079 | ret = 1; |
| 2064 | } | 2080 | } |
| 2065 | if (skip_name_cache) | ||
| 2066 | goto out; | ||
| 2067 | 2081 | ||
| 2068 | out_cache: | 2082 | out_cache: |
| 2069 | /* | 2083 | /* |
| @@ -2131,9 +2145,6 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, | |||
| 2131 | u64 parent_inode = 0; | 2145 | u64 parent_inode = 0; |
| 2132 | u64 parent_gen = 0; | 2146 | u64 parent_gen = 0; |
| 2133 | int stop = 0; | 2147 | int stop = 0; |
| 2134 | u64 start_ino = ino; | ||
| 2135 | u64 start_gen = gen; | ||
| 2136 | int skip_name_cache = 0; | ||
| 2137 | 2148 | ||
| 2138 | name = fs_path_alloc(); | 2149 | name = fs_path_alloc(); |
| 2139 | if (!name) { | 2150 | if (!name) { |
| @@ -2141,31 +2152,33 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, | |||
| 2141 | goto out; | 2152 | goto out; |
| 2142 | } | 2153 | } |
| 2143 | 2154 | ||
| 2144 | if (is_waiting_for_move(sctx, ino)) | ||
| 2145 | skip_name_cache = 1; | ||
| 2146 | |||
| 2147 | again: | ||
| 2148 | dest->reversed = 1; | 2155 | dest->reversed = 1; |
| 2149 | fs_path_reset(dest); | 2156 | fs_path_reset(dest); |
| 2150 | 2157 | ||
| 2151 | while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { | 2158 | while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { |
| 2152 | fs_path_reset(name); | 2159 | fs_path_reset(name); |
| 2153 | 2160 | ||
| 2154 | ret = __get_cur_name_and_parent(sctx, ino, gen, skip_name_cache, | 2161 | if (is_waiting_for_rm(sctx, ino)) { |
| 2155 | &parent_inode, &parent_gen, name); | 2162 | ret = gen_unique_name(sctx, ino, gen, name); |
| 2163 | if (ret < 0) | ||
| 2164 | goto out; | ||
| 2165 | ret = fs_path_add_path(dest, name); | ||
| 2166 | break; | ||
| 2167 | } | ||
| 2168 | |||
| 2169 | if (is_waiting_for_move(sctx, ino)) { | ||
| 2170 | ret = get_first_ref(sctx->parent_root, ino, | ||
| 2171 | &parent_inode, &parent_gen, name); | ||
| 2172 | } else { | ||
| 2173 | ret = __get_cur_name_and_parent(sctx, ino, gen, | ||
| 2174 | &parent_inode, | ||
| 2175 | &parent_gen, name); | ||
| 2176 | if (ret) | ||
| 2177 | stop = 1; | ||
| 2178 | } | ||
| 2179 | |||
| 2156 | if (ret < 0) | 2180 | if (ret < 0) |
| 2157 | goto out; | 2181 | goto out; |
| 2158 | if (ret) | ||
| 2159 | stop = 1; | ||
| 2160 | |||
| 2161 | if (!skip_name_cache && | ||
| 2162 | is_waiting_for_move(sctx, parent_inode)) { | ||
| 2163 | ino = start_ino; | ||
| 2164 | gen = start_gen; | ||
| 2165 | stop = 0; | ||
| 2166 | skip_name_cache = 1; | ||
| 2167 | goto again; | ||
| 2168 | } | ||
| 2169 | 2182 | ||
| 2170 | ret = fs_path_add_path(dest, name); | 2183 | ret = fs_path_add_path(dest, name); |
| 2171 | if (ret < 0) | 2184 | if (ret < 0) |
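The rewritten loop in get_cur_path picks, per ancestor, between the orphan name (is_waiting_for_rm), the pre-move name from the parent root (is_waiting_for_move), and the cached name, prepending each component because dest->reversed is set. A toy version of the prepend-while-climbing idea (the parent table replaces the real ref lookups; bounds checks are omitted):

    #include <stdio.h>
    #include <string.h>

    /* Toy tree: parent[i] and name[i] describe inode i; 0 is root. */
    static const int parent[] = { -1, 0, 1, 2 };
    static const char *name[] = { "", "a", "b", "c" };

    static void get_cur_path(int ino, char *buf, size_t cap)
    {
        char *start = buf + cap - 1;        /* build right-to-left */
        *start = '\0';

        while (ino > 0) {
            size_t len = strlen(name[ino]);
            start -= len;
            memcpy(start, name[ino], len);
            if (parent[ino] > 0)
                *--start = '/';             /* separator */
            ino = parent[ino];
        }
        memmove(buf, start, strlen(start) + 1);
    }

    int main(void)
    {
        char buf[64];
        get_cur_path(3, buf, sizeof(buf));
        printf("%s\n", buf);                /* prints a/b/c */
        return 0;
    }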
| @@ -2429,10 +2442,16 @@ verbose_printk("btrfs: send_create_inode %llu\n", ino); | |||
| 2429 | if (!p) | 2442 | if (!p) |
| 2430 | return -ENOMEM; | 2443 | return -ENOMEM; |
| 2431 | 2444 | ||
| 2432 | ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, NULL, | 2445 | if (ino != sctx->cur_ino) { |
| 2433 | NULL, &rdev); | 2446 | ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, |
| 2434 | if (ret < 0) | 2447 | NULL, NULL, &rdev); |
| 2435 | goto out; | 2448 | if (ret < 0) |
| 2449 | goto out; | ||
| 2450 | } else { | ||
| 2451 | gen = sctx->cur_inode_gen; | ||
| 2452 | mode = sctx->cur_inode_mode; | ||
| 2453 | rdev = sctx->cur_inode_rdev; | ||
| 2454 | } | ||
| 2436 | 2455 | ||
| 2437 | if (S_ISREG(mode)) { | 2456 | if (S_ISREG(mode)) { |
| 2438 | cmd = BTRFS_SEND_C_MKFILE; | 2457 | cmd = BTRFS_SEND_C_MKFILE; |
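send_create_inode now skips the btree lookup when the inode is the one currently being processed, reusing the gen/mode/rdev values already cached in the send context (see the cur_inode_rdev additions further down). The same fast-path/slow-path split as a hedged sketch:

    #include <stdint.h>
    #include <stdio.h>

    struct ctx {
        uint64_t cur_ino, cur_gen;   /* attributes cached for the  */
        unsigned cur_mode;           /* inode currently processed  */
    };

    /* Stand-in for an expensive btree lookup. */
    static int lookup_inode(uint64_t ino, uint64_t *gen, unsigned *mode)
    {
        *gen = 100 + ino;
        *mode = 0644;
        return 0;
    }

    static int get_attrs(struct ctx *c, uint64_t ino,
                         uint64_t *gen, unsigned *mode)
    {
        if (ino != c->cur_ino)
            return lookup_inode(ino, gen, mode); /* slow path */
        *gen = c->cur_gen;                       /* fast path: cached */
        *mode = c->cur_mode;
        return 0;
    }

    int main(void)
    {
        struct ctx c = { .cur_ino = 7, .cur_gen = 107, .cur_mode = 0755 };
        uint64_t gen; unsigned mode;

        get_attrs(&c, 7, &gen, &mode);   /* served from the context */
        printf("%llu %o\n", (unsigned long long)gen, mode);
        return 0;
    }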
| @@ -2512,17 +2531,26 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir) | |||
| 2512 | key.objectid = dir; | 2531 | key.objectid = dir; |
| 2513 | key.type = BTRFS_DIR_INDEX_KEY; | 2532 | key.type = BTRFS_DIR_INDEX_KEY; |
| 2514 | key.offset = 0; | 2533 | key.offset = 0; |
| 2534 | ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); | ||
| 2535 | if (ret < 0) | ||
| 2536 | goto out; | ||
| 2537 | |||
| 2515 | while (1) { | 2538 | while (1) { |
| 2516 | ret = btrfs_search_slot_for_read(sctx->send_root, &key, path, | 2539 | eb = path->nodes[0]; |
| 2517 | 1, 0); | 2540 | slot = path->slots[0]; |
| 2518 | if (ret < 0) | 2541 | if (slot >= btrfs_header_nritems(eb)) { |
| 2519 | goto out; | 2542 | ret = btrfs_next_leaf(sctx->send_root, path); |
| 2520 | if (!ret) { | 2543 | if (ret < 0) { |
| 2521 | eb = path->nodes[0]; | 2544 | goto out; |
| 2522 | slot = path->slots[0]; | 2545 | } else if (ret > 0) { |
| 2523 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 2546 | ret = 0; |
| 2547 | break; | ||
| 2548 | } | ||
| 2549 | continue; | ||
| 2524 | } | 2550 | } |
| 2525 | if (ret || found_key.objectid != key.objectid || | 2551 | |
| 2552 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
| 2553 | if (found_key.objectid != key.objectid || | ||
| 2526 | found_key.type != key.type) { | 2554 | found_key.type != key.type) { |
| 2527 | ret = 0; | 2555 | ret = 0; |
| 2528 | goto out; | 2556 | goto out; |
| @@ -2537,8 +2565,7 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir) | |||
| 2537 | goto out; | 2565 | goto out; |
| 2538 | } | 2566 | } |
| 2539 | 2567 | ||
| 2540 | key.offset = found_key.offset + 1; | 2568 | path->slots[0]++; |
| 2541 | btrfs_release_path(path); | ||
| 2542 | } | 2569 | } |
| 2543 | 2570 | ||
| 2544 | out: | 2571 | out: |
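This rework of did_create_dir - repeated below for can_rmdir, process_all_refs and process_all_new_xattrs - does one btrfs_search_slot up front and then advances with path->slots[0]++, calling btrfs_next_leaf only when a leaf runs out, instead of paying a full tree descent per item. A generic cursor over fixed-size leaves, as a userspace analog:

    #include <stdio.h>

    #define ITEMS_PER_LEAF 4

    /* Toy two-level structure: leaves[] holds item keys, -1 ends a leaf. */
    static int leaves[2][ITEMS_PER_LEAF] = {
        { 10, 11, 12, 13 },
        { 14, 15, -1, -1 },
    };

    int main(void)
    {
        int leaf = 0, slot = 0;              /* one "search", then walk */

        while (1) {
            if (slot >= ITEMS_PER_LEAF) {    /* leaf exhausted          */
                if (++leaf >= 2)
                    break;                   /* ret > 0: no more leaves */
                slot = 0;
                continue;
            }
            if (leaves[leaf][slot] < 0)      /* key no longer matches   */
                break;
            printf("item %d\n", leaves[leaf][slot]);
            slot++;                          /* cheap in-leaf advance   */
        }
        return 0;
    }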
| @@ -2590,7 +2617,7 @@ struct recorded_ref { | |||
| 2590 | * everything mixed. So we first record all refs and later process them. | 2617 | * everything mixed. So we first record all refs and later process them. |
| 2591 | * This function is a helper to record one ref. | 2618 | * This function is a helper to record one ref. |
| 2592 | */ | 2619 | */ |
| 2593 | static int record_ref(struct list_head *head, u64 dir, | 2620 | static int __record_ref(struct list_head *head, u64 dir, |
| 2594 | u64 dir_gen, struct fs_path *path) | 2621 | u64 dir_gen, struct fs_path *path) |
| 2595 | { | 2622 | { |
| 2596 | struct recorded_ref *ref; | 2623 | struct recorded_ref *ref; |
| @@ -2676,12 +2703,78 @@ out: | |||
| 2676 | return ret; | 2703 | return ret; |
| 2677 | } | 2704 | } |
| 2678 | 2705 | ||
| 2706 | static struct orphan_dir_info * | ||
| 2707 | add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) | ||
| 2708 | { | ||
| 2709 | struct rb_node **p = &sctx->orphan_dirs.rb_node; | ||
| 2710 | struct rb_node *parent = NULL; | ||
| 2711 | struct orphan_dir_info *entry, *odi; | ||
| 2712 | |||
| 2713 | odi = kmalloc(sizeof(*odi), GFP_NOFS); | ||
| 2714 | if (!odi) | ||
| 2715 | return ERR_PTR(-ENOMEM); | ||
| 2716 | odi->ino = dir_ino; | ||
| 2717 | odi->gen = 0; | ||
| 2718 | |||
| 2719 | while (*p) { | ||
| 2720 | parent = *p; | ||
| 2721 | entry = rb_entry(parent, struct orphan_dir_info, node); | ||
| 2722 | if (dir_ino < entry->ino) { | ||
| 2723 | p = &(*p)->rb_left; | ||
| 2724 | } else if (dir_ino > entry->ino) { | ||
| 2725 | p = &(*p)->rb_right; | ||
| 2726 | } else { | ||
| 2727 | kfree(odi); | ||
| 2728 | return entry; | ||
| 2729 | } | ||
| 2730 | } | ||
| 2731 | |||
| 2732 | rb_link_node(&odi->node, parent, p); | ||
| 2733 | rb_insert_color(&odi->node, &sctx->orphan_dirs); | ||
| 2734 | return odi; | ||
| 2735 | } | ||
| 2736 | |||
| 2737 | static struct orphan_dir_info * | ||
| 2738 | get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) | ||
| 2739 | { | ||
| 2740 | struct rb_node *n = sctx->orphan_dirs.rb_node; | ||
| 2741 | struct orphan_dir_info *entry; | ||
| 2742 | |||
| 2743 | while (n) { | ||
| 2744 | entry = rb_entry(n, struct orphan_dir_info, node); | ||
| 2745 | if (dir_ino < entry->ino) | ||
| 2746 | n = n->rb_left; | ||
| 2747 | else if (dir_ino > entry->ino) | ||
| 2748 | n = n->rb_right; | ||
| 2749 | else | ||
| 2750 | return entry; | ||
| 2751 | } | ||
| 2752 | return NULL; | ||
| 2753 | } | ||
| 2754 | |||
| 2755 | static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino) | ||
| 2756 | { | ||
| 2757 | struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino); | ||
| 2758 | |||
| 2759 | return odi != NULL; | ||
| 2760 | } | ||
| 2761 | |||
| 2762 | static void free_orphan_dir_info(struct send_ctx *sctx, | ||
| 2763 | struct orphan_dir_info *odi) | ||
| 2764 | { | ||
| 2765 | if (!odi) | ||
| 2766 | return; | ||
| 2767 | rb_erase(&odi->node, &sctx->orphan_dirs); | ||
| 2768 | kfree(odi); | ||
| 2769 | } | ||
| 2770 | |||
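add_orphan_dir_info above is the stock kernel rb-tree insertion idiom: descend via struct rb_node **p, remember the parent, and either return the existing entry or rb_link_node/rb_insert_color the new one where the walk stopped. The same insert-or-get shape as a plain unbalanced BST sketch (the real tree rebalances; freeing is omitted):

    #include <stdint.h>
    #include <stdlib.h>

    struct odi {
        uint64_t ino;
        struct odi *left, *right;
    };

    /* Insert-or-get: return the existing node for ino, or link a new
     * one at the position where the descent ended. */
    static struct odi *add_odi(struct odi **root, uint64_t ino)
    {
        struct odi **p = root;

        while (*p) {
            if (ino < (*p)->ino)
                p = &(*p)->left;
            else if (ino > (*p)->ino)
                p = &(*p)->right;
            else
                return *p;                /* already tracked */
        }
        *p = calloc(1, sizeof(**p));
        if (*p)
            (*p)->ino = ino;
        return *p;                        /* newly linked node */
    }

    int main(void)
    {
        struct odi *root = NULL;
        struct odi *a = add_odi(&root, 5);
        struct odi *b = add_odi(&root, 5); /* same node back */
        return (a && a == b) ? 0 : 1;
    }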
| 2679 | /* | 2771 | /* |
| 2680 | * Returns 1 if a directory can be removed at this point in time. | 2772 | * Returns 1 if a directory can be removed at this point in time. |
| 2681 | * We check this by iterating all dir items and checking if the inode behind | 2773 | * We check this by iterating all dir items and checking if the inode behind |
| 2682 | * the dir item was already processed. | 2774 | * the dir item was already processed. |
| 2683 | */ | 2775 | */ |
| 2684 | static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress) | 2776 | static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, |
| 2777 | u64 send_progress) | ||
| 2685 | { | 2778 | { |
| 2686 | int ret = 0; | 2779 | int ret = 0; |
| 2687 | struct btrfs_root *root = sctx->parent_root; | 2780 | struct btrfs_root *root = sctx->parent_root; |
| @@ -2704,31 +2797,52 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress) | |||
| 2704 | key.objectid = dir; | 2797 | key.objectid = dir; |
| 2705 | key.type = BTRFS_DIR_INDEX_KEY; | 2798 | key.type = BTRFS_DIR_INDEX_KEY; |
| 2706 | key.offset = 0; | 2799 | key.offset = 0; |
| 2800 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 2801 | if (ret < 0) | ||
| 2802 | goto out; | ||
| 2707 | 2803 | ||
| 2708 | while (1) { | 2804 | while (1) { |
| 2709 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | 2805 | struct waiting_dir_move *dm; |
| 2710 | if (ret < 0) | 2806 | |
| 2711 | goto out; | 2807 | if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { |
| 2712 | if (!ret) { | 2808 | ret = btrfs_next_leaf(root, path); |
| 2713 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | 2809 | if (ret < 0) |
| 2714 | path->slots[0]); | 2810 | goto out; |
| 2811 | else if (ret > 0) | ||
| 2812 | break; | ||
| 2813 | continue; | ||
| 2715 | } | 2814 | } |
| 2716 | if (ret || found_key.objectid != key.objectid || | 2815 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, |
| 2717 | found_key.type != key.type) { | 2816 | path->slots[0]); |
| 2817 | if (found_key.objectid != key.objectid || | ||
| 2818 | found_key.type != key.type) | ||
| 2718 | break; | 2819 | break; |
| 2719 | } | ||
| 2720 | 2820 | ||
| 2721 | di = btrfs_item_ptr(path->nodes[0], path->slots[0], | 2821 | di = btrfs_item_ptr(path->nodes[0], path->slots[0], |
| 2722 | struct btrfs_dir_item); | 2822 | struct btrfs_dir_item); |
| 2723 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc); | 2823 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc); |
| 2724 | 2824 | ||
| 2825 | dm = get_waiting_dir_move(sctx, loc.objectid); | ||
| 2826 | if (dm) { | ||
| 2827 | struct orphan_dir_info *odi; | ||
| 2828 | |||
| 2829 | odi = add_orphan_dir_info(sctx, dir); | ||
| 2830 | if (IS_ERR(odi)) { | ||
| 2831 | ret = PTR_ERR(odi); | ||
| 2832 | goto out; | ||
| 2833 | } | ||
| 2834 | odi->gen = dir_gen; | ||
| 2835 | dm->rmdir_ino = dir; | ||
| 2836 | ret = 0; | ||
| 2837 | goto out; | ||
| 2838 | } | ||
| 2839 | |||
| 2725 | if (loc.objectid > send_progress) { | 2840 | if (loc.objectid > send_progress) { |
| 2726 | ret = 0; | 2841 | ret = 0; |
| 2727 | goto out; | 2842 | goto out; |
| 2728 | } | 2843 | } |
| 2729 | 2844 | ||
| 2730 | btrfs_release_path(path); | 2845 | path->slots[0]++; |
| 2731 | key.offset = found_key.offset + 1; | ||
| 2732 | } | 2846 | } |
| 2733 | 2847 | ||
| 2734 | ret = 1; | 2848 | ret = 1; |
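The new branch in can_rmdir defers the removal instead of failing it: when a child inode still has a pending move, the directory is registered as an orphan_dir_info and its inode is recorded in dm->rmdir_ino, so apply_dir_move can issue the rmdir once the move completes. A toy of that record-on-the-blocker pattern (structures and inode numbers are invented):

    #include <stdio.h>

    /* Toy deferral: if a directory still has a child waiting to be
     * moved, record the rmdir on that child instead of failing. */
    struct waiting_move { int ino; int rmdir_ino; };

    static struct waiting_move moves[] = { { 260, 0 } };

    static int can_rmdir(int dir, int child_ino)
    {
        for (unsigned i = 0; i < sizeof(moves)/sizeof(moves[0]); i++) {
            if (moves[i].ino == child_ino) {
                moves[i].rmdir_ino = dir;  /* mover will rmdir later */
                return 0;                  /* not removable now      */
            }
        }
        return 1;                          /* removable immediately  */
    }

    int main(void)
    {
        printf("%d\n", can_rmdir(258, 260)); /* 0: deferred */
        printf("rmdir_ino=%d\n", moves[0].rmdir_ino);
        return 0;
    }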
| @@ -2740,19 +2854,9 @@ out: | |||
| 2740 | 2854 | ||
| 2741 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino) | 2855 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino) |
| 2742 | { | 2856 | { |
| 2743 | struct rb_node *n = sctx->waiting_dir_moves.rb_node; | 2857 | struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino); |
| 2744 | struct waiting_dir_move *entry; | ||
| 2745 | 2858 | ||
| 2746 | while (n) { | 2859 | return entry != NULL; |
| 2747 | entry = rb_entry(n, struct waiting_dir_move, node); | ||
| 2748 | if (ino < entry->ino) | ||
| 2749 | n = n->rb_left; | ||
| 2750 | else if (ino > entry->ino) | ||
| 2751 | n = n->rb_right; | ||
| 2752 | else | ||
| 2753 | return 1; | ||
| 2754 | } | ||
| 2755 | return 0; | ||
| 2756 | } | 2860 | } |
| 2757 | 2861 | ||
| 2758 | static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) | 2862 | static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) |
| @@ -2765,6 +2869,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) | |||
| 2765 | if (!dm) | 2869 | if (!dm) |
| 2766 | return -ENOMEM; | 2870 | return -ENOMEM; |
| 2767 | dm->ino = ino; | 2871 | dm->ino = ino; |
| 2872 | dm->rmdir_ino = 0; | ||
| 2768 | 2873 | ||
| 2769 | while (*p) { | 2874 | while (*p) { |
| 2770 | parent = *p; | 2875 | parent = *p; |
| @@ -2784,31 +2889,41 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) | |||
| 2784 | return 0; | 2889 | return 0; |
| 2785 | } | 2890 | } |
| 2786 | 2891 | ||
| 2787 | static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino) | 2892 | static struct waiting_dir_move * |
| 2893 | get_waiting_dir_move(struct send_ctx *sctx, u64 ino) | ||
| 2788 | { | 2894 | { |
| 2789 | struct rb_node *n = sctx->waiting_dir_moves.rb_node; | 2895 | struct rb_node *n = sctx->waiting_dir_moves.rb_node; |
| 2790 | struct waiting_dir_move *entry; | 2896 | struct waiting_dir_move *entry; |
| 2791 | 2897 | ||
| 2792 | while (n) { | 2898 | while (n) { |
| 2793 | entry = rb_entry(n, struct waiting_dir_move, node); | 2899 | entry = rb_entry(n, struct waiting_dir_move, node); |
| 2794 | if (ino < entry->ino) { | 2900 | if (ino < entry->ino) |
| 2795 | n = n->rb_left; | 2901 | n = n->rb_left; |
| 2796 | } else if (ino > entry->ino) { | 2902 | else if (ino > entry->ino) |
| 2797 | n = n->rb_right; | 2903 | n = n->rb_right; |
| 2798 | } else { | 2904 | else |
| 2799 | rb_erase(&entry->node, &sctx->waiting_dir_moves); | 2905 | return entry; |
| 2800 | kfree(entry); | ||
| 2801 | return 0; | ||
| 2802 | } | ||
| 2803 | } | 2906 | } |
| 2804 | return -ENOENT; | 2907 | return NULL; |
| 2908 | } | ||
| 2909 | |||
| 2910 | static void free_waiting_dir_move(struct send_ctx *sctx, | ||
| 2911 | struct waiting_dir_move *dm) | ||
| 2912 | { | ||
| 2913 | if (!dm) | ||
| 2914 | return; | ||
| 2915 | rb_erase(&dm->node, &sctx->waiting_dir_moves); | ||
| 2916 | kfree(dm); | ||
| 2805 | } | 2917 | } |
| 2806 | 2918 | ||
| 2807 | static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino) | 2919 | static int add_pending_dir_move(struct send_ctx *sctx, |
| 2920 | u64 ino, | ||
| 2921 | u64 ino_gen, | ||
| 2922 | u64 parent_ino) | ||
| 2808 | { | 2923 | { |
| 2809 | struct rb_node **p = &sctx->pending_dir_moves.rb_node; | 2924 | struct rb_node **p = &sctx->pending_dir_moves.rb_node; |
| 2810 | struct rb_node *parent = NULL; | 2925 | struct rb_node *parent = NULL; |
| 2811 | struct pending_dir_move *entry, *pm; | 2926 | struct pending_dir_move *entry = NULL, *pm; |
| 2812 | struct recorded_ref *cur; | 2927 | struct recorded_ref *cur; |
| 2813 | int exists = 0; | 2928 | int exists = 0; |
| 2814 | int ret; | 2929 | int ret; |
| @@ -2817,8 +2932,8 @@ static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino) | |||
| 2817 | if (!pm) | 2932 | if (!pm) |
| 2818 | return -ENOMEM; | 2933 | return -ENOMEM; |
| 2819 | pm->parent_ino = parent_ino; | 2934 | pm->parent_ino = parent_ino; |
| 2820 | pm->ino = sctx->cur_ino; | 2935 | pm->ino = ino; |
| 2821 | pm->gen = sctx->cur_inode_gen; | 2936 | pm->gen = ino_gen; |
| 2822 | INIT_LIST_HEAD(&pm->list); | 2937 | INIT_LIST_HEAD(&pm->list); |
| 2823 | INIT_LIST_HEAD(&pm->update_refs); | 2938 | INIT_LIST_HEAD(&pm->update_refs); |
| 2824 | RB_CLEAR_NODE(&pm->node); | 2939 | RB_CLEAR_NODE(&pm->node); |
| @@ -2888,19 +3003,52 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
| 2888 | { | 3003 | { |
| 2889 | struct fs_path *from_path = NULL; | 3004 | struct fs_path *from_path = NULL; |
| 2890 | struct fs_path *to_path = NULL; | 3005 | struct fs_path *to_path = NULL; |
| 3006 | struct fs_path *name = NULL; | ||
| 2891 | u64 orig_progress = sctx->send_progress; | 3007 | u64 orig_progress = sctx->send_progress; |
| 2892 | struct recorded_ref *cur; | 3008 | struct recorded_ref *cur; |
| 3009 | u64 parent_ino, parent_gen; | ||
| 3010 | struct waiting_dir_move *dm = NULL; | ||
| 3011 | u64 rmdir_ino = 0; | ||
| 2893 | int ret; | 3012 | int ret; |
| 2894 | 3013 | ||
| 3014 | name = fs_path_alloc(); | ||
| 2895 | from_path = fs_path_alloc(); | 3015 | from_path = fs_path_alloc(); |
| 2896 | if (!from_path) | 3016 | if (!name || !from_path) { |
| 2897 | return -ENOMEM; | 3017 | ret = -ENOMEM; |
| 3018 | goto out; | ||
| 3019 | } | ||
| 2898 | 3020 | ||
| 2899 | sctx->send_progress = pm->ino; | 3021 | dm = get_waiting_dir_move(sctx, pm->ino); |
| 2900 | ret = get_cur_path(sctx, pm->ino, pm->gen, from_path); | 3022 | ASSERT(dm); |
| 3023 | rmdir_ino = dm->rmdir_ino; | ||
| 3024 | free_waiting_dir_move(sctx, dm); | ||
| 3025 | |||
| 3026 | ret = get_first_ref(sctx->parent_root, pm->ino, | ||
| 3027 | &parent_ino, &parent_gen, name); | ||
| 2901 | if (ret < 0) | 3028 | if (ret < 0) |
| 2902 | goto out; | 3029 | goto out; |
| 2903 | 3030 | ||
| 3031 | if (parent_ino == sctx->cur_ino) { | ||
| 3032 | /* child only renamed, not moved */ | ||
| 3033 | ASSERT(parent_gen == sctx->cur_inode_gen); | ||
| 3034 | ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, | ||
| 3035 | from_path); | ||
| 3036 | if (ret < 0) | ||
| 3037 | goto out; | ||
| 3038 | ret = fs_path_add_path(from_path, name); | ||
| 3039 | if (ret < 0) | ||
| 3040 | goto out; | ||
| 3041 | } else { | ||
| 3042 | /* child moved and maybe renamed too */ | ||
| 3043 | sctx->send_progress = pm->ino; | ||
| 3044 | ret = get_cur_path(sctx, pm->ino, pm->gen, from_path); | ||
| 3045 | if (ret < 0) | ||
| 3046 | goto out; | ||
| 3047 | } | ||
| 3048 | |||
| 3049 | fs_path_free(name); | ||
| 3050 | name = NULL; | ||
| 3051 | |||
| 2904 | to_path = fs_path_alloc(); | 3052 | to_path = fs_path_alloc(); |
| 2905 | if (!to_path) { | 3053 | if (!to_path) { |
| 2906 | ret = -ENOMEM; | 3054 | ret = -ENOMEM; |
| @@ -2908,9 +3056,6 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
| 2908 | } | 3056 | } |
| 2909 | 3057 | ||
| 2910 | sctx->send_progress = sctx->cur_ino + 1; | 3058 | sctx->send_progress = sctx->cur_ino + 1; |
| 2911 | ret = del_waiting_dir_move(sctx, pm->ino); | ||
| 2912 | ASSERT(ret == 0); | ||
| 2913 | |||
| 2914 | ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); | 3059 | ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); |
| 2915 | if (ret < 0) | 3060 | if (ret < 0) |
| 2916 | goto out; | 3061 | goto out; |
| @@ -2919,6 +3064,35 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
| 2919 | if (ret < 0) | 3064 | if (ret < 0) |
| 2920 | goto out; | 3065 | goto out; |
| 2921 | 3066 | ||
| 3067 | if (rmdir_ino) { | ||
| 3068 | struct orphan_dir_info *odi; | ||
| 3069 | |||
| 3070 | odi = get_orphan_dir_info(sctx, rmdir_ino); | ||
| 3071 | if (!odi) { | ||
| 3072 | /* already deleted */ | ||
| 3073 | goto finish; | ||
| 3074 | } | ||
| 3075 | ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1); | ||
| 3076 | if (ret < 0) | ||
| 3077 | goto out; | ||
| 3078 | if (!ret) | ||
| 3079 | goto finish; | ||
| 3080 | |||
| 3081 | name = fs_path_alloc(); | ||
| 3082 | if (!name) { | ||
| 3083 | ret = -ENOMEM; | ||
| 3084 | goto out; | ||
| 3085 | } | ||
| 3086 | ret = get_cur_path(sctx, rmdir_ino, odi->gen, name); | ||
| 3087 | if (ret < 0) | ||
| 3088 | goto out; | ||
| 3089 | ret = send_rmdir(sctx, name); | ||
| 3090 | if (ret < 0) | ||
| 3091 | goto out; | ||
| 3092 | free_orphan_dir_info(sctx, odi); | ||
| 3093 | } | ||
| 3094 | |||
| 3095 | finish: | ||
| 2922 | ret = send_utimes(sctx, pm->ino, pm->gen); | 3096 | ret = send_utimes(sctx, pm->ino, pm->gen); |
| 2923 | if (ret < 0) | 3097 | if (ret < 0) |
| 2924 | goto out; | 3098 | goto out; |
| @@ -2928,12 +3102,15 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
| 2928 | * and old parent(s). | 3102 | * and old parent(s). |
| 2929 | */ | 3103 | */ |
| 2930 | list_for_each_entry(cur, &pm->update_refs, list) { | 3104 | list_for_each_entry(cur, &pm->update_refs, list) { |
| 3105 | if (cur->dir == rmdir_ino) | ||
| 3106 | continue; | ||
| 2931 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); | 3107 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); |
| 2932 | if (ret < 0) | 3108 | if (ret < 0) |
| 2933 | goto out; | 3109 | goto out; |
| 2934 | } | 3110 | } |
| 2935 | 3111 | ||
| 2936 | out: | 3112 | out: |
| 3113 | fs_path_free(name); | ||
| 2937 | fs_path_free(from_path); | 3114 | fs_path_free(from_path); |
| 2938 | fs_path_free(to_path); | 3115 | fs_path_free(to_path); |
| 2939 | sctx->send_progress = orig_progress; | 3116 | sctx->send_progress = orig_progress; |
| @@ -3005,17 +3182,19 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
| 3005 | int ret; | 3182 | int ret; |
| 3006 | u64 ino = parent_ref->dir; | 3183 | u64 ino = parent_ref->dir; |
| 3007 | u64 parent_ino_before, parent_ino_after; | 3184 | u64 parent_ino_before, parent_ino_after; |
| 3008 | u64 new_gen, old_gen; | 3185 | u64 old_gen; |
| 3009 | struct fs_path *path_before = NULL; | 3186 | struct fs_path *path_before = NULL; |
| 3010 | struct fs_path *path_after = NULL; | 3187 | struct fs_path *path_after = NULL; |
| 3011 | int len1, len2; | 3188 | int len1, len2; |
| 3012 | 3189 | int register_upper_dirs; | |
| 3013 | if (parent_ref->dir <= sctx->cur_ino) | 3190 | u64 gen; |
| 3014 | return 0; | ||
| 3015 | 3191 | ||
| 3016 | if (is_waiting_for_move(sctx, ino)) | 3192 | if (is_waiting_for_move(sctx, ino)) |
| 3017 | return 1; | 3193 | return 1; |
| 3018 | 3194 | ||
| 3195 | if (parent_ref->dir <= sctx->cur_ino) | ||
| 3196 | return 0; | ||
| 3197 | |||
| 3019 | ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen, | 3198 | ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen, |
| 3020 | NULL, NULL, NULL, NULL); | 3199 | NULL, NULL, NULL, NULL); |
| 3021 | if (ret == -ENOENT) | 3200 | if (ret == -ENOENT) |
| @@ -3023,12 +3202,7 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
| 3023 | else if (ret < 0) | 3202 | else if (ret < 0) |
| 3024 | return ret; | 3203 | return ret; |
| 3025 | 3204 | ||
| 3026 | ret = get_inode_info(sctx->send_root, ino, NULL, &new_gen, | 3205 | if (parent_ref->dir_gen != old_gen) |
| 3027 | NULL, NULL, NULL, NULL); | ||
| 3028 | if (ret < 0) | ||
| 3029 | return ret; | ||
| 3030 | |||
| 3031 | if (new_gen != old_gen) | ||
| 3032 | return 0; | 3206 | return 0; |
| 3033 | 3207 | ||
| 3034 | path_before = fs_path_alloc(); | 3208 | path_before = fs_path_alloc(); |
| @@ -3051,7 +3225,7 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
| 3051 | } | 3225 | } |
| 3052 | 3226 | ||
| 3053 | ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, | 3227 | ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, |
| 3054 | NULL, path_after); | 3228 | &gen, path_after); |
| 3055 | if (ret == -ENOENT) { | 3229 | if (ret == -ENOENT) { |
| 3056 | ret = 0; | 3230 | ret = 0; |
| 3057 | goto out; | 3231 | goto out; |
| @@ -3061,13 +3235,67 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
| 3061 | 3235 | ||
| 3062 | len1 = fs_path_len(path_before); | 3236 | len1 = fs_path_len(path_before); |
| 3063 | len2 = fs_path_len(path_after); | 3237 | len2 = fs_path_len(path_after); |
| 3064 | if ((parent_ino_before != parent_ino_after) && (len1 != len2 || | 3238 | if (parent_ino_before != parent_ino_after || len1 != len2 || |
| 3065 | memcmp(path_before->start, path_after->start, len1))) { | 3239 | memcmp(path_before->start, path_after->start, len1)) { |
| 3066 | ret = 1; | 3240 | ret = 1; |
| 3067 | goto out; | 3241 | goto out; |
| 3068 | } | 3242 | } |
| 3069 | ret = 0; | 3243 | ret = 0; |
| 3070 | 3244 | ||
| 3245 | /* | ||
| 3246 | * Ok, our new most direct ancestor has a higher inode number but | ||
| 3247 | * wasn't moved/renamed. So maybe some of the new ancestors higher in | ||
| 3248 | * the hierarchy have a higher inode number too *and* were renamed | ||
| 3249 | * or moved - in this case we need to wait for the ancestor's rename | ||
| 3250 | * or move operation before we can do the move/rename for the current | ||
| 3251 | * inode. | ||
| 3252 | */ | ||
| 3253 | register_upper_dirs = 0; | ||
| 3254 | ino = parent_ino_after; | ||
| 3255 | again: | ||
| 3256 | while ((ret == 0 || register_upper_dirs) && ino > sctx->cur_ino) { | ||
| 3257 | u64 parent_gen; | ||
| 3258 | |||
| 3259 | fs_path_reset(path_before); | ||
| 3260 | fs_path_reset(path_after); | ||
| 3261 | |||
| 3262 | ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, | ||
| 3263 | &parent_gen, path_after); | ||
| 3264 | if (ret < 0) | ||
| 3265 | goto out; | ||
| 3266 | ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, | ||
| 3267 | NULL, path_before); | ||
| 3268 | if (ret == -ENOENT) { | ||
| 3269 | ret = 0; | ||
| 3270 | break; | ||
| 3271 | } else if (ret < 0) { | ||
| 3272 | goto out; | ||
| 3273 | } | ||
| 3274 | |||
| 3275 | len1 = fs_path_len(path_before); | ||
| 3276 | len2 = fs_path_len(path_after); | ||
| 3277 | if (parent_ino_before != parent_ino_after || len1 != len2 || | ||
| 3278 | memcmp(path_before->start, path_after->start, len1)) { | ||
| 3279 | ret = 1; | ||
| 3280 | if (register_upper_dirs) { | ||
| 3281 | break; | ||
| 3282 | } else { | ||
| 3283 | register_upper_dirs = 1; | ||
| 3284 | ino = parent_ref->dir; | ||
| 3285 | gen = parent_ref->dir_gen; | ||
| 3286 | goto again; | ||
| 3287 | } | ||
| 3288 | } else if (register_upper_dirs) { | ||
| 3289 | ret = add_pending_dir_move(sctx, ino, gen, | ||
| 3290 | parent_ino_after); | ||
| 3291 | if (ret < 0 && ret != -EEXIST) | ||
| 3292 | goto out; | ||
| 3293 | } | ||
| 3294 | |||
| 3295 | ino = parent_ino_after; | ||
| 3296 | gen = parent_gen; | ||
| 3297 | } | ||
| 3298 | |||
| 3071 | out: | 3299 | out: |
| 3072 | fs_path_free(path_before); | 3300 | fs_path_free(path_before); |
| 3073 | fs_path_free(path_after); | 3301 | fs_path_free(path_after); |
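The loop added here climbs the new tree one ancestor at a time, comparing each ancestor's first ref between the parent and send roots; the first mismatch means a higher-up move must be applied first, and the register_upper_dirs pass then queues every intermediate directory behind it via add_pending_dir_move. The climb-and-compare skeleton, with the two-root lookups reduced to toy arrays:

    #include <stdio.h>

    #define ROOT 1

    /* Toy "first ref" tables: parent inode of each inode in the old
     * (before) and new (after) trees, indexed by inode number. */
    static const int before[] = { 0, 0, 1, 2, 3 };
    static const int after[]  = { 0, 0, 1, 1, 3 }; /* 3 moved under root */

    /* Returns 1 if any ancestor of ino changed parents between the
     * two trees, i.e. a move higher up must complete first. */
    static int ancestor_moved(int ino)
    {
        while (ino > ROOT) {
            if (before[ino] != after[ino])
                return 1;             /* wait for this ancestor */
            ino = after[ino];         /* climb in the new tree  */
        }
        return 0;
    }

    int main(void)
    {
        printf("%d\n", ancestor_moved(4)); /* 1: inode 3 re-parented */
        return 0;
    }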
| @@ -3089,6 +3317,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) | |||
| 3089 | u64 ow_gen; | 3317 | u64 ow_gen; |
| 3090 | int did_overwrite = 0; | 3318 | int did_overwrite = 0; |
| 3091 | int is_orphan = 0; | 3319 | int is_orphan = 0; |
| 3320 | u64 last_dir_ino_rm = 0; | ||
| 3092 | 3321 | ||
| 3093 | verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | 3322 | verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); |
| 3094 | 3323 | ||
| @@ -3227,9 +3456,14 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 3227 | * dirs, we always have one new and one deleted | 3456 | * dirs, we always have one new and one deleted |
| 3228 | * ref. The deleted ref is ignored later. | 3457 | * ref. The deleted ref is ignored later. |
| 3229 | */ | 3458 | */ |
| 3230 | if (wait_for_parent_move(sctx, cur)) { | 3459 | ret = wait_for_parent_move(sctx, cur); |
| 3460 | if (ret < 0) | ||
| 3461 | goto out; | ||
| 3462 | if (ret) { | ||
| 3231 | ret = add_pending_dir_move(sctx, | 3463 | ret = add_pending_dir_move(sctx, |
| 3232 | cur->dir); | 3464 | sctx->cur_ino, |
| 3465 | sctx->cur_inode_gen, | ||
| 3466 | cur->dir); | ||
| 3233 | *pending_move = 1; | 3467 | *pending_move = 1; |
| 3234 | } else { | 3468 | } else { |
| 3235 | ret = send_rename(sctx, valid_path, | 3469 | ret = send_rename(sctx, valid_path, |
| @@ -3259,7 +3493,8 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 3259 | * later, we do this check again and rmdir it then if possible. | 3493 | * later, we do this check again and rmdir it then if possible. |
| 3260 | * See the use of check_dirs for more details. | 3494 | * See the use of check_dirs for more details. |
| 3261 | */ | 3495 | */ |
| 3262 | ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_ino); | 3496 | ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen, |
| 3497 | sctx->cur_ino); | ||
| 3263 | if (ret < 0) | 3498 | if (ret < 0) |
| 3264 | goto out; | 3499 | goto out; |
| 3265 | if (ret) { | 3500 | if (ret) { |
| @@ -3350,8 +3585,10 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 3350 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); | 3585 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); |
| 3351 | if (ret < 0) | 3586 | if (ret < 0) |
| 3352 | goto out; | 3587 | goto out; |
| 3353 | } else if (ret == inode_state_did_delete) { | 3588 | } else if (ret == inode_state_did_delete && |
| 3354 | ret = can_rmdir(sctx, cur->dir, sctx->cur_ino); | 3589 | cur->dir != last_dir_ino_rm) { |
| 3590 | ret = can_rmdir(sctx, cur->dir, cur->dir_gen, | ||
| 3591 | sctx->cur_ino); | ||
| 3355 | if (ret < 0) | 3592 | if (ret < 0) |
| 3356 | goto out; | 3593 | goto out; |
| 3357 | if (ret) { | 3594 | if (ret) { |
| @@ -3362,6 +3599,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 3362 | ret = send_rmdir(sctx, valid_path); | 3599 | ret = send_rmdir(sctx, valid_path); |
| 3363 | if (ret < 0) | 3600 | if (ret < 0) |
| 3364 | goto out; | 3601 | goto out; |
| 3602 | last_dir_ino_rm = cur->dir; | ||
| 3365 | } | 3603 | } |
| 3366 | } | 3604 | } |
| 3367 | } | 3605 | } |
| @@ -3375,9 +3613,8 @@ out: | |||
| 3375 | return ret; | 3613 | return ret; |
| 3376 | } | 3614 | } |
| 3377 | 3615 | ||
| 3378 | static int __record_new_ref(int num, u64 dir, int index, | 3616 | static int record_ref(struct btrfs_root *root, int num, u64 dir, int index, |
| 3379 | struct fs_path *name, | 3617 | struct fs_path *name, void *ctx, struct list_head *refs) |
| 3380 | void *ctx) | ||
| 3381 | { | 3618 | { |
| 3382 | int ret = 0; | 3619 | int ret = 0; |
| 3383 | struct send_ctx *sctx = ctx; | 3620 | struct send_ctx *sctx = ctx; |
| @@ -3388,7 +3625,7 @@ static int __record_new_ref(int num, u64 dir, int index, | |||
| 3388 | if (!p) | 3625 | if (!p) |
| 3389 | return -ENOMEM; | 3626 | return -ENOMEM; |
| 3390 | 3627 | ||
| 3391 | ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL, | 3628 | ret = get_inode_info(root, dir, NULL, &gen, NULL, NULL, |
| 3392 | NULL, NULL); | 3629 | NULL, NULL); |
| 3393 | if (ret < 0) | 3630 | if (ret < 0) |
| 3394 | goto out; | 3631 | goto out; |
| @@ -3400,7 +3637,7 @@ static int __record_new_ref(int num, u64 dir, int index, | |||
| 3400 | if (ret < 0) | 3637 | if (ret < 0) |
| 3401 | goto out; | 3638 | goto out; |
| 3402 | 3639 | ||
| 3403 | ret = record_ref(&sctx->new_refs, dir, gen, p); | 3640 | ret = __record_ref(refs, dir, gen, p); |
| 3404 | 3641 | ||
| 3405 | out: | 3642 | out: |
| 3406 | if (ret) | 3643 | if (ret) |
| @@ -3408,37 +3645,23 @@ out: | |||
| 3408 | return ret; | 3645 | return ret; |
| 3409 | } | 3646 | } |
| 3410 | 3647 | ||
| 3648 | static int __record_new_ref(int num, u64 dir, int index, | ||
| 3649 | struct fs_path *name, | ||
| 3650 | void *ctx) | ||
| 3651 | { | ||
| 3652 | struct send_ctx *sctx = ctx; | ||
| 3653 | return record_ref(sctx->send_root, num, dir, index, name, | ||
| 3654 | ctx, &sctx->new_refs); | ||
| 3655 | } | ||
| 3656 | |||
| 3657 | |||
| 3411 | static int __record_deleted_ref(int num, u64 dir, int index, | 3658 | static int __record_deleted_ref(int num, u64 dir, int index, |
| 3412 | struct fs_path *name, | 3659 | struct fs_path *name, |
| 3413 | void *ctx) | 3660 | void *ctx) |
| 3414 | { | 3661 | { |
| 3415 | int ret = 0; | ||
| 3416 | struct send_ctx *sctx = ctx; | 3662 | struct send_ctx *sctx = ctx; |
| 3417 | struct fs_path *p; | 3663 | return record_ref(sctx->parent_root, num, dir, index, name, |
| 3418 | u64 gen; | 3664 | ctx, &sctx->deleted_refs); |
| 3419 | |||
| 3420 | p = fs_path_alloc(); | ||
| 3421 | if (!p) | ||
| 3422 | return -ENOMEM; | ||
| 3423 | |||
| 3424 | ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL, | ||
| 3425 | NULL, NULL); | ||
| 3426 | if (ret < 0) | ||
| 3427 | goto out; | ||
| 3428 | |||
| 3429 | ret = get_cur_path(sctx, dir, gen, p); | ||
| 3430 | if (ret < 0) | ||
| 3431 | goto out; | ||
| 3432 | ret = fs_path_add_path(p, name); | ||
| 3433 | if (ret < 0) | ||
| 3434 | goto out; | ||
| 3435 | |||
| 3436 | ret = record_ref(&sctx->deleted_refs, dir, gen, p); | ||
| 3437 | |||
| 3438 | out: | ||
| 3439 | if (ret) | ||
| 3440 | fs_path_free(p); | ||
| 3441 | return ret; | ||
| 3442 | } | 3665 | } |
| 3443 | 3666 | ||
| 3444 | static int record_new_ref(struct send_ctx *sctx) | 3667 | static int record_new_ref(struct send_ctx *sctx) |
| @@ -3619,21 +3842,31 @@ static int process_all_refs(struct send_ctx *sctx, | |||
| 3619 | root = sctx->parent_root; | 3842 | root = sctx->parent_root; |
| 3620 | cb = __record_deleted_ref; | 3843 | cb = __record_deleted_ref; |
| 3621 | } else { | 3844 | } else { |
| 3622 | BUG(); | 3845 | btrfs_err(sctx->send_root->fs_info, |
| 3846 | "Wrong command %d in process_all_refs", cmd); | ||
| 3847 | ret = -EINVAL; | ||
| 3848 | goto out; | ||
| 3623 | } | 3849 | } |
| 3624 | 3850 | ||
| 3625 | key.objectid = sctx->cmp_key->objectid; | 3851 | key.objectid = sctx->cmp_key->objectid; |
| 3626 | key.type = BTRFS_INODE_REF_KEY; | 3852 | key.type = BTRFS_INODE_REF_KEY; |
| 3627 | key.offset = 0; | 3853 | key.offset = 0; |
| 3628 | while (1) { | 3854 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
| 3629 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | 3855 | if (ret < 0) |
| 3630 | if (ret < 0) | 3856 | goto out; |
| 3631 | goto out; | ||
| 3632 | if (ret) | ||
| 3633 | break; | ||
| 3634 | 3857 | ||
| 3858 | while (1) { | ||
| 3635 | eb = path->nodes[0]; | 3859 | eb = path->nodes[0]; |
| 3636 | slot = path->slots[0]; | 3860 | slot = path->slots[0]; |
| 3861 | if (slot >= btrfs_header_nritems(eb)) { | ||
| 3862 | ret = btrfs_next_leaf(root, path); | ||
| 3863 | if (ret < 0) | ||
| 3864 | goto out; | ||
| 3865 | else if (ret > 0) | ||
| 3866 | break; | ||
| 3867 | continue; | ||
| 3868 | } | ||
| 3869 | |||
| 3637 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 3870 | btrfs_item_key_to_cpu(eb, &found_key, slot); |
| 3638 | 3871 | ||
| 3639 | if (found_key.objectid != key.objectid || | 3872 | if (found_key.objectid != key.objectid || |
| @@ -3642,11 +3875,10 @@ static int process_all_refs(struct send_ctx *sctx, | |||
| 3642 | break; | 3875 | break; |
| 3643 | 3876 | ||
| 3644 | ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx); | 3877 | ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx); |
| 3645 | btrfs_release_path(path); | ||
| 3646 | if (ret < 0) | 3878 | if (ret < 0) |
| 3647 | goto out; | 3879 | goto out; |
| 3648 | 3880 | ||
| 3649 | key.offset = found_key.offset + 1; | 3881 | path->slots[0]++; |
| 3650 | } | 3882 | } |
| 3651 | btrfs_release_path(path); | 3883 | btrfs_release_path(path); |
| 3652 | 3884 | ||
| @@ -3927,19 +4159,25 @@ static int process_all_new_xattrs(struct send_ctx *sctx) | |||
| 3927 | key.objectid = sctx->cmp_key->objectid; | 4159 | key.objectid = sctx->cmp_key->objectid; |
| 3928 | key.type = BTRFS_XATTR_ITEM_KEY; | 4160 | key.type = BTRFS_XATTR_ITEM_KEY; |
| 3929 | key.offset = 0; | 4161 | key.offset = 0; |
| 3930 | while (1) { | 4162 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
| 3931 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | 4163 | if (ret < 0) |
| 3932 | if (ret < 0) | 4164 | goto out; |
| 3933 | goto out; | ||
| 3934 | if (ret) { | ||
| 3935 | ret = 0; | ||
| 3936 | goto out; | ||
| 3937 | } | ||
| 3938 | 4165 | ||
| 4166 | while (1) { | ||
| 3939 | eb = path->nodes[0]; | 4167 | eb = path->nodes[0]; |
| 3940 | slot = path->slots[0]; | 4168 | slot = path->slots[0]; |
| 3941 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 4169 | if (slot >= btrfs_header_nritems(eb)) { |
| 4170 | ret = btrfs_next_leaf(root, path); | ||
| 4171 | if (ret < 0) { | ||
| 4172 | goto out; | ||
| 4173 | } else if (ret > 0) { | ||
| 4174 | ret = 0; | ||
| 4175 | break; | ||
| 4176 | } | ||
| 4177 | continue; | ||
| 4178 | } | ||
| 3942 | 4179 | ||
| 4180 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
| 3943 | if (found_key.objectid != key.objectid || | 4181 | if (found_key.objectid != key.objectid || |
| 3944 | found_key.type != key.type) { | 4182 | found_key.type != key.type) { |
| 3945 | ret = 0; | 4183 | ret = 0; |
| @@ -3951,8 +4189,7 @@ static int process_all_new_xattrs(struct send_ctx *sctx) | |||
| 3951 | if (ret < 0) | 4189 | if (ret < 0) |
| 3952 | goto out; | 4190 | goto out; |
| 3953 | 4191 | ||
| 3954 | btrfs_release_path(path); | 4192 | path->slots[0]++; |
| 3955 | key.offset = found_key.offset + 1; | ||
| 3956 | } | 4193 | } |
| 3957 | 4194 | ||
| 3958 | out: | 4195 | out: |
| @@ -3991,6 +4228,13 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) | |||
| 3991 | goto out; | 4228 | goto out; |
| 3992 | 4229 | ||
| 3993 | last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT; | 4230 | last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT; |
| 4231 | |||
| 4232 | /* initial readahead */ | ||
| 4233 | memset(&sctx->ra, 0, sizeof(struct file_ra_state)); | ||
| 4234 | file_ra_state_init(&sctx->ra, inode->i_mapping); | ||
| 4235 | btrfs_force_ra(inode->i_mapping, &sctx->ra, NULL, index, | ||
| 4236 | last_index - index + 1); | ||
| 4237 | |||
| 3994 | while (index <= last_index) { | 4238 | while (index <= last_index) { |
| 3995 | unsigned cur_len = min_t(unsigned, len, | 4239 | unsigned cur_len = min_t(unsigned, len, |
| 3996 | PAGE_CACHE_SIZE - pg_offset); | 4240 | PAGE_CACHE_SIZE - pg_offset); |
| @@ -4763,18 +5007,19 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) | |||
| 4763 | ret = apply_children_dir_moves(sctx); | 5007 | ret = apply_children_dir_moves(sctx); |
| 4764 | if (ret) | 5008 | if (ret) |
| 4765 | goto out; | 5009 | goto out; |
| 5010 | /* | ||
| 5011 | * Need to send that every time, no matter if it actually | ||
| 5012 | * changed between the two trees as we have done changes to | ||
| 5013 | * the inode before. If our inode is a directory and it's | ||
| 5014 | * waiting to be moved/renamed, we will send its utimes when | ||
| 5015 | * it's moved/renamed, therefore we don't need to do it here. | ||
| 5016 | */ | ||
| 5017 | sctx->send_progress = sctx->cur_ino + 1; | ||
| 5018 | ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); | ||
| 5019 | if (ret < 0) | ||
| 5020 | goto out; | ||
| 4766 | } | 5021 | } |
| 4767 | 5022 | ||
| 4768 | /* | ||
| 4769 | * Need to send that every time, no matter if it actually | ||
| 4770 | * changed between the two trees as we have done changes to | ||
| 4771 | * the inode before. | ||
| 4772 | */ | ||
| 4773 | sctx->send_progress = sctx->cur_ino + 1; | ||
| 4774 | ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); | ||
| 4775 | if (ret < 0) | ||
| 4776 | goto out; | ||
| 4777 | |||
| 4778 | out: | 5023 | out: |
| 4779 | return ret; | 5024 | return ret; |
| 4780 | } | 5025 | } |
| @@ -4840,6 +5085,8 @@ static int changed_inode(struct send_ctx *sctx, | |||
| 4840 | sctx->left_path->nodes[0], left_ii); | 5085 | sctx->left_path->nodes[0], left_ii); |
| 4841 | sctx->cur_inode_mode = btrfs_inode_mode( | 5086 | sctx->cur_inode_mode = btrfs_inode_mode( |
| 4842 | sctx->left_path->nodes[0], left_ii); | 5087 | sctx->left_path->nodes[0], left_ii); |
| 5088 | sctx->cur_inode_rdev = btrfs_inode_rdev( | ||
| 5089 | sctx->left_path->nodes[0], left_ii); | ||
| 4843 | if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) | 5090 | if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) |
| 4844 | ret = send_create_inode_if_needed(sctx); | 5091 | ret = send_create_inode_if_needed(sctx); |
| 4845 | } else if (result == BTRFS_COMPARE_TREE_DELETED) { | 5092 | } else if (result == BTRFS_COMPARE_TREE_DELETED) { |
| @@ -4884,6 +5131,8 @@ static int changed_inode(struct send_ctx *sctx, | |||
| 4884 | sctx->left_path->nodes[0], left_ii); | 5131 | sctx->left_path->nodes[0], left_ii); |
| 4885 | sctx->cur_inode_mode = btrfs_inode_mode( | 5132 | sctx->cur_inode_mode = btrfs_inode_mode( |
| 4886 | sctx->left_path->nodes[0], left_ii); | 5133 | sctx->left_path->nodes[0], left_ii); |
| 5134 | sctx->cur_inode_rdev = btrfs_inode_rdev( | ||
| 5135 | sctx->left_path->nodes[0], left_ii); | ||
| 4887 | ret = send_create_inode_if_needed(sctx); | 5136 | ret = send_create_inode_if_needed(sctx); |
| 4888 | if (ret < 0) | 5137 | if (ret < 0) |
| 4889 | goto out; | 5138 | goto out; |
| @@ -5118,6 +5367,7 @@ out: | |||
| 5118 | static int full_send_tree(struct send_ctx *sctx) | 5367 | static int full_send_tree(struct send_ctx *sctx) |
| 5119 | { | 5368 | { |
| 5120 | int ret; | 5369 | int ret; |
| 5370 | struct btrfs_trans_handle *trans = NULL; | ||
| 5121 | struct btrfs_root *send_root = sctx->send_root; | 5371 | struct btrfs_root *send_root = sctx->send_root; |
| 5122 | struct btrfs_key key; | 5372 | struct btrfs_key key; |
| 5123 | struct btrfs_key found_key; | 5373 | struct btrfs_key found_key; |
| @@ -5139,6 +5389,19 @@ static int full_send_tree(struct send_ctx *sctx) | |||
| 5139 | key.type = BTRFS_INODE_ITEM_KEY; | 5389 | key.type = BTRFS_INODE_ITEM_KEY; |
| 5140 | key.offset = 0; | 5390 | key.offset = 0; |
| 5141 | 5391 | ||
| 5392 | join_trans: | ||
| 5393 | /* | ||
| 5394 | * We need to make sure the transaction does not get committed | ||
| 5395 | * while we do anything on commit roots. Join a transaction to prevent | ||
| 5396 | * this. | ||
| 5397 | */ | ||
| 5398 | trans = btrfs_join_transaction(send_root); | ||
| 5399 | if (IS_ERR(trans)) { | ||
| 5400 | ret = PTR_ERR(trans); | ||
| 5401 | trans = NULL; | ||
| 5402 | goto out; | ||
| 5403 | } | ||
| 5404 | |||
| 5142 | /* | 5405 | /* |
| 5143 | * Make sure the tree has not changed after re-joining. We detect this | 5406 | * Make sure the tree has not changed after re-joining. We detect this |
| 5144 | * by comparing start_ctransid and ctransid. They should always match. | 5407 | * by comparing start_ctransid and ctransid. They should always match. |
| @@ -5162,6 +5425,19 @@ static int full_send_tree(struct send_ctx *sctx) | |||
| 5162 | goto out_finish; | 5425 | goto out_finish; |
| 5163 | 5426 | ||
| 5164 | while (1) { | 5427 | while (1) { |
| 5428 | /* | ||
| 5429 | * When someone wants to commit while we iterate, end the | ||
| 5430 | * joined transaction and rejoin. | ||
| 5431 | */ | ||
| 5432 | if (btrfs_should_end_transaction(trans, send_root)) { | ||
| 5433 | ret = btrfs_end_transaction(trans, send_root); | ||
| 5434 | trans = NULL; | ||
| 5435 | if (ret < 0) | ||
| 5436 | goto out; | ||
| 5437 | btrfs_release_path(path); | ||
| 5438 | goto join_trans; | ||
| 5439 | } | ||
| 5440 | |||
| 5165 | eb = path->nodes[0]; | 5441 | eb = path->nodes[0]; |
| 5166 | slot = path->slots[0]; | 5442 | slot = path->slots[0]; |
| 5167 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 5443 | btrfs_item_key_to_cpu(eb, &found_key, slot); |
| @@ -5189,6 +5465,12 @@ out_finish: | |||
| 5189 | 5465 | ||
| 5190 | out: | 5466 | out: |
| 5191 | btrfs_free_path(path); | 5467 | btrfs_free_path(path); |
| 5468 | if (trans) { | ||
| 5469 | if (!ret) | ||
| 5470 | ret = btrfs_end_transaction(trans, send_root); | ||
| 5471 | else | ||
| 5472 | btrfs_end_transaction(trans, send_root); | ||
| 5473 | } | ||
| 5192 | return ret; | 5474 | return ret; |
| 5193 | } | 5475 | } |
| 5194 | 5476 | ||
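full_send_tree now iterates commit roots under a joined transaction and, whenever btrfs_should_end_transaction signals a pending commit, ends the handle, releases the path, and rejoins at join_trans, so a long send no longer stalls transaction commits. A userspace sketch of the yield-and-reacquire shape, with a mutex standing in for the transaction handle (build with -lpthread):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t trans = PTHREAD_MUTEX_INITIALIZER;

    /* Stand-in for btrfs_should_end_transaction(): yield periodically. */
    static int should_yield(int i)
    {
        return i % 1000 == 0;
    }

    int main(void)
    {
        pthread_mutex_lock(&trans);            /* "join" the transaction */
        for (int i = 1; i <= 5000; i++) {
            if (should_yield(i)) {
                pthread_mutex_unlock(&trans);  /* let a commit proceed */
                /* a committer could run here */
                pthread_mutex_lock(&trans);    /* rejoin and continue  */
            }
            /* ... process one tree item ... */
        }
        pthread_mutex_unlock(&trans);
        printf("done\n");
        return 0;
    }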
| @@ -5340,6 +5622,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
| 5340 | 5622 | ||
| 5341 | sctx->pending_dir_moves = RB_ROOT; | 5623 | sctx->pending_dir_moves = RB_ROOT; |
| 5342 | sctx->waiting_dir_moves = RB_ROOT; | 5624 | sctx->waiting_dir_moves = RB_ROOT; |
| 5625 | sctx->orphan_dirs = RB_ROOT; | ||
| 5343 | 5626 | ||
| 5344 | sctx->clone_roots = vzalloc(sizeof(struct clone_root) * | 5627 | sctx->clone_roots = vzalloc(sizeof(struct clone_root) * |
| 5345 | (arg->clone_sources_count + 1)); | 5628 | (arg->clone_sources_count + 1)); |
| @@ -5477,6 +5760,16 @@ out: | |||
| 5477 | kfree(dm); | 5760 | kfree(dm); |
| 5478 | } | 5761 | } |
| 5479 | 5762 | ||
| 5763 | WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs)); | ||
| 5764 | while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) { | ||
| 5765 | struct rb_node *n; | ||
| 5766 | struct orphan_dir_info *odi; | ||
| 5767 | |||
| 5768 | n = rb_first(&sctx->orphan_dirs); | ||
| 5769 | odi = rb_entry(n, struct orphan_dir_info, node); | ||
| 5770 | free_orphan_dir_info(sctx, odi); | ||
| 5771 | } | ||
| 5772 | |||
| 5480 | if (sort_clone_roots) { | 5773 | if (sort_clone_roots) { |
| 5481 | for (i = 0; i < sctx->clone_roots_cnt; i++) | 5774 | for (i = 0; i < sctx->clone_roots_cnt; i++) |
| 5482 | btrfs_root_dec_send_in_progress( | 5775 | btrfs_root_dec_send_in_progress( |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d04db817be5c..9dbf42395153 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -1305,13 +1305,6 @@ error_fs_info: | |||
| 1305 | return ERR_PTR(error); | 1305 | return ERR_PTR(error); |
| 1306 | } | 1306 | } |
| 1307 | 1307 | ||
| 1308 | static void btrfs_set_max_workers(struct btrfs_workers *workers, int new_limit) | ||
| 1309 | { | ||
| 1310 | spin_lock_irq(&workers->lock); | ||
| 1311 | workers->max_workers = new_limit; | ||
| 1312 | spin_unlock_irq(&workers->lock); | ||
| 1313 | } | ||
| 1314 | |||
| 1315 | static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, | 1308 | static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, |
| 1316 | int new_pool_size, int old_pool_size) | 1309 | int new_pool_size, int old_pool_size) |
| 1317 | { | 1310 | { |
| @@ -1323,21 +1316,20 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, | |||
| 1323 | btrfs_info(fs_info, "resize thread pool %d -> %d", | 1316 | btrfs_info(fs_info, "resize thread pool %d -> %d", |
| 1324 | old_pool_size, new_pool_size); | 1317 | old_pool_size, new_pool_size); |
| 1325 | 1318 | ||
| 1326 | btrfs_set_max_workers(&fs_info->generic_worker, new_pool_size); | 1319 | btrfs_workqueue_set_max(fs_info->workers, new_pool_size); |
| 1327 | btrfs_set_max_workers(&fs_info->workers, new_pool_size); | 1320 | btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size); |
| 1328 | btrfs_set_max_workers(&fs_info->delalloc_workers, new_pool_size); | 1321 | btrfs_workqueue_set_max(fs_info->submit_workers, new_pool_size); |
| 1329 | btrfs_set_max_workers(&fs_info->submit_workers, new_pool_size); | 1322 | btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size); |
| 1330 | btrfs_set_max_workers(&fs_info->caching_workers, new_pool_size); | 1323 | btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size); |
| 1331 | btrfs_set_max_workers(&fs_info->fixup_workers, new_pool_size); | 1324 | btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size); |
| 1332 | btrfs_set_max_workers(&fs_info->endio_workers, new_pool_size); | 1325 | btrfs_workqueue_set_max(fs_info->endio_meta_write_workers, |
| 1333 | btrfs_set_max_workers(&fs_info->endio_meta_workers, new_pool_size); | 1326 | new_pool_size); |
| 1334 | btrfs_set_max_workers(&fs_info->endio_meta_write_workers, new_pool_size); | 1327 | btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size); |
| 1335 | btrfs_set_max_workers(&fs_info->endio_write_workers, new_pool_size); | 1328 | btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size); |
| 1336 | btrfs_set_max_workers(&fs_info->endio_freespace_worker, new_pool_size); | 1329 | btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size); |
| 1337 | btrfs_set_max_workers(&fs_info->delayed_workers, new_pool_size); | 1330 | btrfs_workqueue_set_max(fs_info->readahead_workers, new_pool_size); |
| 1338 | btrfs_set_max_workers(&fs_info->readahead_workers, new_pool_size); | 1331 | btrfs_workqueue_set_max(fs_info->scrub_wr_completion_workers, |
| 1339 | btrfs_set_max_workers(&fs_info->scrub_wr_completion_workers, | 1332 | new_pool_size); |
| 1340 | new_pool_size); | ||
| 1341 | } | 1333 | } |
| 1342 | 1334 | ||
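With the old per-pool spinlocked setter gone, resizing reduces to one btrfs_workqueue_set_max call per queue. A trivial sketch of applying one limit across a set of pools (the struct and names are placeholders):

    #include <stdio.h>

    struct wq { const char *name; int max_active; };

    /* Apply one new concurrency limit across every pool. */
    static void wq_set_max(struct wq *q, int n) { q->max_active = n; }

    int main(void)
    {
        struct wq pools[] = { {"workers", 8}, {"delalloc", 8}, {"endio", 8} };
        for (unsigned i = 0; i < sizeof(pools)/sizeof(pools[0]); i++)
            wq_set_max(&pools[i], 16);
        printf("%s -> %d\n", pools[0].name, pools[0].max_active);
        return 0;
    }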
| 1343 | static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info) | 1335 | static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info) |
| @@ -1388,6 +1380,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 1388 | unsigned int old_metadata_ratio = fs_info->metadata_ratio; | 1380 | unsigned int old_metadata_ratio = fs_info->metadata_ratio; |
| 1389 | int ret; | 1381 | int ret; |
| 1390 | 1382 | ||
| 1383 | sync_filesystem(sb); | ||
| 1391 | btrfs_remount_prepare(fs_info); | 1384 | btrfs_remount_prepare(fs_info); |
| 1392 | 1385 | ||
| 1393 | ret = btrfs_parse_options(root, data); | 1386 | ret = btrfs_parse_options(root, data); |
| @@ -1479,6 +1472,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 1479 | sb->s_flags &= ~MS_RDONLY; | 1472 | sb->s_flags &= ~MS_RDONLY; |
| 1480 | } | 1473 | } |
| 1481 | out: | 1474 | out: |
| 1475 | wake_up_process(fs_info->transaction_kthread); | ||
| 1482 | btrfs_remount_cleanup(fs_info, old_opts); | 1476 | btrfs_remount_cleanup(fs_info, old_opts); |
| 1483 | return 0; | 1477 | return 0; |
| 1484 | 1478 | ||
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 865f4cf9a769..c5eb2143dc66 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include <linux/kobject.h> | 24 | #include <linux/kobject.h> |
| 25 | #include <linux/bug.h> | 25 | #include <linux/bug.h> |
| 26 | #include <linux/genhd.h> | 26 | #include <linux/genhd.h> |
| 27 | #include <linux/debugfs.h> | ||
| 27 | 28 | ||
| 28 | #include "ctree.h" | 29 | #include "ctree.h" |
| 29 | #include "disk-io.h" | 30 | #include "disk-io.h" |
| @@ -599,6 +600,12 @@ static int add_device_membership(struct btrfs_fs_info *fs_info) | |||
| 599 | /* /sys/fs/btrfs/ entry */ | 600 | /* /sys/fs/btrfs/ entry */ |
| 600 | static struct kset *btrfs_kset; | 601 | static struct kset *btrfs_kset; |
| 601 | 602 | ||
| 603 | /* /sys/kernel/debug/btrfs */ | ||
| 604 | static struct dentry *btrfs_debugfs_root_dentry; | ||
| 605 | |||
| 606 | /* Debugging tunables and exported data */ | ||
| 607 | u64 btrfs_debugfs_test; | ||
| 608 | |||
| 602 | int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info) | 609 | int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info) |
| 603 | { | 610 | { |
| 604 | int error; | 611 | int error; |
| @@ -642,27 +649,41 @@ failure: | |||
| 642 | return error; | 649 | return error; |
| 643 | } | 650 | } |
| 644 | 651 | ||
| 652 | static int btrfs_init_debugfs(void) | ||
| 653 | { | ||
| 654 | #ifdef CONFIG_DEBUG_FS | ||
| 655 | btrfs_debugfs_root_dentry = debugfs_create_dir("btrfs", NULL); | ||
| 656 | if (!btrfs_debugfs_root_dentry) | ||
| 657 | return -ENOMEM; | ||
| 658 | |||
| 659 | debugfs_create_u64("test", S_IRUGO | S_IWUGO, btrfs_debugfs_root_dentry, | ||
| 660 | &btrfs_debugfs_test); | ||
| 661 | #endif | ||
| 662 | return 0; | ||
| 663 | } | ||
| 664 | |||
| 645 | int btrfs_init_sysfs(void) | 665 | int btrfs_init_sysfs(void) |
| 646 | { | 666 | { |
| 647 | int ret; | 667 | int ret; |
| 668 | |||
| 648 | btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj); | 669 | btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj); |
| 649 | if (!btrfs_kset) | 670 | if (!btrfs_kset) |
| 650 | return -ENOMEM; | 671 | return -ENOMEM; |
| 651 | 672 | ||
| 652 | init_feature_attrs(); | 673 | ret = btrfs_init_debugfs(); |
| 674 | if (ret) | ||
| 675 | return ret; | ||
| 653 | 676 | ||
| 677 | init_feature_attrs(); | ||
| 654 | ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); | 678 | ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); |
| 655 | if (ret) { | ||
| 656 | kset_unregister(btrfs_kset); | ||
| 657 | return ret; | ||
| 658 | } | ||
| 659 | 679 | ||
| 660 | return 0; | 680 | return ret; |
| 661 | } | 681 | } |
| 662 | 682 | ||
| 663 | void btrfs_exit_sysfs(void) | 683 | void btrfs_exit_sysfs(void) |
| 664 | { | 684 | { |
| 665 | sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); | 685 | sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); |
| 666 | kset_unregister(btrfs_kset); | 686 | kset_unregister(btrfs_kset); |
| 687 | debugfs_remove_recursive(btrfs_debugfs_root_dentry); | ||
| 667 | } | 688 | } |
| 668 | 689 | ||
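Editorial note: btrfs_init_debugfs() follows the standard debugfs recipe seen above: create one directory at init, hang simple attribute files off it, and tear everything down with a single recursive remove at exit. A self-contained sketch of the same pattern in a hypothetical module (all names illustrative):

	#include <linux/module.h>
	#include <linux/debugfs.h>

	static struct dentry *example_dir;
	static u64 example_tunable;

	static int __init example_init(void)
	{
		example_dir = debugfs_create_dir("example", NULL);
		if (!example_dir)
			return -ENOMEM;
		/* read/write u64 at /sys/kernel/debug/example/tunable */
		debugfs_create_u64("tunable", S_IRUGO | S_IWUSR,
				   example_dir, &example_tunable);
		return 0;
	}

	static void __exit example_exit(void)
	{
		/* removes the directory and everything beneath it */
		debugfs_remove_recursive(example_dir);
	}

	module_init(example_init);
	module_exit(example_exit);
	MODULE_LICENSE("GPL");

debugfs_remove_recursive() tolerates a NULL dentry, which is why the exit path above needs no CONFIG_DEBUG_FS guard of its own.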
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h index f3cea3710d44..9ab576318a84 100644 --- a/fs/btrfs/sysfs.h +++ b/fs/btrfs/sysfs.h | |||
| @@ -1,6 +1,11 @@ | |||
| 1 | #ifndef _BTRFS_SYSFS_H_ | 1 | #ifndef _BTRFS_SYSFS_H_ |
| 2 | #define _BTRFS_SYSFS_H_ | 2 | #define _BTRFS_SYSFS_H_ |
| 3 | 3 | ||
| 4 | /* | ||
| 5 | * Data exported through debugfs | ||
| 6 | */ | ||
| 7 | extern u64 btrfs_debugfs_test; | ||
| 8 | |||
| 4 | enum btrfs_feature_set { | 9 | enum btrfs_feature_set { |
| 5 | FEAT_COMPAT, | 10 | FEAT_COMPAT, |
| 6 | FEAT_COMPAT_RO, | 11 | FEAT_COMPAT_RO, |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 34cd83184c4a..a04707f740d6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -683,7 +683,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 683 | int lock = (trans->type != TRANS_JOIN_NOLOCK); | 683 | int lock = (trans->type != TRANS_JOIN_NOLOCK); |
| 684 | int err = 0; | 684 | int err = 0; |
| 685 | 685 | ||
| 686 | if (--trans->use_count) { | 686 | if (trans->use_count > 1) { |
| 687 | trans->use_count--; | ||
| 687 | trans->block_rsv = trans->orig_rsv; | 688 | trans->block_rsv = trans->orig_rsv; |
| 688 | return 0; | 689 | return 0; |
| 689 | } | 690 | } |
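Editorial note: the use_count change above removes a subtle re-entry dance. The old code consumed the reference up front, so the throttled path further down had to re-increment use_count before calling btrfs_commit_transaction(), which ends the handle again (see the comment deleted in the next hunk). Peeking first and decrementing only on the early return makes the later commit call safe with no compensation. Schematically, as annotated excerpts of the two shapes rather than runnable code:

	/* Before: reference consumed before we know whether the throttle
	 * path will re-enter end_transaction via btrfs_commit_transaction(),
	 * hence the compensating trans->use_count++ deleted below. */
	if (--trans->use_count) {
		trans->block_rsv = trans->orig_rsv;
		return 0;
	}

	/* After: peek, and only consume the reference on the early return;
	 * btrfs_commit_transaction() can then consume the last one itself. */
	if (trans->use_count > 1) {
		trans->use_count--;
		trans->block_rsv = trans->orig_rsv;
		return 0;
	}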
| @@ -731,17 +732,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 731 | } | 732 | } |
| 732 | 733 | ||
| 733 | if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { | 734 | if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { |
| 734 | if (throttle) { | 735 | if (throttle) |
| 735 | /* | ||
| 736 | * We may race with somebody else here so end up having | ||
| 737 | * to call end_transaction on ourselves again, so inc | ||
| 738 | * our use_count. | ||
| 739 | */ | ||
| 740 | trans->use_count++; | ||
| 741 | return btrfs_commit_transaction(trans, root); | 736 | return btrfs_commit_transaction(trans, root); |
| 742 | } else { | 737 | else |
| 743 | wake_up_process(info->transaction_kthread); | 738 | wake_up_process(info->transaction_kthread); |
| 744 | } | ||
| 745 | } | 739 | } |
| 746 | 740 | ||
| 747 | if (trans->type & __TRANS_FREEZABLE) | 741 | if (trans->type & __TRANS_FREEZABLE) |
| @@ -1578,10 +1572,9 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, | |||
| 1578 | 1572 | ||
| 1579 | trace_btrfs_transaction_commit(root); | 1573 | trace_btrfs_transaction_commit(root); |
| 1580 | 1574 | ||
| 1581 | btrfs_scrub_continue(root); | ||
| 1582 | |||
| 1583 | if (current->journal_info == trans) | 1575 | if (current->journal_info == trans) |
| 1584 | current->journal_info = NULL; | 1576 | current->journal_info = NULL; |
| 1577 | btrfs_scrub_cancel(root->fs_info); | ||
| 1585 | 1578 | ||
| 1586 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 1579 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
| 1587 | } | 1580 | } |
| @@ -1621,7 +1614,7 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, | |||
| 1621 | static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) | 1614 | static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) |
| 1622 | { | 1615 | { |
| 1623 | if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) | 1616 | if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) |
| 1624 | return btrfs_start_delalloc_roots(fs_info, 1); | 1617 | return btrfs_start_delalloc_roots(fs_info, 1, -1); |
| 1625 | return 0; | 1618 | return 0; |
| 1626 | } | 1619 | } |
| 1627 | 1620 | ||
| @@ -1754,7 +1747,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1754 | /* ->aborted might be set after the previous check, so check it */ | 1747 | /* ->aborted might be set after the previous check, so check it */ |
| 1755 | if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { | 1748 | if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { |
| 1756 | ret = cur_trans->aborted; | 1749 | ret = cur_trans->aborted; |
| 1757 | goto cleanup_transaction; | 1750 | goto scrub_continue; |
| 1758 | } | 1751 | } |
| 1759 | /* | 1752 | /* |
| 1760 | * the reloc mutex makes sure that we stop | 1753 | * the reloc mutex makes sure that we stop |
| @@ -1771,7 +1764,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1771 | ret = create_pending_snapshots(trans, root->fs_info); | 1764 | ret = create_pending_snapshots(trans, root->fs_info); |
| 1772 | if (ret) { | 1765 | if (ret) { |
| 1773 | mutex_unlock(&root->fs_info->reloc_mutex); | 1766 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1774 | goto cleanup_transaction; | 1767 | goto scrub_continue; |
| 1775 | } | 1768 | } |
| 1776 | 1769 | ||
| 1777 | /* | 1770 | /* |
| @@ -1787,13 +1780,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1787 | ret = btrfs_run_delayed_items(trans, root); | 1780 | ret = btrfs_run_delayed_items(trans, root); |
| 1788 | if (ret) { | 1781 | if (ret) { |
| 1789 | mutex_unlock(&root->fs_info->reloc_mutex); | 1782 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1790 | goto cleanup_transaction; | 1783 | goto scrub_continue; |
| 1791 | } | 1784 | } |
| 1792 | 1785 | ||
| 1793 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | 1786 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); |
| 1794 | if (ret) { | 1787 | if (ret) { |
| 1795 | mutex_unlock(&root->fs_info->reloc_mutex); | 1788 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1796 | goto cleanup_transaction; | 1789 | goto scrub_continue; |
| 1797 | } | 1790 | } |
| 1798 | 1791 | ||
| 1799 | /* | 1792 | /* |
| @@ -1823,7 +1816,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1823 | if (ret) { | 1816 | if (ret) { |
| 1824 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1817 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 1825 | mutex_unlock(&root->fs_info->reloc_mutex); | 1818 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1826 | goto cleanup_transaction; | 1819 | goto scrub_continue; |
| 1827 | } | 1820 | } |
| 1828 | 1821 | ||
| 1829 | /* | 1822 | /* |
| @@ -1844,7 +1837,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1844 | if (ret) { | 1837 | if (ret) { |
| 1845 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1838 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 1846 | mutex_unlock(&root->fs_info->reloc_mutex); | 1839 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1847 | goto cleanup_transaction; | 1840 | goto scrub_continue; |
| 1848 | } | 1841 | } |
| 1849 | 1842 | ||
| 1850 | /* | 1843 | /* |
| @@ -1855,7 +1848,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1855 | ret = cur_trans->aborted; | 1848 | ret = cur_trans->aborted; |
| 1856 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1849 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 1857 | mutex_unlock(&root->fs_info->reloc_mutex); | 1850 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1858 | goto cleanup_transaction; | 1851 | goto scrub_continue; |
| 1859 | } | 1852 | } |
| 1860 | 1853 | ||
| 1861 | btrfs_prepare_extent_commit(trans, root); | 1854 | btrfs_prepare_extent_commit(trans, root); |
| @@ -1891,13 +1884,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1891 | btrfs_error(root->fs_info, ret, | 1884 | btrfs_error(root->fs_info, ret, |
| 1892 | "Error while writing out transaction"); | 1885 | "Error while writing out transaction"); |
| 1893 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1886 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 1894 | goto cleanup_transaction; | 1887 | goto scrub_continue; |
| 1895 | } | 1888 | } |
| 1896 | 1889 | ||
| 1897 | ret = write_ctree_super(trans, root, 0); | 1890 | ret = write_ctree_super(trans, root, 0); |
| 1898 | if (ret) { | 1891 | if (ret) { |
| 1899 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1892 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 1900 | goto cleanup_transaction; | 1893 | goto scrub_continue; |
| 1901 | } | 1894 | } |
| 1902 | 1895 | ||
| 1903 | /* | 1896 | /* |
| @@ -1940,6 +1933,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1940 | 1933 | ||
| 1941 | return ret; | 1934 | return ret; |
| 1942 | 1935 | ||
| 1936 | scrub_continue: | ||
| 1937 | btrfs_scrub_continue(root); | ||
| 1943 | cleanup_transaction: | 1938 | cleanup_transaction: |
| 1944 | btrfs_trans_release_metadata(trans, root); | 1939 | btrfs_trans_release_metadata(trans, root); |
| 1945 | trans->block_rsv = NULL; | 1940 | trans->block_rsv = NULL; |
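Editorial note: the new scrub_continue label stacks directly above cleanup_transaction, so failures that occur after scrub was paused first resume it and then fall through to the shared cleanup, while earlier failures still jump straight to cleanup_transaction. This is the usual stacked-label idiom for ordered unwinding in kernel C; a generic sketch with hypothetical names:

	int example(void)
	{
		int ret;

		ret = step_a();			/* nothing to undo yet */
		if (ret)
			goto cleanup;

		pause_background_work();	/* must be undone on failure */

		ret = step_b();
		if (ret)
			goto resume;		/* undo pause, then fall through */

		return 0;

	resume:
		resume_background_work();
	cleanup:
		release_common_resources();
		return ret;
	}

The label order encodes the teardown order, so each error site only needs to know how far setup had progressed.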
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 39d83da03e03..e2f45fc02610 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
| @@ -136,13 +136,20 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, | |||
| 136 | * syncing the tree wait for us to finish | 136 | * syncing the tree wait for us to finish |
| 137 | */ | 137 | */ |
| 138 | static int start_log_trans(struct btrfs_trans_handle *trans, | 138 | static int start_log_trans(struct btrfs_trans_handle *trans, |
| 139 | struct btrfs_root *root) | 139 | struct btrfs_root *root, |
| 140 | struct btrfs_log_ctx *ctx) | ||
| 140 | { | 141 | { |
| 142 | int index; | ||
| 141 | int ret; | 143 | int ret; |
| 142 | int err = 0; | ||
| 143 | 144 | ||
| 144 | mutex_lock(&root->log_mutex); | 145 | mutex_lock(&root->log_mutex); |
| 145 | if (root->log_root) { | 146 | if (root->log_root) { |
| 147 | if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == | ||
| 148 | trans->transid) { | ||
| 149 | ret = -EAGAIN; | ||
| 150 | goto out; | ||
| 151 | } | ||
| 152 | |||
| 146 | if (!root->log_start_pid) { | 153 | if (!root->log_start_pid) { |
| 147 | root->log_start_pid = current->pid; | 154 | root->log_start_pid = current->pid; |
| 148 | root->log_multiple_pids = false; | 155 | root->log_multiple_pids = false; |
| @@ -152,27 +159,40 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
| 152 | 159 | ||
| 153 | atomic_inc(&root->log_batch); | 160 | atomic_inc(&root->log_batch); |
| 154 | atomic_inc(&root->log_writers); | 161 | atomic_inc(&root->log_writers); |
| 162 | if (ctx) { | ||
| 163 | index = root->log_transid % 2; | ||
| 164 | list_add_tail(&ctx->list, &root->log_ctxs[index]); | ||
| 165 | ctx->log_transid = root->log_transid; | ||
| 166 | } | ||
| 155 | mutex_unlock(&root->log_mutex); | 167 | mutex_unlock(&root->log_mutex); |
| 156 | return 0; | 168 | return 0; |
| 157 | } | 169 | } |
| 158 | root->log_multiple_pids = false; | 170 | |
| 159 | root->log_start_pid = current->pid; | 171 | ret = 0; |
| 160 | mutex_lock(&root->fs_info->tree_log_mutex); | 172 | mutex_lock(&root->fs_info->tree_log_mutex); |
| 161 | if (!root->fs_info->log_root_tree) { | 173 | if (!root->fs_info->log_root_tree) |
| 162 | ret = btrfs_init_log_root_tree(trans, root->fs_info); | 174 | ret = btrfs_init_log_root_tree(trans, root->fs_info); |
| 163 | if (ret) | 175 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 164 | err = ret; | 176 | if (ret) |
| 165 | } | 177 | goto out; |
| 166 | if (err == 0 && !root->log_root) { | 178 | |
| 179 | if (!root->log_root) { | ||
| 167 | ret = btrfs_add_log_tree(trans, root); | 180 | ret = btrfs_add_log_tree(trans, root); |
| 168 | if (ret) | 181 | if (ret) |
| 169 | err = ret; | 182 | goto out; |
| 170 | } | 183 | } |
| 171 | mutex_unlock(&root->fs_info->tree_log_mutex); | 184 | root->log_multiple_pids = false; |
| 185 | root->log_start_pid = current->pid; | ||
| 172 | atomic_inc(&root->log_batch); | 186 | atomic_inc(&root->log_batch); |
| 173 | atomic_inc(&root->log_writers); | 187 | atomic_inc(&root->log_writers); |
| 188 | if (ctx) { | ||
| 189 | index = root->log_transid % 2; | ||
| 190 | list_add_tail(&ctx->list, &root->log_ctxs[index]); | ||
| 191 | ctx->log_transid = root->log_transid; | ||
| 192 | } | ||
| 193 | out: | ||
| 174 | mutex_unlock(&root->log_mutex); | 194 | mutex_unlock(&root->log_mutex); |
| 175 | return err; | 195 | return ret; |
| 176 | } | 196 | } |
| 177 | 197 | ||
| 178 | /* | 198 | /* |
| @@ -2359,8 +2379,8 @@ static int update_log_root(struct btrfs_trans_handle *trans, | |||
| 2359 | return ret; | 2379 | return ret; |
| 2360 | } | 2380 | } |
| 2361 | 2381 | ||
| 2362 | static int wait_log_commit(struct btrfs_trans_handle *trans, | 2382 | static void wait_log_commit(struct btrfs_trans_handle *trans, |
| 2363 | struct btrfs_root *root, unsigned long transid) | 2383 | struct btrfs_root *root, int transid) |
| 2364 | { | 2384 | { |
| 2365 | DEFINE_WAIT(wait); | 2385 | DEFINE_WAIT(wait); |
| 2366 | int index = transid % 2; | 2386 | int index = transid % 2; |
| @@ -2375,36 +2395,63 @@ static int wait_log_commit(struct btrfs_trans_handle *trans, | |||
| 2375 | &wait, TASK_UNINTERRUPTIBLE); | 2395 | &wait, TASK_UNINTERRUPTIBLE); |
| 2376 | mutex_unlock(&root->log_mutex); | 2396 | mutex_unlock(&root->log_mutex); |
| 2377 | 2397 | ||
| 2378 | if (root->fs_info->last_trans_log_full_commit != | 2398 | if (root->log_transid_committed < transid && |
| 2379 | trans->transid && root->log_transid < transid + 2 && | ||
| 2380 | atomic_read(&root->log_commit[index])) | 2399 | atomic_read(&root->log_commit[index])) |
| 2381 | schedule(); | 2400 | schedule(); |
| 2382 | 2401 | ||
| 2383 | finish_wait(&root->log_commit_wait[index], &wait); | 2402 | finish_wait(&root->log_commit_wait[index], &wait); |
| 2384 | mutex_lock(&root->log_mutex); | 2403 | mutex_lock(&root->log_mutex); |
| 2385 | } while (root->fs_info->last_trans_log_full_commit != | 2404 | } while (root->log_transid_committed < transid && |
| 2386 | trans->transid && root->log_transid < transid + 2 && | ||
| 2387 | atomic_read(&root->log_commit[index])); | 2405 | atomic_read(&root->log_commit[index])); |
| 2388 | return 0; | ||
| 2389 | } | 2406 | } |
| 2390 | 2407 | ||
| 2391 | static void wait_for_writer(struct btrfs_trans_handle *trans, | 2408 | static void wait_for_writer(struct btrfs_trans_handle *trans, |
| 2392 | struct btrfs_root *root) | 2409 | struct btrfs_root *root) |
| 2393 | { | 2410 | { |
| 2394 | DEFINE_WAIT(wait); | 2411 | DEFINE_WAIT(wait); |
| 2395 | while (root->fs_info->last_trans_log_full_commit != | 2412 | |
| 2396 | trans->transid && atomic_read(&root->log_writers)) { | 2413 | while (atomic_read(&root->log_writers)) { |
| 2397 | prepare_to_wait(&root->log_writer_wait, | 2414 | prepare_to_wait(&root->log_writer_wait, |
| 2398 | &wait, TASK_UNINTERRUPTIBLE); | 2415 | &wait, TASK_UNINTERRUPTIBLE); |
| 2399 | mutex_unlock(&root->log_mutex); | 2416 | mutex_unlock(&root->log_mutex); |
| 2400 | if (root->fs_info->last_trans_log_full_commit != | 2417 | if (atomic_read(&root->log_writers)) |
| 2401 | trans->transid && atomic_read(&root->log_writers)) | ||
| 2402 | schedule(); | 2418 | schedule(); |
| 2403 | mutex_lock(&root->log_mutex); | 2419 | mutex_lock(&root->log_mutex); |
| 2404 | finish_wait(&root->log_writer_wait, &wait); | 2420 | finish_wait(&root->log_writer_wait, &wait); |
| 2405 | } | 2421 | } |
| 2406 | } | 2422 | } |
| 2407 | 2423 | ||
| 2424 | static inline void btrfs_remove_log_ctx(struct btrfs_root *root, | ||
| 2425 | struct btrfs_log_ctx *ctx) | ||
| 2426 | { | ||
| 2427 | if (!ctx) | ||
| 2428 | return; | ||
| 2429 | |||
| 2430 | mutex_lock(&root->log_mutex); | ||
| 2431 | list_del_init(&ctx->list); | ||
| 2432 | mutex_unlock(&root->log_mutex); | ||
| 2433 | } | ||
| 2434 | |||
| 2435 | /* | ||
| 2436 | * Invoked with the log mutex held, or from a context where it is | ||
| 2437 | * certain that no other task can access the list. | ||
| 2438 | */ | ||
| 2439 | static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root, | ||
| 2440 | int index, int error) | ||
| 2441 | { | ||
| 2442 | struct btrfs_log_ctx *ctx; | ||
| 2443 | |||
| 2444 | if (!error) { | ||
| 2445 | INIT_LIST_HEAD(&root->log_ctxs[index]); | ||
| 2446 | return; | ||
| 2447 | } | ||
| 2448 | |||
| 2449 | list_for_each_entry(ctx, &root->log_ctxs[index], list) | ||
| 2450 | ctx->log_ret = error; | ||
| 2451 | |||
| 2452 | INIT_LIST_HEAD(&root->log_ctxs[index]); | ||
| 2453 | } | ||
| 2454 | |||
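Editorial note: the btrfs_log_ctx machinery replaces "everybody re-derives the outcome from shared counters" with an explicit per-log-transaction waiter list: start_log_trans() links the caller's ctx onto log_ctxs[log_transid % 2], and whichever task performs the commit writes the result into every queued ctx via btrfs_remove_all_log_ctxs() before waking the waiters, so each waiter can return its own log_ret. A stripped-down model of that handoff, with hypothetical names:

	#include <linux/list.h>
	#include <linux/mutex.h>

	struct waiter_ctx {
		int ret;
		struct list_head list;
	};

	/* Caller side: register interest before the commit can happen. */
	static void register_waiter(struct mutex *lock,
				    struct list_head *waiters,
				    struct waiter_ctx *ctx)
	{
		ctx->ret = 0;
		mutex_lock(lock);
		list_add_tail(&ctx->list, waiters);
		mutex_unlock(lock);
	}

	/* Committer side: publish one outcome to every registered waiter.
	 * The real code relies on the waiters being blocked here, which is
	 * why it can skip the lock (see the comment above
	 * btrfs_remove_all_log_ctxs). */
	static void finish_waiters(struct list_head *waiters, int error)
	{
		struct waiter_ctx *ctx;

		list_for_each_entry(ctx, waiters, list)
			ctx->ret = error;
		INIT_LIST_HEAD(waiters);	/* detach them all at once */
	}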
| 2408 | /* | 2455 | /* |
| 2409 | * btrfs_sync_log sends a given tree log down to the disk and | 2456 | * btrfs_sync_log sends a given tree log down to the disk and |
| 2410 | * updates the super blocks to record it. When this call is done, | 2457 | * updates the super blocks to record it. When this call is done, |
| @@ -2418,7 +2465,7 @@ static void wait_for_writer(struct btrfs_trans_handle *trans, | |||
| 2418 | * that has happened. | 2465 | * that has happened. |
| 2419 | */ | 2466 | */ |
| 2420 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 2467 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
| 2421 | struct btrfs_root *root) | 2468 | struct btrfs_root *root, struct btrfs_log_ctx *ctx) |
| 2422 | { | 2469 | { |
| 2423 | int index1; | 2470 | int index1; |
| 2424 | int index2; | 2471 | int index2; |
| @@ -2426,22 +2473,30 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2426 | int ret; | 2473 | int ret; |
| 2427 | struct btrfs_root *log = root->log_root; | 2474 | struct btrfs_root *log = root->log_root; |
| 2428 | struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; | 2475 | struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; |
| 2429 | unsigned long log_transid = 0; | 2476 | int log_transid = 0; |
| 2477 | struct btrfs_log_ctx root_log_ctx; | ||
| 2430 | struct blk_plug plug; | 2478 | struct blk_plug plug; |
| 2431 | 2479 | ||
| 2432 | mutex_lock(&root->log_mutex); | 2480 | mutex_lock(&root->log_mutex); |
| 2433 | log_transid = root->log_transid; | 2481 | log_transid = ctx->log_transid; |
| 2434 | index1 = root->log_transid % 2; | 2482 | if (root->log_transid_committed >= log_transid) { |
| 2483 | mutex_unlock(&root->log_mutex); | ||
| 2484 | return ctx->log_ret; | ||
| 2485 | } | ||
| 2486 | |||
| 2487 | index1 = log_transid % 2; | ||
| 2435 | if (atomic_read(&root->log_commit[index1])) { | 2488 | if (atomic_read(&root->log_commit[index1])) { |
| 2436 | wait_log_commit(trans, root, root->log_transid); | 2489 | wait_log_commit(trans, root, log_transid); |
| 2437 | mutex_unlock(&root->log_mutex); | 2490 | mutex_unlock(&root->log_mutex); |
| 2438 | return 0; | 2491 | return ctx->log_ret; |
| 2439 | } | 2492 | } |
| 2493 | ASSERT(log_transid == root->log_transid); | ||
| 2440 | atomic_set(&root->log_commit[index1], 1); | 2494 | atomic_set(&root->log_commit[index1], 1); |
| 2441 | 2495 | ||
| 2442 | /* wait for previous tree log sync to complete */ | 2496 | /* wait for previous tree log sync to complete */ |
| 2443 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) | 2497 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) |
| 2444 | wait_log_commit(trans, root, root->log_transid - 1); | 2498 | wait_log_commit(trans, root, log_transid - 1); |
| 2499 | |||
| 2445 | while (1) { | 2500 | while (1) { |
| 2446 | int batch = atomic_read(&root->log_batch); | 2501 | int batch = atomic_read(&root->log_batch); |
| 2447 | /* when we're on an ssd, just kick the log commit out */ | 2502 | /* when we're on an ssd, just kick the log commit out */ |
| @@ -2456,7 +2511,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2456 | } | 2511 | } |
| 2457 | 2512 | ||
| 2458 | /* bail out if we need to do a full commit */ | 2513 | /* bail out if we need to do a full commit */ |
| 2459 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | 2514 | if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == |
| 2515 | trans->transid) { | ||
| 2460 | ret = -EAGAIN; | 2516 | ret = -EAGAIN; |
| 2461 | btrfs_free_logged_extents(log, log_transid); | 2517 | btrfs_free_logged_extents(log, log_transid); |
| 2462 | mutex_unlock(&root->log_mutex); | 2518 | mutex_unlock(&root->log_mutex); |
| @@ -2477,6 +2533,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2477 | blk_finish_plug(&plug); | 2533 | blk_finish_plug(&plug); |
| 2478 | btrfs_abort_transaction(trans, root, ret); | 2534 | btrfs_abort_transaction(trans, root, ret); |
| 2479 | btrfs_free_logged_extents(log, log_transid); | 2535 | btrfs_free_logged_extents(log, log_transid); |
| 2536 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | ||
| 2537 | trans->transid; | ||
| 2480 | mutex_unlock(&root->log_mutex); | 2538 | mutex_unlock(&root->log_mutex); |
| 2481 | goto out; | 2539 | goto out; |
| 2482 | } | 2540 | } |
| @@ -2486,7 +2544,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2486 | root->log_transid++; | 2544 | root->log_transid++; |
| 2487 | log->log_transid = root->log_transid; | 2545 | log->log_transid = root->log_transid; |
| 2488 | root->log_start_pid = 0; | 2546 | root->log_start_pid = 0; |
| 2489 | smp_mb(); | ||
| 2490 | /* | 2547 | /* |
| 2491 | * IO has been started, blocks of the log tree have WRITTEN flag set | 2548 | * IO has been started, blocks of the log tree have WRITTEN flag set |
| 2492 | * in their headers. new modifications of the log will be written to | 2549 | * in their headers. new modifications of the log will be written to |
| @@ -2494,9 +2551,16 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2494 | */ | 2551 | */ |
| 2495 | mutex_unlock(&root->log_mutex); | 2552 | mutex_unlock(&root->log_mutex); |
| 2496 | 2553 | ||
| 2554 | btrfs_init_log_ctx(&root_log_ctx); | ||
| 2555 | |||
| 2497 | mutex_lock(&log_root_tree->log_mutex); | 2556 | mutex_lock(&log_root_tree->log_mutex); |
| 2498 | atomic_inc(&log_root_tree->log_batch); | 2557 | atomic_inc(&log_root_tree->log_batch); |
| 2499 | atomic_inc(&log_root_tree->log_writers); | 2558 | atomic_inc(&log_root_tree->log_writers); |
| 2559 | |||
| 2560 | index2 = log_root_tree->log_transid % 2; | ||
| 2561 | list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]); | ||
| 2562 | root_log_ctx.log_transid = log_root_tree->log_transid; | ||
| 2563 | |||
| 2500 | mutex_unlock(&log_root_tree->log_mutex); | 2564 | mutex_unlock(&log_root_tree->log_mutex); |
| 2501 | 2565 | ||
| 2502 | ret = update_log_root(trans, log); | 2566 | ret = update_log_root(trans, log); |
| @@ -2509,13 +2573,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2509 | } | 2573 | } |
| 2510 | 2574 | ||
| 2511 | if (ret) { | 2575 | if (ret) { |
| 2576 | if (!list_empty(&root_log_ctx.list)) | ||
| 2577 | list_del_init(&root_log_ctx.list); | ||
| 2578 | |||
| 2512 | blk_finish_plug(&plug); | 2579 | blk_finish_plug(&plug); |
| 2580 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | ||
| 2581 | trans->transid; | ||
| 2513 | if (ret != -ENOSPC) { | 2582 | if (ret != -ENOSPC) { |
| 2514 | btrfs_abort_transaction(trans, root, ret); | 2583 | btrfs_abort_transaction(trans, root, ret); |
| 2515 | mutex_unlock(&log_root_tree->log_mutex); | 2584 | mutex_unlock(&log_root_tree->log_mutex); |
| 2516 | goto out; | 2585 | goto out; |
| 2517 | } | 2586 | } |
| 2518 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
| 2519 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2587 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
| 2520 | btrfs_free_logged_extents(log, log_transid); | 2588 | btrfs_free_logged_extents(log, log_transid); |
| 2521 | mutex_unlock(&log_root_tree->log_mutex); | 2589 | mutex_unlock(&log_root_tree->log_mutex); |
| @@ -2523,22 +2591,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2523 | goto out; | 2591 | goto out; |
| 2524 | } | 2592 | } |
| 2525 | 2593 | ||
| 2526 | index2 = log_root_tree->log_transid % 2; | 2594 | if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) { |
| 2595 | mutex_unlock(&log_root_tree->log_mutex); | ||
| 2596 | ret = root_log_ctx.log_ret; | ||
| 2597 | goto out; | ||
| 2598 | } | ||
| 2599 | |||
| 2600 | index2 = root_log_ctx.log_transid % 2; | ||
| 2527 | if (atomic_read(&log_root_tree->log_commit[index2])) { | 2601 | if (atomic_read(&log_root_tree->log_commit[index2])) { |
| 2528 | blk_finish_plug(&plug); | 2602 | blk_finish_plug(&plug); |
| 2529 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2603 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
| 2530 | wait_log_commit(trans, log_root_tree, | 2604 | wait_log_commit(trans, log_root_tree, |
| 2531 | log_root_tree->log_transid); | 2605 | root_log_ctx.log_transid); |
| 2532 | btrfs_free_logged_extents(log, log_transid); | 2606 | btrfs_free_logged_extents(log, log_transid); |
| 2533 | mutex_unlock(&log_root_tree->log_mutex); | 2607 | mutex_unlock(&log_root_tree->log_mutex); |
| 2534 | ret = 0; | 2608 | ret = root_log_ctx.log_ret; |
| 2535 | goto out; | 2609 | goto out; |
| 2536 | } | 2610 | } |
| 2611 | ASSERT(root_log_ctx.log_transid == log_root_tree->log_transid); | ||
| 2537 | atomic_set(&log_root_tree->log_commit[index2], 1); | 2612 | atomic_set(&log_root_tree->log_commit[index2], 1); |
| 2538 | 2613 | ||
| 2539 | if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) { | 2614 | if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) { |
| 2540 | wait_log_commit(trans, log_root_tree, | 2615 | wait_log_commit(trans, log_root_tree, |
| 2541 | log_root_tree->log_transid - 1); | 2616 | root_log_ctx.log_transid - 1); |
| 2542 | } | 2617 | } |
| 2543 | 2618 | ||
| 2544 | wait_for_writer(trans, log_root_tree); | 2619 | wait_for_writer(trans, log_root_tree); |
| @@ -2547,7 +2622,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2547 | * now that we've moved on to the tree of log tree roots, | 2622 | * now that we've moved on to the tree of log tree roots, |
| 2548 | * check the full commit flag again | 2623 | * check the full commit flag again |
| 2549 | */ | 2624 | */ |
| 2550 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | 2625 | if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == |
| 2626 | trans->transid) { | ||
| 2551 | blk_finish_plug(&plug); | 2627 | blk_finish_plug(&plug); |
| 2552 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2628 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
| 2553 | btrfs_free_logged_extents(log, log_transid); | 2629 | btrfs_free_logged_extents(log, log_transid); |
| @@ -2561,6 +2637,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2561 | EXTENT_DIRTY | EXTENT_NEW); | 2637 | EXTENT_DIRTY | EXTENT_NEW); |
| 2562 | blk_finish_plug(&plug); | 2638 | blk_finish_plug(&plug); |
| 2563 | if (ret) { | 2639 | if (ret) { |
| 2640 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | ||
| 2641 | trans->transid; | ||
| 2564 | btrfs_abort_transaction(trans, root, ret); | 2642 | btrfs_abort_transaction(trans, root, ret); |
| 2565 | btrfs_free_logged_extents(log, log_transid); | 2643 | btrfs_free_logged_extents(log, log_transid); |
| 2566 | mutex_unlock(&log_root_tree->log_mutex); | 2644 | mutex_unlock(&log_root_tree->log_mutex); |
| @@ -2578,8 +2656,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2578 | btrfs_header_level(log_root_tree->node)); | 2656 | btrfs_header_level(log_root_tree->node)); |
| 2579 | 2657 | ||
| 2580 | log_root_tree->log_transid++; | 2658 | log_root_tree->log_transid++; |
| 2581 | smp_mb(); | ||
| 2582 | |||
| 2583 | mutex_unlock(&log_root_tree->log_mutex); | 2659 | mutex_unlock(&log_root_tree->log_mutex); |
| 2584 | 2660 | ||
| 2585 | /* | 2661 | /* |
| @@ -2591,6 +2667,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2591 | */ | 2667 | */ |
| 2592 | ret = write_ctree_super(trans, root->fs_info->tree_root, 1); | 2668 | ret = write_ctree_super(trans, root->fs_info->tree_root, 1); |
| 2593 | if (ret) { | 2669 | if (ret) { |
| 2670 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | ||
| 2671 | trans->transid; | ||
| 2594 | btrfs_abort_transaction(trans, root, ret); | 2672 | btrfs_abort_transaction(trans, root, ret); |
| 2595 | goto out_wake_log_root; | 2673 | goto out_wake_log_root; |
| 2596 | } | 2674 | } |
| @@ -2601,13 +2679,28 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2601 | mutex_unlock(&root->log_mutex); | 2679 | mutex_unlock(&root->log_mutex); |
| 2602 | 2680 | ||
| 2603 | out_wake_log_root: | 2681 | out_wake_log_root: |
| 2682 | /* | ||
| 2683 | * We don't need to take the log_mutex here because we are sure | ||
| 2684 | * all the other tasks are blocked. | ||
| 2685 | */ | ||
| 2686 | btrfs_remove_all_log_ctxs(log_root_tree, index2, ret); | ||
| 2687 | |||
| 2688 | mutex_lock(&log_root_tree->log_mutex); | ||
| 2689 | log_root_tree->log_transid_committed++; | ||
| 2604 | atomic_set(&log_root_tree->log_commit[index2], 0); | 2690 | atomic_set(&log_root_tree->log_commit[index2], 0); |
| 2605 | smp_mb(); | 2691 | mutex_unlock(&log_root_tree->log_mutex); |
| 2692 | |||
| 2606 | if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) | 2693 | if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) |
| 2607 | wake_up(&log_root_tree->log_commit_wait[index2]); | 2694 | wake_up(&log_root_tree->log_commit_wait[index2]); |
| 2608 | out: | 2695 | out: |
| 2696 | /* See above. */ | ||
| 2697 | btrfs_remove_all_log_ctxs(root, index1, ret); | ||
| 2698 | |||
| 2699 | mutex_lock(&root->log_mutex); | ||
| 2700 | root->log_transid_committed++; | ||
| 2609 | atomic_set(&root->log_commit[index1], 0); | 2701 | atomic_set(&root->log_commit[index1], 0); |
| 2610 | smp_mb(); | 2702 | mutex_unlock(&root->log_mutex); |
| 2703 | |||
| 2611 | if (waitqueue_active(&root->log_commit_wait[index1])) | 2704 | if (waitqueue_active(&root->log_commit_wait[index1])) |
| 2612 | wake_up(&root->log_commit_wait[index1]); | 2705 | wake_up(&root->log_commit_wait[index1]); |
| 2613 | return ret; | 2706 | return ret; |
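Editorial note: wait_log_commit() now keys off root->log_transid_committed rather than re-deriving completion from last_trans_log_full_commit and transid arithmetic, but the sleep itself remains the classic open-coded waitqueue loop: prepare_to_wait(), drop the lock, re-check, schedule(), retake the lock, repeat. A generic version of the idiom under an assumed mutex/waitqueue pair (names are illustrative):

	#include <linux/wait.h>
	#include <linux/sched.h>
	#include <linux/mutex.h>

	/* Sleep until *committed reaches the transid we care about. */
	static void wait_for_transid(struct mutex *lock,
				     wait_queue_head_t *wq,
				     int *committed, int transid)
	{
		DEFINE_WAIT(wait);

		mutex_lock(lock);
		while (*committed < transid) {
			prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
			mutex_unlock(lock);
			/* Re-check without the lock; prepare_to_wait()
			 * already queued us, so a concurrent wake_up()
			 * cannot be lost. */
			if (ACCESS_ONCE(*committed) < transid)
				schedule();
			mutex_lock(lock);
			finish_wait(wq, &wait);
		}
		mutex_unlock(lock);
	}

The waker mirrors it: bump log_transid_committed under log_mutex, drop the lock, then wake_up() the queue, which is what the reworked out_wake_log_root/out paths above do.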
| @@ -3479,7 +3572,8 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b) | |||
| 3479 | 3572 | ||
| 3480 | static int log_one_extent(struct btrfs_trans_handle *trans, | 3573 | static int log_one_extent(struct btrfs_trans_handle *trans, |
| 3481 | struct inode *inode, struct btrfs_root *root, | 3574 | struct inode *inode, struct btrfs_root *root, |
| 3482 | struct extent_map *em, struct btrfs_path *path) | 3575 | struct extent_map *em, struct btrfs_path *path, |
| 3576 | struct list_head *logged_list) | ||
| 3483 | { | 3577 | { |
| 3484 | struct btrfs_root *log = root->log_root; | 3578 | struct btrfs_root *log = root->log_root; |
| 3485 | struct btrfs_file_extent_item *fi; | 3579 | struct btrfs_file_extent_item *fi; |
| @@ -3495,7 +3589,6 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
| 3495 | u64 extent_offset = em->start - em->orig_start; | 3589 | u64 extent_offset = em->start - em->orig_start; |
| 3496 | u64 block_len; | 3590 | u64 block_len; |
| 3497 | int ret; | 3591 | int ret; |
| 3498 | int index = log->log_transid % 2; | ||
| 3499 | bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 3592 | bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
| 3500 | int extent_inserted = 0; | 3593 | int extent_inserted = 0; |
| 3501 | 3594 | ||
| @@ -3579,17 +3672,12 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
| 3579 | * First check and see if our csums are on our outstanding ordered | 3672 | * First check and see if our csums are on our outstanding ordered |
| 3580 | * extents. | 3673 | * extents. |
| 3581 | */ | 3674 | */ |
| 3582 | again: | 3675 | list_for_each_entry(ordered, logged_list, log_list) { |
| 3583 | spin_lock_irq(&log->log_extents_lock[index]); | ||
| 3584 | list_for_each_entry(ordered, &log->logged_list[index], log_list) { | ||
| 3585 | struct btrfs_ordered_sum *sum; | 3676 | struct btrfs_ordered_sum *sum; |
| 3586 | 3677 | ||
| 3587 | if (!mod_len) | 3678 | if (!mod_len) |
| 3588 | break; | 3679 | break; |
| 3589 | 3680 | ||
| 3590 | if (ordered->inode != inode) | ||
| 3591 | continue; | ||
| 3592 | |||
| 3593 | if (ordered->file_offset + ordered->len <= mod_start || | 3681 | if (ordered->file_offset + ordered->len <= mod_start || |
| 3594 | mod_start + mod_len <= ordered->file_offset) | 3682 | mod_start + mod_len <= ordered->file_offset) |
| 3595 | continue; | 3683 | continue; |
| @@ -3632,12 +3720,6 @@ again: | |||
| 3632 | if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM, | 3720 | if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM, |
| 3633 | &ordered->flags)) | 3721 | &ordered->flags)) |
| 3634 | continue; | 3722 | continue; |
| 3635 | atomic_inc(&ordered->refs); | ||
| 3636 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
| 3637 | /* | ||
| 3638 | * we've dropped the lock, we must either break or | ||
| 3639 | * start over after this. | ||
| 3640 | */ | ||
| 3641 | 3723 | ||
| 3642 | if (ordered->csum_bytes_left) { | 3724 | if (ordered->csum_bytes_left) { |
| 3643 | btrfs_start_ordered_extent(inode, ordered, 0); | 3725 | btrfs_start_ordered_extent(inode, ordered, 0); |
| @@ -3647,16 +3729,11 @@ again: | |||
| 3647 | 3729 | ||
| 3648 | list_for_each_entry(sum, &ordered->list, list) { | 3730 | list_for_each_entry(sum, &ordered->list, list) { |
| 3649 | ret = btrfs_csum_file_blocks(trans, log, sum); | 3731 | ret = btrfs_csum_file_blocks(trans, log, sum); |
| 3650 | if (ret) { | 3732 | if (ret) |
| 3651 | btrfs_put_ordered_extent(ordered); | ||
| 3652 | goto unlocked; | 3733 | goto unlocked; |
| 3653 | } | ||
| 3654 | } | 3734 | } |
| 3655 | btrfs_put_ordered_extent(ordered); | ||
| 3656 | goto again; | ||
| 3657 | 3735 | ||
| 3658 | } | 3736 | } |
| 3659 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
| 3660 | unlocked: | 3737 | unlocked: |
| 3661 | 3738 | ||
| 3662 | if (!mod_len || ret) | 3739 | if (!mod_len || ret) |
| @@ -3694,7 +3771,8 @@ unlocked: | |||
| 3694 | static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | 3771 | static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, |
| 3695 | struct btrfs_root *root, | 3772 | struct btrfs_root *root, |
| 3696 | struct inode *inode, | 3773 | struct inode *inode, |
| 3697 | struct btrfs_path *path) | 3774 | struct btrfs_path *path, |
| 3775 | struct list_head *logged_list) | ||
| 3698 | { | 3776 | { |
| 3699 | struct extent_map *em, *n; | 3777 | struct extent_map *em, *n; |
| 3700 | struct list_head extents; | 3778 | struct list_head extents; |
| @@ -3752,7 +3830,7 @@ process: | |||
| 3752 | 3830 | ||
| 3753 | write_unlock(&tree->lock); | 3831 | write_unlock(&tree->lock); |
| 3754 | 3832 | ||
| 3755 | ret = log_one_extent(trans, inode, root, em, path); | 3833 | ret = log_one_extent(trans, inode, root, em, path, logged_list); |
| 3756 | write_lock(&tree->lock); | 3834 | write_lock(&tree->lock); |
| 3757 | clear_em_logging(tree, em); | 3835 | clear_em_logging(tree, em); |
| 3758 | free_extent_map(em); | 3836 | free_extent_map(em); |
| @@ -3788,6 +3866,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 3788 | struct btrfs_key max_key; | 3866 | struct btrfs_key max_key; |
| 3789 | struct btrfs_root *log = root->log_root; | 3867 | struct btrfs_root *log = root->log_root; |
| 3790 | struct extent_buffer *src = NULL; | 3868 | struct extent_buffer *src = NULL; |
| 3869 | LIST_HEAD(logged_list); | ||
| 3791 | u64 last_extent = 0; | 3870 | u64 last_extent = 0; |
| 3792 | int err = 0; | 3871 | int err = 0; |
| 3793 | int ret; | 3872 | int ret; |
| @@ -3836,7 +3915,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 3836 | 3915 | ||
| 3837 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 3916 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
| 3838 | 3917 | ||
| 3839 | btrfs_get_logged_extents(log, inode); | 3918 | btrfs_get_logged_extents(inode, &logged_list); |
| 3840 | 3919 | ||
| 3841 | /* | 3920 | /* |
| 3842 | * a brute force approach to making sure we get the most uptodate | 3921 | * a brute force approach to making sure we get the most uptodate |
| @@ -3962,7 +4041,8 @@ log_extents: | |||
| 3962 | btrfs_release_path(path); | 4041 | btrfs_release_path(path); |
| 3963 | btrfs_release_path(dst_path); | 4042 | btrfs_release_path(dst_path); |
| 3964 | if (fast_search) { | 4043 | if (fast_search) { |
| 3965 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path); | 4044 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path, |
| 4045 | &logged_list); | ||
| 3966 | if (ret) { | 4046 | if (ret) { |
| 3967 | err = ret; | 4047 | err = ret; |
| 3968 | goto out_unlock; | 4048 | goto out_unlock; |
| @@ -3987,8 +4067,10 @@ log_extents: | |||
| 3987 | BTRFS_I(inode)->logged_trans = trans->transid; | 4067 | BTRFS_I(inode)->logged_trans = trans->transid; |
| 3988 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; | 4068 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; |
| 3989 | out_unlock: | 4069 | out_unlock: |
| 3990 | if (err) | 4070 | if (unlikely(err)) |
| 3991 | btrfs_free_logged_extents(log, log->log_transid); | 4071 | btrfs_put_logged_extents(&logged_list); |
| 4072 | else | ||
| 4073 | btrfs_submit_logged_extents(&logged_list, log); | ||
| 3992 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 4074 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
| 3993 | 4075 | ||
| 3994 | btrfs_free_path(path); | 4076 | btrfs_free_path(path); |
| @@ -4079,7 +4161,8 @@ out: | |||
| 4079 | */ | 4161 | */ |
| 4080 | static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | 4162 | static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, |
| 4081 | struct btrfs_root *root, struct inode *inode, | 4163 | struct btrfs_root *root, struct inode *inode, |
| 4082 | struct dentry *parent, int exists_only) | 4164 | struct dentry *parent, int exists_only, |
| 4165 | struct btrfs_log_ctx *ctx) | ||
| 4083 | { | 4166 | { |
| 4084 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; | 4167 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; |
| 4085 | struct super_block *sb; | 4168 | struct super_block *sb; |
| @@ -4116,9 +4199,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
| 4116 | goto end_no_trans; | 4199 | goto end_no_trans; |
| 4117 | } | 4200 | } |
| 4118 | 4201 | ||
| 4119 | ret = start_log_trans(trans, root); | 4202 | ret = start_log_trans(trans, root, ctx); |
| 4120 | if (ret) | 4203 | if (ret) |
| 4121 | goto end_trans; | 4204 | goto end_no_trans; |
| 4122 | 4205 | ||
| 4123 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 4206 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
| 4124 | if (ret) | 4207 | if (ret) |
| @@ -4166,6 +4249,9 @@ end_trans: | |||
| 4166 | root->fs_info->last_trans_log_full_commit = trans->transid; | 4249 | root->fs_info->last_trans_log_full_commit = trans->transid; |
| 4167 | ret = 1; | 4250 | ret = 1; |
| 4168 | } | 4251 | } |
| 4252 | |||
| 4253 | if (ret) | ||
| 4254 | btrfs_remove_log_ctx(root, ctx); | ||
| 4169 | btrfs_end_log_trans(root); | 4255 | btrfs_end_log_trans(root); |
| 4170 | end_no_trans: | 4256 | end_no_trans: |
| 4171 | return ret; | 4257 | return ret; |
| @@ -4178,12 +4264,14 @@ end_no_trans: | |||
| 4178 | * data on disk. | 4264 | * data on disk. |
| 4179 | */ | 4265 | */ |
| 4180 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 4266 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
| 4181 | struct btrfs_root *root, struct dentry *dentry) | 4267 | struct btrfs_root *root, struct dentry *dentry, |
| 4268 | struct btrfs_log_ctx *ctx) | ||
| 4182 | { | 4269 | { |
| 4183 | struct dentry *parent = dget_parent(dentry); | 4270 | struct dentry *parent = dget_parent(dentry); |
| 4184 | int ret; | 4271 | int ret; |
| 4185 | 4272 | ||
| 4186 | ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0); | 4273 | ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, |
| 4274 | 0, ctx); | ||
| 4187 | dput(parent); | 4275 | dput(parent); |
| 4188 | 4276 | ||
| 4189 | return ret; | 4277 | return ret; |
| @@ -4420,6 +4508,6 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans, | |||
| 4420 | root->fs_info->last_trans_committed)) | 4508 | root->fs_info->last_trans_committed)) |
| 4421 | return 0; | 4509 | return 0; |
| 4422 | 4510 | ||
| 4423 | return btrfs_log_inode_parent(trans, root, inode, parent, 1); | 4511 | return btrfs_log_inode_parent(trans, root, inode, parent, 1, NULL); |
| 4424 | } | 4512 | } |
| 4425 | 4513 | ||
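Editorial note: the logged_list rework trades a root-global list, guarded by log_extents_lock and walked with the take-a-reference / drop-the-lock / "goto again" restart dance the deleted lines show, for a private per-inode list collected under the inode's log_mutex. log_one_extent() can then iterate it with a plain list_for_each_entry() and no restarts, and btrfs_log_inode() either submits or puts the whole list when it is done. The underlying "snapshot under the lock, walk privately" pattern, with hypothetical names:

	#include <linux/list.h>
	#include <linux/spinlock.h>

	/* Move everything from a shared, lock-protected list onto a list
	 * owned by the caller; the caller can then walk it without the
	 * lock and without restart logic. */
	static void snapshot_list(spinlock_t *lock,
				  struct list_head *shared,
				  struct list_head *mine)
	{
		spin_lock_irq(lock);
		list_splice_init(shared, mine);
		spin_unlock_irq(lock);
	}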
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 1d4ae0d15a70..91b145fce333 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
| @@ -22,14 +22,28 @@ | |||
| 22 | /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */ | 22 | /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */ |
| 23 | #define BTRFS_NO_LOG_SYNC 256 | 23 | #define BTRFS_NO_LOG_SYNC 256 |
| 24 | 24 | ||
| 25 | struct btrfs_log_ctx { | ||
| 26 | int log_ret; | ||
| 27 | int log_transid; | ||
| 28 | struct list_head list; | ||
| 29 | }; | ||
| 30 | |||
| 31 | static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) | ||
| 32 | { | ||
| 33 | ctx->log_ret = 0; | ||
| 34 | ctx->log_transid = 0; | ||
| 35 | INIT_LIST_HEAD(&ctx->list); | ||
| 36 | } | ||
| 37 | |||
| 25 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 38 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
| 26 | struct btrfs_root *root); | 39 | struct btrfs_root *root, struct btrfs_log_ctx *ctx); |
| 27 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); | 40 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); |
| 28 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | 41 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, |
| 29 | struct btrfs_fs_info *fs_info); | 42 | struct btrfs_fs_info *fs_info); |
| 30 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); | 43 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); |
| 31 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 44 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
| 32 | struct btrfs_root *root, struct dentry *dentry); | 45 | struct btrfs_root *root, struct dentry *dentry, |
| 46 | struct btrfs_log_ctx *ctx); | ||
| 33 | int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | 47 | int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, |
| 34 | struct btrfs_root *root, | 48 | struct btrfs_root *root, |
| 35 | const char *name, int name_len, | 49 | const char *name, int name_len, |
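Editorial note: putting the header pieces together, a sync caller is expected to stack-allocate the ctx, initialize it, thread it through the logging call, and then hand the same ctx to btrfs_sync_log(), which returns this waiter's own ctx->log_ret even when some other task performed the actual commit. A plausible call shape, assuming a started transaction handle and a dentry in scope (not copied from this patch):

	struct btrfs_log_ctx ctx;
	int ret;

	btrfs_init_log_ctx(&ctx);

	ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx);
	if (ret == 0)
		/* waits for -- or performs -- the commit of
		 * ctx.log_transid and returns our own log_ret */
		ret = btrfs_sync_log(trans, root, &ctx);

On failure, btrfs_log_inode_parent() unlinks the ctx again via btrfs_remove_log_ctx(), so an error return presumably means falling back to a full transaction commit rather than calling btrfs_sync_log().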
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bab0b84d8f80..d241130a32fd 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -415,7 +415,8 @@ loop_lock: | |||
| 415 | device->running_pending = 1; | 415 | device->running_pending = 1; |
| 416 | 416 | ||
| 417 | spin_unlock(&device->io_lock); | 417 | spin_unlock(&device->io_lock); |
| 418 | btrfs_requeue_work(&device->work); | 418 | btrfs_queue_work(fs_info->submit_workers, |
| 419 | &device->work); | ||
| 419 | goto done; | 420 | goto done; |
| 420 | } | 421 | } |
| 421 | /* unplug every 64 requests just for good measure */ | 422 | /* unplug every 64 requests just for good measure */ |
| @@ -5263,6 +5264,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
| 5263 | static void btrfs_end_bio(struct bio *bio, int err) | 5264 | static void btrfs_end_bio(struct bio *bio, int err) |
| 5264 | { | 5265 | { |
| 5265 | struct btrfs_bio *bbio = bio->bi_private; | 5266 | struct btrfs_bio *bbio = bio->bi_private; |
| 5267 | struct btrfs_device *dev = bbio->stripes[0].dev; | ||
| 5266 | int is_orig_bio = 0; | 5268 | int is_orig_bio = 0; |
| 5267 | 5269 | ||
| 5268 | if (err) { | 5270 | if (err) { |
| @@ -5270,7 +5272,6 @@ static void btrfs_end_bio(struct bio *bio, int err) | |||
| 5270 | if (err == -EIO || err == -EREMOTEIO) { | 5272 | if (err == -EIO || err == -EREMOTEIO) { |
| 5271 | unsigned int stripe_index = | 5273 | unsigned int stripe_index = |
| 5272 | btrfs_io_bio(bio)->stripe_index; | 5274 | btrfs_io_bio(bio)->stripe_index; |
| 5273 | struct btrfs_device *dev; | ||
| 5274 | 5275 | ||
| 5275 | BUG_ON(stripe_index >= bbio->num_stripes); | 5276 | BUG_ON(stripe_index >= bbio->num_stripes); |
| 5276 | dev = bbio->stripes[stripe_index].dev; | 5277 | dev = bbio->stripes[stripe_index].dev; |
| @@ -5292,6 +5293,8 @@ static void btrfs_end_bio(struct bio *bio, int err) | |||
| 5292 | if (bio == bbio->orig_bio) | 5293 | if (bio == bbio->orig_bio) |
| 5293 | is_orig_bio = 1; | 5294 | is_orig_bio = 1; |
| 5294 | 5295 | ||
| 5296 | btrfs_bio_counter_dec(bbio->fs_info); | ||
| 5297 | |||
| 5295 | if (atomic_dec_and_test(&bbio->stripes_pending)) { | 5298 | if (atomic_dec_and_test(&bbio->stripes_pending)) { |
| 5296 | if (!is_orig_bio) { | 5299 | if (!is_orig_bio) { |
| 5297 | bio_put(bio); | 5300 | bio_put(bio); |
| @@ -5328,13 +5331,6 @@ static void btrfs_end_bio(struct bio *bio, int err) | |||
| 5328 | } | 5331 | } |
| 5329 | } | 5332 | } |
| 5330 | 5333 | ||
| 5331 | struct async_sched { | ||
| 5332 | struct bio *bio; | ||
| 5333 | int rw; | ||
| 5334 | struct btrfs_fs_info *info; | ||
| 5335 | struct btrfs_work work; | ||
| 5336 | }; | ||
| 5337 | |||
| 5338 | /* | 5334 | /* |
| 5339 | * see run_scheduled_bios for a description of why bios are collected for | 5335 | * see run_scheduled_bios for a description of why bios are collected for |
| 5340 | * async submit. | 5336 | * async submit. |
| @@ -5391,8 +5387,8 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root, | |||
| 5391 | spin_unlock(&device->io_lock); | 5387 | spin_unlock(&device->io_lock); |
| 5392 | 5388 | ||
| 5393 | if (should_queue) | 5389 | if (should_queue) |
| 5394 | btrfs_queue_worker(&root->fs_info->submit_workers, | 5390 | btrfs_queue_work(root->fs_info->submit_workers, |
| 5395 | &device->work); | 5391 | &device->work); |
| 5396 | } | 5392 | } |
| 5397 | 5393 | ||
| 5398 | static int bio_size_ok(struct block_device *bdev, struct bio *bio, | 5394 | static int bio_size_ok(struct block_device *bdev, struct bio *bio, |
| @@ -5447,6 +5443,9 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, | |||
| 5447 | } | 5443 | } |
| 5448 | #endif | 5444 | #endif |
| 5449 | bio->bi_bdev = dev->bdev; | 5445 | bio->bi_bdev = dev->bdev; |
| 5446 | |||
| 5447 | btrfs_bio_counter_inc_noblocked(root->fs_info); | ||
| 5448 | |||
| 5450 | if (async) | 5449 | if (async) |
| 5451 | btrfs_schedule_bio(root, dev, rw, bio); | 5450 | btrfs_schedule_bio(root, dev, rw, bio); |
| 5452 | else | 5451 | else |
| @@ -5515,28 +5514,38 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
| 5515 | length = bio->bi_iter.bi_size; | 5514 | length = bio->bi_iter.bi_size; |
| 5516 | map_length = length; | 5515 | map_length = length; |
| 5517 | 5516 | ||
| 5517 | btrfs_bio_counter_inc_blocked(root->fs_info); | ||
| 5518 | ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, | 5518 | ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, |
| 5519 | mirror_num, &raid_map); | 5519 | mirror_num, &raid_map); |
| 5520 | if (ret) /* -ENOMEM */ | 5520 | if (ret) { |
| 5521 | btrfs_bio_counter_dec(root->fs_info); | ||
| 5521 | return ret; | 5522 | return ret; |
| 5523 | } | ||
| 5522 | 5524 | ||
| 5523 | total_devs = bbio->num_stripes; | 5525 | total_devs = bbio->num_stripes; |
| 5524 | bbio->orig_bio = first_bio; | 5526 | bbio->orig_bio = first_bio; |
| 5525 | bbio->private = first_bio->bi_private; | 5527 | bbio->private = first_bio->bi_private; |
| 5526 | bbio->end_io = first_bio->bi_end_io; | 5528 | bbio->end_io = first_bio->bi_end_io; |
| 5529 | bbio->fs_info = root->fs_info; | ||
| 5527 | atomic_set(&bbio->stripes_pending, bbio->num_stripes); | 5530 | atomic_set(&bbio->stripes_pending, bbio->num_stripes); |
| 5528 | 5531 | ||
| 5529 | if (raid_map) { | 5532 | if (raid_map) { |
| 5530 | /* In this case, map_length has been set to the length of | 5533 | /* In this case, map_length has been set to the length of |
| 5531 | a single stripe; not the whole write */ | 5534 | a single stripe; not the whole write */ |
| 5532 | if (rw & WRITE) { | 5535 | if (rw & WRITE) { |
| 5533 | return raid56_parity_write(root, bio, bbio, | 5536 | ret = raid56_parity_write(root, bio, bbio, |
| 5534 | raid_map, map_length); | 5537 | raid_map, map_length); |
| 5535 | } else { | 5538 | } else { |
| 5536 | return raid56_parity_recover(root, bio, bbio, | 5539 | ret = raid56_parity_recover(root, bio, bbio, |
| 5537 | raid_map, map_length, | 5540 | raid_map, map_length, |
| 5538 | mirror_num); | 5541 | mirror_num); |
| 5539 | } | 5542 | } |
| 5543 | /* | ||
| 5544 | * FIXME: replace doesn't support raid56 yet; please fix | ||
| 5545 | * it in the future. | ||
| 5546 | */ | ||
| 5547 | btrfs_bio_counter_dec(root->fs_info); | ||
| 5548 | return ret; | ||
| 5540 | } | 5549 | } |
| 5541 | 5550 | ||
| 5542 | if (map_length < length) { | 5551 | if (map_length < length) { |
| @@ -5578,6 +5587,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
| 5578 | async_submit); | 5587 | async_submit); |
| 5579 | dev_nr++; | 5588 | dev_nr++; |
| 5580 | } | 5589 | } |
| 5590 | btrfs_bio_counter_dec(root->fs_info); | ||
| 5581 | return 0; | 5591 | return 0; |
| 5582 | } | 5592 | } |
| 5583 | 5593 | ||
| @@ -5666,7 +5676,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, | |||
| 5666 | else | 5676 | else |
| 5667 | generate_random_uuid(dev->uuid); | 5677 | generate_random_uuid(dev->uuid); |
| 5668 | 5678 | ||
| 5669 | dev->work.func = pending_bios_fn; | 5679 | btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL); |
| 5670 | 5680 | ||
| 5671 | return dev; | 5681 | return dev; |
| 5672 | } | 5682 | } |
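Editorial note: the btrfs_bio_counter_* calls bracket every bio the mapping layer has in flight: taken once (blocking variant) at the top of btrfs_map_bio(), once per stripe (non-blocking variant) in submit_stripe_bio(), and dropped once per completed bio in btrfs_end_bio() and again when btrfs_map_bio() itself returns, which gives dev-replace a way to drain outstanding bios. The counter implementation is not part of this hunk; as a sketch of the idea only, an atomic in-flight count plus a waitqueue would behave like this (hypothetical names):

	#include <linux/atomic.h>
	#include <linux/wait.h>

	static atomic_t inflight_bios = ATOMIC_INIT(0);
	static DECLARE_WAIT_QUEUE_HEAD(inflight_drained);

	static void bio_counter_inc(void)
	{
		atomic_inc(&inflight_bios);
	}

	static void bio_counter_dec(void)
	{
		if (atomic_dec_and_test(&inflight_bios))
			wake_up(&inflight_drained);
	}

	/* e.g. the dev-replace path would wait here before switching
	 * the target device */
	static void wait_for_bio_drain(void)
	{
		wait_event(inflight_drained,
			   atomic_read(&inflight_bios) == 0);
	}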
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 8b3cd142b373..80754f9dd3df 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
| @@ -192,6 +192,7 @@ typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); | |||
| 192 | 192 | ||
| 193 | struct btrfs_bio { | 193 | struct btrfs_bio { |
| 194 | atomic_t stripes_pending; | 194 | atomic_t stripes_pending; |
| 195 | struct btrfs_fs_info *fs_info; | ||
| 195 | bio_end_io_t *end_io; | 196 | bio_end_io_t *end_io; |
| 196 | struct bio *orig_bio; | 197 | struct bio *orig_bio; |
| 197 | void *private; | 198 | void *private; |
