Diffstat (limited to 'fs/btrfs')
38 files changed, 2344 insertions, 1879 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index c1e0b0caf9cc..5a201d81049c 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2007 Oracle. All rights reserved.
+ * Copyright (C) 2014 Fujitsu. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
@@ -21,708 +22,315 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/freezer.h>
+#include <linux/workqueue.h>
 #include "async-thread.h"
+#include "ctree.h"
+
+#define WORK_DONE_BIT 0
+#define WORK_ORDER_DONE_BIT 1
+#define WORK_HIGH_PRIO_BIT 2
+
+#define NO_THRESHOLD (-1)
+#define DFT_THRESHOLD (32)
+
+struct __btrfs_workqueue {
+	struct workqueue_struct *normal_wq;
+	/* List head pointing to ordered work list */
+	struct list_head ordered_list;
+
+	/* Spinlock for ordered_list */
+	spinlock_t list_lock;
+
+	/* Thresholding related variants */
+	atomic_t pending;
+	int max_active;
+	int current_max;
+	int thresh;
+	unsigned int count;
+	spinlock_t thres_lock;
+};
 
-#define WORK_QUEUED_BIT 0
-#define WORK_DONE_BIT 1
-#define WORK_ORDER_DONE_BIT 2
-#define WORK_HIGH_PRIO_BIT 3
-
-/*
- * container for the kthread task pointer and the list of pending work
- * One of these is allocated per thread.
- */
-struct btrfs_worker_thread {
-	/* pool we belong to */
-	struct btrfs_workers *workers;
-
-	/* list of struct btrfs_work that are waiting for service */
-	struct list_head pending;
-	struct list_head prio_pending;
-
-	/* list of worker threads from struct btrfs_workers */
-	struct list_head worker_list;
-
-	/* kthread */
-	struct task_struct *task;
+struct btrfs_workqueue {
+	struct __btrfs_workqueue *normal;
+	struct __btrfs_workqueue *high;
+};
 
-	/* number of things on the pending list */
-	atomic_t num_pending;
+static inline struct __btrfs_workqueue
+*__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
+			 int thresh)
+{
+	struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
 
-	/* reference counter for this struct */
-	atomic_t refs;
+	if (unlikely(!ret))
+		return NULL;
 
-	unsigned long sequence;
+	ret->max_active = max_active;
+	atomic_set(&ret->pending, 0);
+	if (thresh == 0)
+		thresh = DFT_THRESHOLD;
+	/* For low threshold, disabling threshold is a better choice */
+	if (thresh < DFT_THRESHOLD) {
+		ret->current_max = max_active;
+		ret->thresh = NO_THRESHOLD;
+	} else {
+		ret->current_max = 1;
+		ret->thresh = thresh;
+	}
 
-	/* protects the pending list. */
-	spinlock_t lock;
+	if (flags & WQ_HIGHPRI)
+		ret->normal_wq = alloc_workqueue("%s-%s-high", flags,
+						 ret->max_active,
+						 "btrfs", name);
+	else
+		ret->normal_wq = alloc_workqueue("%s-%s", flags,
+						 ret->max_active, "btrfs",
+						 name);
+	if (unlikely(!ret->normal_wq)) {
+		kfree(ret);
+		return NULL;
+	}
 
-	/* set to non-zero when this thread is already awake and kicking */
-	int working;
+	INIT_LIST_HEAD(&ret->ordered_list);
+	spin_lock_init(&ret->list_lock);
+	spin_lock_init(&ret->thres_lock);
+	trace_btrfs_workqueue_alloc(ret, name, flags & WQ_HIGHPRI);
+	return ret;
+}
 
-	/* are we currently idle */
-	int idle;
-};
+static inline void
+__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq);
 
-static int __btrfs_start_workers(struct btrfs_workers *workers);
+struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
+					      int flags,
+					      int max_active,
+					      int thresh)
+{
+	struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
 
-/*
- * btrfs_start_workers uses kthread_run, which can block waiting for memory
- * for a very long time. It will actually throttle on page writeback,
- * and so it may not make progress until after our btrfs worker threads
- * process all of the pending work structs in their queue
- *
- * This means we can't use btrfs_start_workers from inside a btrfs worker
- * thread that is used as part of cleaning dirty memory, which pretty much
- * involves all of the worker threads.
- *
- * Instead we have a helper queue who never has more than one thread
- * where we scheduler thread start operations. This worker_start struct
- * is used to contain the work and hold a pointer to the queue that needs
- * another worker.
- */
-struct worker_start {
-	struct btrfs_work work;
-	struct btrfs_workers *queue;
-};
+	if (unlikely(!ret))
+		return NULL;
 
-static void start_new_worker_func(struct btrfs_work *work)
-{
-	struct worker_start *start;
-	start = container_of(work, struct worker_start, work);
-	__btrfs_start_workers(start->queue);
-	kfree(start);
-}
+	ret->normal = __btrfs_alloc_workqueue(name, flags & ~WQ_HIGHPRI,
+					      max_active, thresh);
+	if (unlikely(!ret->normal)) {
+		kfree(ret);
+		return NULL;
+	}
 
-/*
- * helper function to move a thread onto the idle list after it
- * has finished some requests.
- */
-static void check_idle_worker(struct btrfs_worker_thread *worker)
-{
-	if (!worker->idle && atomic_read(&worker->num_pending) <
-	    worker->workers->idle_thresh / 2) {
-		unsigned long flags;
-		spin_lock_irqsave(&worker->workers->lock, flags);
-		worker->idle = 1;
-
-		/* the list may be empty if the worker is just starting */
-		if (!list_empty(&worker->worker_list) &&
-		    !worker->workers->stopping) {
-			list_move(&worker->worker_list,
-				  &worker->workers->idle_list);
+	if (flags & WQ_HIGHPRI) {
+		ret->high = __btrfs_alloc_workqueue(name, flags, max_active,
+						    thresh);
+		if (unlikely(!ret->high)) {
+			__btrfs_destroy_workqueue(ret->normal);
+			kfree(ret);
+			return NULL;
 		}
-		spin_unlock_irqrestore(&worker->workers->lock, flags);
 	}
+	return ret;
 }
 
 /*
- * helper function to move a thread off the idle list after new
- * pending work is added.
+ * Hook for threshold which will be called in btrfs_queue_work.
+ * This hook WILL be called in IRQ handler context,
+ * so workqueue_set_max_active MUST NOT be called in this hook
  */
-static void check_busy_worker(struct btrfs_worker_thread *worker)
+static inline void thresh_queue_hook(struct __btrfs_workqueue *wq)
 {
-	if (worker->idle && atomic_read(&worker->num_pending) >=
-	    worker->workers->idle_thresh) {
-		unsigned long flags;
-		spin_lock_irqsave(&worker->workers->lock, flags);
-		worker->idle = 0;
-
-		if (!list_empty(&worker->worker_list) &&
-		    !worker->workers->stopping) {
-			list_move_tail(&worker->worker_list,
-				       &worker->workers->worker_list);
-		}
-		spin_unlock_irqrestore(&worker->workers->lock, flags);
-	}
+	if (wq->thresh == NO_THRESHOLD)
+		return;
+	atomic_inc(&wq->pending);
 }
 
-static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
+/*
+ * Hook for threshold which will be called before executing the work,
+ * This hook is called in kthread content.
+ * So workqueue_set_max_active is called here.
+ */
+static inline void thresh_exec_hook(struct __btrfs_workqueue *wq)
 {
-	struct btrfs_workers *workers = worker->workers;
-	struct worker_start *start;
-	unsigned long flags;
-
-	rmb();
-	if (!workers->atomic_start_pending)
-		return;
+	int new_max_active;
+	long pending;
+	int need_change = 0;
 
-	start = kzalloc(sizeof(*start), GFP_NOFS);
-	if (!start)
+	if (wq->thresh == NO_THRESHOLD)
 		return;
 
-	start->work.func = start_new_worker_func;
-	start->queue = workers;
-
-	spin_lock_irqsave(&workers->lock, flags);
-	if (!workers->atomic_start_pending)
-		goto out;
-
-	workers->atomic_start_pending = 0;
-	if (workers->num_workers + workers->num_workers_starting >=
-	    workers->max_workers)
-		goto out;
-
-	workers->num_workers_starting += 1;
-	spin_unlock_irqrestore(&workers->lock, flags);
-	btrfs_queue_worker(workers->atomic_worker_start, &start->work);
-	return;
+	atomic_dec(&wq->pending);
+	spin_lock(&wq->thres_lock);
+	/*
+	 * Use wq->count to limit the calling frequency of
+	 * workqueue_set_max_active.
+	 */
+	wq->count++;
+	wq->count %= (wq->thresh / 4);
+	if (!wq->count)
+		goto out;
+	new_max_active = wq->current_max;
 
+	/*
+	 * pending may be changed later, but it's OK since we really
+	 * don't need it so accurate to calculate new_max_active.
+	 */
+	pending = atomic_read(&wq->pending);
+	if (pending > wq->thresh)
+		new_max_active++;
+	if (pending < wq->thresh / 2)
+		new_max_active--;
+	new_max_active = clamp_val(new_max_active, 1, wq->max_active);
+	if (new_max_active != wq->current_max) {
+		need_change = 1;
+		wq->current_max = new_max_active;
+	}
 out:
-	kfree(start);
-	spin_unlock_irqrestore(&workers->lock, flags);
+	spin_unlock(&wq->thres_lock);
+
+	if (need_change) {
+		workqueue_set_max_active(wq->normal_wq, wq->current_max);
+	}
 }
 
-static noinline void run_ordered_completions(struct btrfs_workers *workers,
-					     struct btrfs_work *work)
+static void run_ordered_work(struct __btrfs_workqueue *wq)
 {
-	if (!workers->ordered)
-		return;
-
-	set_bit(WORK_DONE_BIT, &work->flags);
-
-	spin_lock(&workers->order_lock);
+	struct list_head *list = &wq->ordered_list;
+	struct btrfs_work *work;
+	spinlock_t *lock = &wq->list_lock;
+	unsigned long flags;
 
 	while (1) {
-		if (!list_empty(&workers->prio_order_list)) {
-			work = list_entry(workers->prio_order_list.next,
-					  struct btrfs_work, order_list);
-		} else if (!list_empty(&workers->order_list)) {
-			work = list_entry(workers->order_list.next,
-					  struct btrfs_work, order_list);
-		} else {
+		spin_lock_irqsave(lock, flags);
+		if (list_empty(list))
 			break;
-		}
+		work = list_entry(list->next, struct btrfs_work,
+				  ordered_list);
 		if (!test_bit(WORK_DONE_BIT, &work->flags))
 			break;
 
-		/* we are going to call the ordered done function, but
+		/*
+		 * we are going to call the ordered done function, but
 		 * we leave the work item on the list as a barrier so
 		 * that later work items that are done don't have their
 		 * functions called before this one returns
 		 */
 		if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
 			break;
-
-		spin_unlock(&workers->order_lock);
-
+		trace_btrfs_ordered_sched(work);
+		spin_unlock_irqrestore(lock, flags);
 		work->ordered_func(work);
 
 		/* now take the lock again and drop our item from the list */
-		spin_lock(&workers->order_lock);
-		list_del(&work->order_list);
-		spin_unlock(&workers->order_lock);
+		spin_lock_irqsave(lock, flags);
+		list_del(&work->ordered_list);
+		spin_unlock_irqrestore(lock, flags);
 
 		/*
 		 * we don't want to call the ordered free functions
 		 * with the lock held though
 		 */
 		work->ordered_free(work);
-		spin_lock(&workers->order_lock);
-	}
-
-	spin_unlock(&workers->order_lock);
-}
-
-static void put_worker(struct btrfs_worker_thread *worker)
-{
-	if (atomic_dec_and_test(&worker->refs))
-		kfree(worker);
-}
-
-static int try_worker_shutdown(struct btrfs_worker_thread *worker)
-{
-	int freeit = 0;
-
-	spin_lock_irq(&worker->lock);
-	spin_lock(&worker->workers->lock);
-	if (worker->workers->num_workers > 1 &&
-	    worker->idle &&
-	    !worker->working &&
-	    !list_empty(&worker->worker_list) &&
-	    list_empty(&worker->prio_pending) &&
-	    list_empty(&worker->pending) &&
-	    atomic_read(&worker->num_pending) == 0) {
-		freeit = 1;
-		list_del_init(&worker->worker_list);
-		worker->workers->num_workers--;
+		trace_btrfs_all_work_done(work);
 	}
-	spin_unlock(&worker->workers->lock);
-	spin_unlock_irq(&worker->lock);
-
-	if (freeit)
-		put_worker(worker);
-	return freeit;
+	spin_unlock_irqrestore(lock, flags);
 }
 
-static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker,
-					struct list_head *prio_head,
-					struct list_head *head)
+static void normal_work_helper(struct work_struct *arg)
 {
-	struct btrfs_work *work = NULL;
-	struct list_head *cur = NULL;
-
-	if (!list_empty(prio_head))
-		cur = prio_head->next;
-
-	smp_mb();
-	if (!list_empty(&worker->prio_pending))
-		goto refill;
-
-	if (!list_empty(head))
-		cur = head->next;
-
-	if (cur)
-		goto out;
-
-refill:
-	spin_lock_irq(&worker->lock);
-	list_splice_tail_init(&worker->prio_pending, prio_head);
-	list_splice_tail_init(&worker->pending, head);
-
-	if (!list_empty(prio_head))
-		cur = prio_head->next;
-	else if (!list_empty(head))
-		cur = head->next;
-	spin_unlock_irq(&worker->lock);
-
-	if (!cur)
-		goto out_fail;
-
-out:
-	work = list_entry(cur, struct btrfs_work, list);
-
-out_fail:
-	return work;
-}
-
-/*
- * main loop for servicing work items
- */
-static int worker_loop(void *arg)
-{
-	struct btrfs_worker_thread *worker = arg;
-	struct list_head head;
-	struct list_head prio_head;
 	struct btrfs_work *work;
+	struct __btrfs_workqueue *wq;
+	int need_order = 0;
 
-	INIT_LIST_HEAD(&head);
-	INIT_LIST_HEAD(&prio_head);
-
-	do {
-again:
-		while (1) {
-
-
-			work = get_next_work(worker, &prio_head, &head);
-			if (!work)
-				break;
-
-			list_del(&work->list);
-			clear_bit(WORK_QUEUED_BIT, &work->flags);
-
-			work->worker = worker;
-
-			work->func(work);
-
-			atomic_dec(&worker->num_pending);
-			/*
-			 * unless this is an ordered work queue,
-			 * 'work' was probably freed by func above.
-			 */
-			run_ordered_completions(worker->workers, work);
-
-			check_pending_worker_creates(worker);
-			cond_resched();
-		}
-
-		spin_lock_irq(&worker->lock);
-		check_idle_worker(worker);
-
-		if (freezing(current)) {
-			worker->working = 0;
-			spin_unlock_irq(&worker->lock);
-			try_to_freeze();
-		} else {
-			spin_unlock_irq(&worker->lock);
-			if (!kthread_should_stop()) {
-				cpu_relax();
-				/*
-				 * we've dropped the lock, did someone else
-				 * jump_in?
-				 */
-				smp_mb();
-				if (!list_empty(&worker->pending) ||
-				    !list_empty(&worker->prio_pending))
-					continue;
-
-				/*
-				 * this short schedule allows more work to
-				 * come in without the queue functions
-				 * needing to go through wake_up_process()
-				 *
-				 * worker->working is still 1, so nobody
-				 * is going to try and wake us up
-				 */
-				schedule_timeout(1);
-				smp_mb();
-				if (!list_empty(&worker->pending) ||
-				    !list_empty(&worker->prio_pending))
-					continue;
-
-				if (kthread_should_stop())
-					break;
-
-				/* still no more work?, sleep for real */
-				spin_lock_irq(&worker->lock);
-				set_current_state(TASK_INTERRUPTIBLE);
-				if (!list_empty(&worker->pending) ||
-				    !list_empty(&worker->prio_pending)) {
-					spin_unlock_irq(&worker->lock);
-					set_current_state(TASK_RUNNING);
-					goto again;
-				}
-
-				/*
-				 * this makes sure we get a wakeup when someone
-				 * adds something new to the queue
-				 */
-				worker->working = 0;
-				spin_unlock_irq(&worker->lock);
-
-				if (!kthread_should_stop()) {
-					schedule_timeout(HZ * 120);
-					if (!worker->working &&
-					    try_worker_shutdown(worker)) {
-						return 0;
-					}
-				}
-			}
-			__set_current_state(TASK_RUNNING);
-		}
-	} while (!kthread_should_stop());
-	return 0;
-}
-
-/*
- * this will wait for all the worker threads to shutdown
- */
-void btrfs_stop_workers(struct btrfs_workers *workers)
-{
-	struct list_head *cur;
-	struct btrfs_worker_thread *worker;
-	int can_stop;
-
-	spin_lock_irq(&workers->lock);
-	workers->stopping = 1;
-	list_splice_init(&workers->idle_list, &workers->worker_list);
-	while (!list_empty(&workers->worker_list)) {
-		cur = workers->worker_list.next;
-		worker = list_entry(cur, struct btrfs_worker_thread,
-				    worker_list);
-
-		atomic_inc(&worker->refs);
-		workers->num_workers -= 1;
-		if (!list_empty(&worker->worker_list)) {
-			list_del_init(&worker->worker_list);
-			put_worker(worker);
-			can_stop = 1;
-		} else
-			can_stop = 0;
-		spin_unlock_irq(&workers->lock);
-		if (can_stop)
-			kthread_stop(worker->task);
-		spin_lock_irq(&workers->lock);
-		put_worker(worker);
+	work = container_of(arg, struct btrfs_work, normal_work);
+	/*
+	 * We should not touch things inside work in the following cases:
+	 * 1) after work->func() if it has no ordered_free
+	 *    Since the struct is freed in work->func().
+	 * 2) after setting WORK_DONE_BIT
+	 *    The work may be freed in other threads almost instantly.
+	 * So we save the needed things here.
+	 */
+	if (work->ordered_func)
+		need_order = 1;
+	wq = work->wq;
+
+	trace_btrfs_work_sched(work);
+	thresh_exec_hook(wq);
+	work->func(work);
+	if (need_order) {
+		set_bit(WORK_DONE_BIT, &work->flags);
+		run_ordered_work(wq);
 	}
-	spin_unlock_irq(&workers->lock);
+	if (!need_order)
+		trace_btrfs_all_work_done(work);
 }
 
-/*
- * simple init on struct btrfs_workers
- */
-void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
-			struct btrfs_workers *async_helper)
+void btrfs_init_work(struct btrfs_work *work,
+		     btrfs_func_t func,
+		     btrfs_func_t ordered_func,
+		     btrfs_func_t ordered_free)
 {
-	workers->num_workers = 0;
-	workers->num_workers_starting = 0;
-	INIT_LIST_HEAD(&workers->worker_list);
-	INIT_LIST_HEAD(&workers->idle_list);
-	INIT_LIST_HEAD(&workers->order_list);
-	INIT_LIST_HEAD(&workers->prio_order_list);
-	spin_lock_init(&workers->lock);
-	spin_lock_init(&workers->order_lock);
-	workers->max_workers = max;
-	workers->idle_thresh = 32;
-	workers->name = name;
-	workers->ordered = 0;
-	workers->atomic_start_pending = 0;
-	workers->atomic_worker_start = async_helper;
-	workers->stopping = 0;
+	work->func = func;
+	work->ordered_func = ordered_func;
+	work->ordered_free = ordered_free;
+	INIT_WORK(&work->normal_work, normal_work_helper);
+	INIT_LIST_HEAD(&work->ordered_list);
+	work->flags = 0;
 }
 
-/*
- * starts new worker threads. This does not enforce the max worker
- * count in case you need to temporarily go past it.
- */
-static int __btrfs_start_workers(struct btrfs_workers *workers)
+static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
+				      struct btrfs_work *work)
 {
-	struct btrfs_worker_thread *worker;
-	int ret = 0;
-
-	worker = kzalloc(sizeof(*worker), GFP_NOFS);
-	if (!worker) {
-		ret = -ENOMEM;
-		goto fail;
-	}
-
-	INIT_LIST_HEAD(&worker->pending);
-	INIT_LIST_HEAD(&worker->prio_pending);
-	INIT_LIST_HEAD(&worker->worker_list);
-	spin_lock_init(&worker->lock);
-
-	atomic_set(&worker->num_pending, 0);
-	atomic_set(&worker->refs, 1);
-	worker->workers = workers;
-	worker->task = kthread_create(worker_loop, worker,
-				      "btrfs-%s-%d", workers->name,
-				      workers->num_workers + 1);
-	if (IS_ERR(worker->task)) {
-		ret = PTR_ERR(worker->task);
-		goto fail;
-	}
+	unsigned long flags;
 
-	spin_lock_irq(&workers->lock);
-	if (workers->stopping) {
-		spin_unlock_irq(&workers->lock);
-		ret = -EINVAL;
-		goto fail_kthread;
+	work->wq = wq;
+	thresh_queue_hook(wq);
+	if (work->ordered_func) {
+		spin_lock_irqsave(&wq->list_lock, flags);
+		list_add_tail(&work->ordered_list, &wq->ordered_list);
+		spin_unlock_irqrestore(&wq->list_lock, flags);
 	}
-	list_add_tail(&worker->worker_list, &workers->idle_list);
-	worker->idle = 1;
-	workers->num_workers++;
-	workers->num_workers_starting--;
-	WARN_ON(workers->num_workers_starting < 0);
-	spin_unlock_irq(&workers->lock);
-
-	wake_up_process(worker->task);
-	return 0;
-
-fail_kthread:
-	kthread_stop(worker->task);
-fail:
-	kfree(worker);
-	spin_lock_irq(&workers->lock);
-	workers->num_workers_starting--;
-	spin_unlock_irq(&workers->lock);
-	return ret;
+	queue_work(wq->normal_wq, &work->normal_work);
+	trace_btrfs_work_queued(work);
 }
 
-int btrfs_start_workers(struct btrfs_workers *workers)
-{
-	spin_lock_irq(&workers->lock);
-	workers->num_workers_starting++;
-	spin_unlock_irq(&workers->lock);
-	return __btrfs_start_workers(workers);
-}
-
-/*
- * run through the list and find a worker thread that doesn't have a lot
- * to do right now. This can return null if we aren't yet at the thread
- * count limit and all of the threads are busy.
- */
-static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
+void btrfs_queue_work(struct btrfs_workqueue *wq,
+		      struct btrfs_work *work)
 {
-	struct btrfs_worker_thread *worker;
-	struct list_head *next;
-	int enforce_min;
+	struct __btrfs_workqueue *dest_wq;
 
-	enforce_min = (workers->num_workers + workers->num_workers_starting) <
-		workers->max_workers;
-
-	/*
-	 * if we find an idle thread, don't move it to the end of the
-	 * idle list. This improves the chance that the next submission
-	 * will reuse the same thread, and maybe catch it while it is still
-	 * working
-	 */
-	if (!list_empty(&workers->idle_list)) {
-		next = workers->idle_list.next;
-		worker = list_entry(next, struct btrfs_worker_thread,
-				    worker_list);
-		return worker;
-	}
-	if (enforce_min || list_empty(&workers->worker_list))
-		return NULL;
-
-	/*
-	 * if we pick a busy task, move the task to the end of the list.
-	 * hopefully this will keep things somewhat evenly balanced.
-	 * Do the move in batches based on the sequence number. This groups
-	 * requests submitted at roughly the same time onto the same worker.
-	 */
-	next = workers->worker_list.next;
-	worker = list_entry(next, struct btrfs_worker_thread, worker_list);
-	worker->sequence++;
-
-	if (worker->sequence % workers->idle_thresh == 0)
-		list_move_tail(next, &workers->worker_list);
-	return worker;
+	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags) && wq->high)
+		dest_wq = wq->high;
+	else
+		dest_wq = wq->normal;
+	__btrfs_queue_work(dest_wq, work);
 }
 
-/*
- * selects a worker thread to take the next job. This will either find
- * an idle worker, start a new worker up to the max count, or just return
- * one of the existing busy workers.
- */
-static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
+static inline void
+__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq)
 {
-	struct btrfs_worker_thread *worker;
-	unsigned long flags;
-	struct list_head *fallback;
-	int ret;
-
-	spin_lock_irqsave(&workers->lock, flags);
-again:
-	worker = next_worker(workers);
-
-	if (!worker) {
-		if (workers->num_workers + workers->num_workers_starting >=
-		    workers->max_workers) {
-			goto fallback;
-		} else if (workers->atomic_worker_start) {
-			workers->atomic_start_pending = 1;
-			goto fallback;
-		} else {
-			workers->num_workers_starting++;
-			spin_unlock_irqrestore(&workers->lock, flags);
-			/* we're below the limit, start another worker */
-			ret = __btrfs_start_workers(workers);
-			spin_lock_irqsave(&workers->lock, flags);
-			if (ret)
-				goto fallback;
-			goto again;
-		}
-	}
-	goto found;
-
-fallback:
-	fallback = NULL;
-	/*
-	 * we have failed to find any workers, just
-	 * return the first one we can find.
-	 */
-	if (!list_empty(&workers->worker_list))
-		fallback = workers->worker_list.next;
-	if (!list_empty(&workers->idle_list))
-		fallback = workers->idle_list.next;
-	BUG_ON(!fallback);
-	worker = list_entry(fallback,
-			    struct btrfs_worker_thread, worker_list);
-found:
-	/*
-	 * this makes sure the worker doesn't exit before it is placed
-	 * onto a busy/idle list
-	 */
-	atomic_inc(&worker->num_pending);
-	spin_unlock_irqrestore(&workers->lock, flags);
-	return worker;
+	destroy_workqueue(wq->normal_wq);
+	trace_btrfs_workqueue_destroy(wq);
+	kfree(wq);
 }
 
-/*
- * btrfs_requeue_work just puts the work item back on the tail of the list
- * it was taken from. It is intended for use with long running work functions
- * that make some progress and want to give the cpu up for others.
- */
-void btrfs_requeue_work(struct btrfs_work *work)
+void btrfs_destroy_workqueue(struct btrfs_workqueue *wq)
 {
-	struct btrfs_worker_thread *worker = work->worker;
-	unsigned long flags;
-	int wake = 0;
-
-	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
+	if (!wq)
 		return;
-
-	spin_lock_irqsave(&worker->lock, flags);
-	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
-		list_add_tail(&work->list, &worker->prio_pending);
-	else
-		list_add_tail(&work->list, &worker->pending);
-	atomic_inc(&worker->num_pending);
-
-	/* by definition we're busy, take ourselves off the idle
-	 * list
-	 */
-	if (worker->idle) {
-		spin_lock(&worker->workers->lock);
-		worker->idle = 0;
-		list_move_tail(&worker->worker_list,
-			       &worker->workers->worker_list);
-		spin_unlock(&worker->workers->lock);
-	}
-	if (!worker->working) {
-		wake = 1;
-		worker->working = 1;
-	}
-
-	if (wake)
-		wake_up_process(worker->task);
-	spin_unlock_irqrestore(&worker->lock, flags);
+	if (wq->high)
+		__btrfs_destroy_workqueue(wq->high);
+	__btrfs_destroy_workqueue(wq->normal);
+	kfree(wq);
 }
 
-void btrfs_set_work_high_prio(struct btrfs_work *work)
+void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max)
 {
-	set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
+	if (!wq)
+		return;
+	wq->normal->max_active = max;
+	if (wq->high)
+		wq->high->max_active = max;
 }
 
-/*
- * places a struct btrfs_work into the pending queue of one of the kthreads
- */
-void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
+void btrfs_set_work_high_priority(struct btrfs_work *work)
 {
-	struct btrfs_worker_thread *worker;
-	unsigned long flags;
-	int wake = 0;
-
-	/* don't requeue something already on a list */
-	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
-		return;
-
-	worker = find_worker(workers);
-	if (workers->ordered) {
-		/*
-		 * you're not allowed to do ordered queues from an
-		 * interrupt handler
-		 */
-		spin_lock(&workers->order_lock);
-		if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
-			list_add_tail(&work->order_list,
-				      &workers->prio_order_list);
-		} else {
-			list_add_tail(&work->order_list, &workers->order_list);
-		}
-		spin_unlock(&workers->order_lock);
-	} else {
-		INIT_LIST_HEAD(&work->order_list);
-	}
-
-	spin_lock_irqsave(&worker->lock, flags);
-
-	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
-		list_add_tail(&work->list, &worker->prio_pending);
-	else
-		list_add_tail(&work->list, &worker->pending);
-	check_busy_worker(worker);
-
-	/*
-	 * avoid calling into wake_up_process if this thread has already
-	 * been kicked
-	 */
-	if (!worker->working)
-		wake = 1;
-	worker->working = 1;
-
-	if (wake)
-		wake_up_process(worker->task);
-	spin_unlock_irqrestore(&worker->lock, flags);
+	set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
 }
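The heart of the replacement is the threshold machinery above: thresh_queue_hook() counts queued items, and every wq->thresh / 4 executions thresh_exec_hook() re-derives max_active from the backlog. The policy is small enough to model in isolation; the following stand-alone sketch mirrors that arithmetic in user space (adjust_max_active and clamp_int are names invented here, not part of the patch):

	/* User-space model of the thresh_exec_hook() policy; a sketch only. */
	#include <stdio.h>

	#define NO_THRESHOLD (-1)

	static int clamp_int(int v, int lo, int hi)
	{
		return v < lo ? lo : (v > hi ? hi : v);
	}

	/* Re-derive max_active from the current backlog, as the hook does. */
	static int adjust_max_active(int current_max, int max_active, int thresh,
				     long pending)
	{
		int new_max = current_max;

		if (thresh == NO_THRESHOLD)
			return current_max;
		if (pending > thresh)
			new_max++;	/* backlog growing: widen the queue */
		if (pending < thresh / 2)
			new_max--;	/* backlog draining: narrow it again */
		return clamp_int(new_max, 1, max_active);
	}

	int main(void)
	{
		printf("%d\n", adjust_max_active(1, 8, 32, 40));	/* 2: over thresh */
		printf("%d\n", adjust_max_active(2, 8, 32, 10));	/* 1: under thresh/2 */
		return 0;
	}

With thresh = 32 and max_active = 8, concurrency only widens while more than 32 items are pending and narrows again once the backlog drops below 16, so workqueue_set_max_active() is called rarely and the queue stays at a single active worker under light load.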
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 1f26792683ed..9c6b66d15fb0 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2007 Oracle. All rights reserved.
+ * Copyright (C) 2014 Fujitsu. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
@@ -19,103 +20,35 @@
 #ifndef __BTRFS_ASYNC_THREAD_
 #define __BTRFS_ASYNC_THREAD_
 
-struct btrfs_worker_thread;
+struct btrfs_workqueue;
+/* Internal use only */
+struct __btrfs_workqueue;
+struct btrfs_work;
+typedef void (*btrfs_func_t)(struct btrfs_work *arg);
 
-/*
- * This is similar to a workqueue, but it is meant to spread the operations
- * across all available cpus instead of just the CPU that was used to
- * queue the work. There is also some batching introduced to try and
- * cut down on context switches.
- *
- * By default threads are added on demand up to 2 * the number of cpus.
- * Changing struct btrfs_workers->max_workers is one way to prevent
- * demand creation of kthreads.
- *
- * the basic model of these worker threads is to embed a btrfs_work
- * structure in your own data struct, and use container_of in a
- * work function to get back to your data struct.
- */
 struct btrfs_work {
-	/*
-	 * func should be set to the function you want called
-	 * your work struct is passed as the only arg
-	 *
-	 * ordered_func must be set for work sent to an ordered work queue,
-	 * and it is called to complete a given work item in the same
-	 * order they were sent to the queue.
-	 */
-	void (*func)(struct btrfs_work *work);
-	void (*ordered_func)(struct btrfs_work *work);
-	void (*ordered_free)(struct btrfs_work *work);
-
-	/*
-	 * flags should be set to zero. It is used to make sure the
-	 * struct is only inserted once into the list.
-	 */
+	btrfs_func_t func;
+	btrfs_func_t ordered_func;
+	btrfs_func_t ordered_free;
+
+	/* Don't touch things below */
+	struct work_struct normal_work;
+	struct list_head ordered_list;
+	struct __btrfs_workqueue *wq;
 	unsigned long flags;
-
-	/* don't touch these */
-	struct btrfs_worker_thread *worker;
-	struct list_head list;
-	struct list_head order_list;
-};
-
-struct btrfs_workers {
-	/* current number of running workers */
-	int num_workers;
-
-	int num_workers_starting;
-
-	/* max number of workers allowed. changed by btrfs_start_workers */
-	int max_workers;
-
-	/* once a worker has this many requests or fewer, it is idle */
-	int idle_thresh;
-
-	/* force completions in the order they were queued */
-	int ordered;
-
-	/* more workers required, but in an interrupt handler */
-	int atomic_start_pending;
-
-	/*
-	 * are we allowed to sleep while starting workers or are we required
-	 * to start them at a later time? If we can't sleep, this indicates
-	 * which queue we need to use to schedule thread creation.
-	 */
-	struct btrfs_workers *atomic_worker_start;
-
-	/* list with all the work threads. The workers on the idle thread
-	 * may be actively servicing jobs, but they haven't yet hit the
-	 * idle thresh limit above.
-	 */
-	struct list_head worker_list;
-	struct list_head idle_list;
-
-	/*
-	 * when operating in ordered mode, this maintains the list
-	 * of work items waiting for completion
-	 */
-	struct list_head order_list;
-	struct list_head prio_order_list;
-
-	/* lock for finding the next worker thread to queue on */
-	spinlock_t lock;
-
-	/* lock for the ordered lists */
-	spinlock_t order_lock;
-
-	/* extra name for this worker, used for current->name */
-	char *name;
-
-	int stopping;
 };
 
-void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
-int btrfs_start_workers(struct btrfs_workers *workers);
-void btrfs_stop_workers(struct btrfs_workers *workers);
-void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
-			struct btrfs_workers *async_starter);
-void btrfs_requeue_work(struct btrfs_work *work);
-void btrfs_set_work_high_prio(struct btrfs_work *work);
+struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
+					      int flags,
+					      int max_active,
+					      int thresh);
+void btrfs_init_work(struct btrfs_work *work,
+		     btrfs_func_t func,
+		     btrfs_func_t ordered_func,
+		     btrfs_func_t ordered_free);
+void btrfs_queue_work(struct btrfs_workqueue *wq,
+		      struct btrfs_work *work);
+void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
+void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max);
+void btrfs_set_work_high_priority(struct btrfs_work *work);
 #endif
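The new header boils the old interface down to an embedded struct btrfs_work plus six functions. A hypothetical caller, sketched against the declarations above (the my_* names are invented for illustration and appear nowhere in the patch):

	/* Sketch only: a hypothetical user of the btrfs_workqueue API. */
	#include <linux/slab.h>
	#include "async-thread.h"

	struct my_work {
		struct btrfs_work work;	/* embed, recover via container_of() */
		u64 payload;
	};

	static void my_func(struct btrfs_work *w)
	{
		struct my_work *mw = container_of(w, struct my_work, work);

		/* unordered part: up to max_active items may run concurrently */
		pr_debug("payload %llu\n", mw->payload);
	}

	static void my_ordered_func(struct btrfs_work *w)
	{
		/* runs strictly in queueing order once my_func() has completed */
	}

	static void my_ordered_free(struct btrfs_work *w)
	{
		kfree(container_of(w, struct my_work, work));
	}

	static int my_submit(struct btrfs_workqueue *wq, u64 payload)
	{
		struct my_work *mw = kzalloc(sizeof(*mw), GFP_NOFS);

		if (!mw)
			return -ENOMEM;
		mw->payload = payload;
		btrfs_init_work(&mw->work, my_func, my_ordered_func,
				my_ordered_free);
		btrfs_queue_work(wq, &mw->work);	/* freed via my_ordered_free */
		return 0;
	}

The queue itself would come from btrfs_alloc_workqueue("example", 0, max_active, 0), where a thresh of 0 selects the built-in default, and is torn down with btrfs_destroy_workqueue(); passing NULL for ordered_func and ordered_free keeps the item off the ordered list entirely.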
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index aded3ef3d3d4..10db21fa0926 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -220,7 +220,8 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
 
 static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 			   struct ulist *parents, struct __prelim_ref *ref,
-			   int level, u64 time_seq, const u64 *extent_item_pos)
+			   int level, u64 time_seq, const u64 *extent_item_pos,
+			   u64 total_refs)
 {
 	int ret = 0;
 	int slot;
@@ -249,7 +250,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
 		ret = btrfs_next_old_leaf(root, path, time_seq);
 
-	while (!ret && count < ref->count) {
+	while (!ret && count < total_refs) {
 		eb = path->nodes[0];
 		slot = path->slots[0];
 
@@ -306,7 +307,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 				  struct btrfs_path *path, u64 time_seq,
 				  struct __prelim_ref *ref,
 				  struct ulist *parents,
-				  const u64 *extent_item_pos)
+				  const u64 *extent_item_pos, u64 total_refs)
 {
 	struct btrfs_root *root;
 	struct btrfs_key root_key;
@@ -329,7 +330,10 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 		goto out;
 	}
 
-	root_level = btrfs_old_root_level(root, time_seq);
+	if (path->search_commit_root)
+		root_level = btrfs_header_level(root->commit_root);
+	else
+		root_level = btrfs_old_root_level(root, time_seq);
 
 	if (root_level + 1 == level) {
 		srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -361,7 +365,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 	}
 
 	ret = add_all_parents(root, path, parents, ref, level, time_seq,
-			      extent_item_pos);
+			      extent_item_pos, total_refs);
 out:
 	path->lowest_level = 0;
 	btrfs_release_path(path);
@@ -374,7 +378,7 @@ out:
 static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 				   struct btrfs_path *path, u64 time_seq,
 				   struct list_head *head,
-				   const u64 *extent_item_pos)
+				   const u64 *extent_item_pos, u64 total_refs)
 {
 	int err;
 	int ret = 0;
@@ -400,7 +404,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 		if (ref->count == 0)
 			continue;
 		err = __resolve_indirect_ref(fs_info, path, time_seq, ref,
-					     parents, extent_item_pos);
+					     parents, extent_item_pos,
+					     total_refs);
 		/*
 		 * we can only tolerate ENOENT,otherwise,we should catch error
 		 * and return directly.
@@ -557,7 +562,7 @@ static void __merge_refs(struct list_head *head, int mode)
  * smaller or equal that seq to the list
  */
 static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
-			      struct list_head *prefs)
+			      struct list_head *prefs, u64 *total_refs)
 {
 	struct btrfs_delayed_extent_op *extent_op = head->extent_op;
 	struct rb_node *n = &head->node.rb_node;
@@ -593,6 +598,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
 		default:
 			BUG_ON(1);
 		}
+		*total_refs += (node->ref_mod * sgn);
 		switch (node->type) {
 		case BTRFS_TREE_BLOCK_REF_KEY: {
 			struct btrfs_delayed_tree_ref *ref;
@@ -653,7 +659,8 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
  */
 static int __add_inline_refs(struct btrfs_fs_info *fs_info,
 			     struct btrfs_path *path, u64 bytenr,
-			     int *info_level, struct list_head *prefs)
+			     int *info_level, struct list_head *prefs,
+			     u64 *total_refs)
 {
 	int ret = 0;
 	int slot;
@@ -677,6 +684,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
 
 	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
 	flags = btrfs_extent_flags(leaf, ei);
+	*total_refs += btrfs_extent_refs(leaf, ei);
 	btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
 	ptr = (unsigned long)(ei + 1);
@@ -859,6 +867,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	struct list_head prefs;
 	struct __prelim_ref *ref;
 	struct extent_inode_elem *eie = NULL;
+	u64 total_refs = 0;
 
 	INIT_LIST_HEAD(&prefs);
 	INIT_LIST_HEAD(&prefs_delayed);
@@ -873,8 +882,10 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
-	if (!trans)
+	if (!trans) {
 		path->search_commit_root = 1;
+		path->skip_locking = 1;
+	}
 
 	/*
 	 * grab both a lock on the path and a lock on the delayed ref head.
@@ -915,7 +926,7 @@ again:
 			}
 			spin_unlock(&delayed_refs->lock);
 			ret = __add_delayed_refs(head, time_seq,
-						 &prefs_delayed);
+						 &prefs_delayed, &total_refs);
 			mutex_unlock(&head->mutex);
 			if (ret)
 				goto out;
@@ -936,7 +947,8 @@ again:
 		    (key.type == BTRFS_EXTENT_ITEM_KEY ||
 		     key.type == BTRFS_METADATA_ITEM_KEY)) {
 			ret = __add_inline_refs(fs_info, path, bytenr,
-						&info_level, &prefs);
+						&info_level, &prefs,
+						&total_refs);
 			if (ret)
 				goto out;
 			ret = __add_keyed_refs(fs_info, path, bytenr,
@@ -956,7 +968,7 @@ again:
 	__merge_refs(&prefs, 1);
 
 	ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs,
-				      extent_item_pos);
+				      extent_item_pos, total_refs);
 	if (ret)
 		goto out;
 
@@ -965,7 +977,7 @@ again:
 	while (!list_empty(&prefs)) {
 		ref = list_first_entry(&prefs, struct __prelim_ref, list);
 		WARN_ON(ref->count < 0);
-		if (ref->count && ref->root_id && ref->parent == 0) {
+		if (roots && ref->count && ref->root_id && ref->parent == 0) {
 			/* no parent == root of tree */
 			ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
 			if (ret < 0)
@@ -1061,22 +1073,14 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
 				u64 time_seq, struct ulist **leafs,
 				const u64 *extent_item_pos)
 {
-	struct ulist *tmp;
 	int ret;
 
-	tmp = ulist_alloc(GFP_NOFS);
-	if (!tmp)
-		return -ENOMEM;
 	*leafs = ulist_alloc(GFP_NOFS);
-	if (!*leafs) {
-		ulist_free(tmp);
+	if (!*leafs)
 		return -ENOMEM;
-	}
 
 	ret = find_parent_nodes(trans, fs_info, bytenr,
-				time_seq, *leafs, tmp, extent_item_pos);
-	ulist_free(tmp);
-
+				time_seq, *leafs, NULL, extent_item_pos);
 	if (ret < 0 && ret != -ENOENT) {
 		free_leaf_list(*leafs);
 		return ret;
@@ -1098,9 +1102,9 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
  *
  * returns 0 on success, < 0 on error.
  */
-int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
-			 struct btrfs_fs_info *fs_info, u64 bytenr,
-			 u64 time_seq, struct ulist **roots)
+static int __btrfs_find_all_roots(struct btrfs_trans_handle *trans,
+				  struct btrfs_fs_info *fs_info, u64 bytenr,
+				  u64 time_seq, struct ulist **roots)
 {
 	struct ulist *tmp;
 	struct ulist_node *node = NULL;
@@ -1136,6 +1140,20 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
+int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
+			 struct btrfs_fs_info *fs_info, u64 bytenr,
+			 u64 time_seq, struct ulist **roots)
+{
+	int ret;
+
+	if (!trans)
+		down_read(&fs_info->commit_root_sem);
+	ret = __btrfs_find_all_roots(trans, fs_info, bytenr, time_seq, roots);
+	if (!trans)
+		up_read(&fs_info->commit_root_sem);
+	return ret;
+}
+
 /*
  * this makes the path point to (inum INODE_ITEM ioff)
  */
@@ -1333,38 +1351,13 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
 	if (ret < 0)
 		return ret;
 
-	while (1) {
-		u32 nritems;
-		if (path->slots[0] == 0) {
-			btrfs_set_path_blocking(path);
-			ret = btrfs_prev_leaf(fs_info->extent_root, path);
-			if (ret != 0) {
-				if (ret > 0) {
-					pr_debug("logical %llu is not within "
-						 "any extent\n", logical);
-					ret = -ENOENT;
-				}
-				return ret;
-			}
-		} else {
-			path->slots[0]--;
-		}
-		nritems = btrfs_header_nritems(path->nodes[0]);
-		if (nritems == 0) {
-			pr_debug("logical %llu is not within any extent\n",
-				 logical);
-			return -ENOENT;
-		}
-		if (path->slots[0] == nritems)
-			path->slots[0]--;
-
-		btrfs_item_key_to_cpu(path->nodes[0], found_key,
-				      path->slots[0]);
-		if (found_key->type == BTRFS_EXTENT_ITEM_KEY ||
-		    found_key->type == BTRFS_METADATA_ITEM_KEY)
-			break;
+	ret = btrfs_previous_extent_item(fs_info->extent_root, path, 0);
+	if (ret) {
+		if (ret > 0)
+			ret = -ENOENT;
+		return ret;
 	}
-
+	btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
 	if (found_key->type == BTRFS_METADATA_ITEM_KEY)
 		size = fs_info->extent_root->leafsize;
 	else if (found_key->type == BTRFS_EXTENT_ITEM_KEY)
@@ -1540,6 +1533,8 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 		if (IS_ERR(trans))
 			return PTR_ERR(trans);
 		btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+	} else {
+		down_read(&fs_info->commit_root_sem);
 	}
 
 	ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
@@ -1550,8 +1545,8 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 
 	ULIST_ITER_INIT(&ref_uiter);
 	while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
-		ret = btrfs_find_all_roots(trans, fs_info, ref_node->val,
-					   tree_mod_seq_elem.seq, &roots);
+		ret = __btrfs_find_all_roots(trans, fs_info, ref_node->val,
+					     tree_mod_seq_elem.seq, &roots);
 		if (ret)
 			break;
 		ULIST_ITER_INIT(&root_uiter);
@@ -1573,6 +1568,8 @@ out:
 	if (!search_commit_root) {
 		btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
 		btrfs_end_transaction(trans, fs_info->extent_root);
+	} else {
+		up_read(&fs_info->commit_root_sem);
 	}
 
 	return ret;
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 8fed2125689e..c9a24444ec9a 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
| @@ -109,14 +109,17 @@ struct btrfs_inode { | |||
| 109 | u64 last_trans; | 109 | u64 last_trans; |
| 110 | 110 | ||
| 111 | /* | 111 | /* |
| 112 | * log transid when this inode was last modified | 112 | * transid that last logged this inode |
| 113 | */ | 113 | */ |
| 114 | u64 last_sub_trans; | 114 | u64 logged_trans; |
| 115 | 115 | ||
| 116 | /* | 116 | /* |
| 117 | * transid that last logged this inode | 117 | * log transid when this inode was last modified |
| 118 | */ | 118 | */ |
| 119 | u64 logged_trans; | 119 | int last_sub_trans; |
| 120 | |||
| 121 | /* a local copy of root's last_log_commit */ | ||
| 122 | int last_log_commit; | ||
| 120 | 123 | ||
| 121 | /* total number of bytes pending delalloc, used by stat to calc the | 124 | /* total number of bytes pending delalloc, used by stat to calc the |
| 122 | * real block usage of the file | 125 | * real block usage of the file |
| @@ -155,9 +158,6 @@ struct btrfs_inode { | |||
| 155 | /* flags field from the on disk inode */ | 158 | /* flags field from the on disk inode */ |
| 156 | u32 flags; | 159 | u32 flags; |
| 157 | 160 | ||
| 158 | /* a local copy of root's last_log_commit */ | ||
| 159 | unsigned long last_log_commit; | ||
| 160 | |||
| 161 | /* | 161 | /* |
| 162 | * Counters to keep track of the number of extent item's we may use due | 162 | * Counters to keep track of the number of extent item's we may use due |
| 163 | * to delalloc and such. outstanding_extents is the number of extent | 163 | * to delalloc and such. outstanding_extents is the number of extent |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b01fb6c527e3..d43c544d3b68 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
| @@ -472,7 +472,7 @@ static noinline int add_ra_bio_pages(struct inode *inode, | |||
| 472 | rcu_read_lock(); | 472 | rcu_read_lock(); |
| 473 | page = radix_tree_lookup(&mapping->page_tree, pg_index); | 473 | page = radix_tree_lookup(&mapping->page_tree, pg_index); |
| 474 | rcu_read_unlock(); | 474 | rcu_read_unlock(); |
| 475 | if (page) { | 475 | if (page && !radix_tree_exceptional_entry(page)) { |
| 476 | misses++; | 476 | misses++; |
| 477 | if (misses > 4) | 477 | if (misses > 4) |
| 478 | break; | 478 | break; |
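Note: with shadow entries now stored in the page-cache radix tree, a bare radix_tree_lookup() may return an exceptional entry rather than a page pointer, so "slot occupied" no longer implies "page cached". A sketch of the defensive pattern this hunk applies; the exceptional entry is treated as a miss and never dereferenced:

	rcu_read_lock();
	page = radix_tree_lookup(&mapping->page_tree, pg_index);
	rcu_read_unlock();

	/* a workingset shadow entry is not a struct page */
	if (page && radix_tree_exceptional_entry(page))
		page = NULL;	/* treat as not cached */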
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index cbd3a7d6fa68..1bcfcdb23cf4 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
| @@ -2769,9 +2769,13 @@ again: | |||
| 2769 | * the commit roots are read only | 2769 | * the commit roots are read only |
| 2770 | * so we always do read locks | 2770 | * so we always do read locks |
| 2771 | */ | 2771 | */ |
| 2772 | if (p->need_commit_sem) | ||
| 2773 | down_read(&root->fs_info->commit_root_sem); | ||
| 2772 | b = root->commit_root; | 2774 | b = root->commit_root; |
| 2773 | extent_buffer_get(b); | 2775 | extent_buffer_get(b); |
| 2774 | level = btrfs_header_level(b); | 2776 | level = btrfs_header_level(b); |
| 2777 | if (p->need_commit_sem) | ||
| 2778 | up_read(&root->fs_info->commit_root_sem); | ||
| 2775 | if (!p->skip_locking) | 2779 | if (!p->skip_locking) |
| 2776 | btrfs_tree_read_lock(b); | 2780 | btrfs_tree_read_lock(b); |
| 2777 | } else { | 2781 | } else { |
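Note: path->need_commit_sem (added to btrfs_path in the ctree.h hunk below) narrows the commit_root_sem hold to the window in which the commit root pointer is sampled and its extent buffer pinned. A sketch of a commit-root search opting in, assuming only what this hunk shows:

	struct btrfs_path *path;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->search_commit_root = 1;
	path->skip_locking = 1;
	/* take commit_root_sem while root->commit_root is referenced */
	path->need_commit_sem = 1;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);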
| @@ -5360,7 +5364,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
| 5360 | { | 5364 | { |
| 5361 | int ret; | 5365 | int ret; |
| 5362 | int cmp; | 5366 | int cmp; |
| 5363 | struct btrfs_trans_handle *trans = NULL; | ||
| 5364 | struct btrfs_path *left_path = NULL; | 5367 | struct btrfs_path *left_path = NULL; |
| 5365 | struct btrfs_path *right_path = NULL; | 5368 | struct btrfs_path *right_path = NULL; |
| 5366 | struct btrfs_key left_key; | 5369 | struct btrfs_key left_key; |
| @@ -5376,9 +5379,8 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
| 5376 | int advance_right; | 5379 | int advance_right; |
| 5377 | u64 left_blockptr; | 5380 | u64 left_blockptr; |
| 5378 | u64 right_blockptr; | 5381 | u64 right_blockptr; |
| 5379 | u64 left_start_ctransid; | 5382 | u64 left_gen; |
| 5380 | u64 right_start_ctransid; | 5383 | u64 right_gen; |
| 5381 | u64 ctransid; | ||
| 5382 | 5384 | ||
| 5383 | left_path = btrfs_alloc_path(); | 5385 | left_path = btrfs_alloc_path(); |
| 5384 | if (!left_path) { | 5386 | if (!left_path) { |
| @@ -5402,21 +5404,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
| 5402 | right_path->search_commit_root = 1; | 5404 | right_path->search_commit_root = 1; |
| 5403 | right_path->skip_locking = 1; | 5405 | right_path->skip_locking = 1; |
| 5404 | 5406 | ||
| 5405 | spin_lock(&left_root->root_item_lock); | ||
| 5406 | left_start_ctransid = btrfs_root_ctransid(&left_root->root_item); | ||
| 5407 | spin_unlock(&left_root->root_item_lock); | ||
| 5408 | |||
| 5409 | spin_lock(&right_root->root_item_lock); | ||
| 5410 | right_start_ctransid = btrfs_root_ctransid(&right_root->root_item); | ||
| 5411 | spin_unlock(&right_root->root_item_lock); | ||
| 5412 | |||
| 5413 | trans = btrfs_join_transaction(left_root); | ||
| 5414 | if (IS_ERR(trans)) { | ||
| 5415 | ret = PTR_ERR(trans); | ||
| 5416 | trans = NULL; | ||
| 5417 | goto out; | ||
| 5418 | } | ||
| 5419 | |||
| 5420 | /* | 5407 | /* |
| 5421 | * Strategy: Go to the first items of both trees. Then do | 5408 | * Strategy: Go to the first items of both trees. Then do |
| 5422 | * | 5409 | * |
| @@ -5453,6 +5440,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
| 5453 | * the right if possible or go up and right. | 5440 | * the right if possible or go up and right. |
| 5454 | */ | 5441 | */ |
| 5455 | 5442 | ||
| 5443 | down_read(&left_root->fs_info->commit_root_sem); | ||
| 5456 | left_level = btrfs_header_level(left_root->commit_root); | 5444 | left_level = btrfs_header_level(left_root->commit_root); |
| 5457 | left_root_level = left_level; | 5445 | left_root_level = left_level; |
| 5458 | left_path->nodes[left_level] = left_root->commit_root; | 5446 | left_path->nodes[left_level] = left_root->commit_root; |
| @@ -5462,6 +5450,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
| 5462 | right_root_level = right_level; | 5450 | right_root_level = right_level; |
| 5463 | right_path->nodes[right_level] = right_root->commit_root; | 5451 | right_path->nodes[right_level] = right_root->commit_root; |
| 5464 | extent_buffer_get(right_path->nodes[right_level]); | 5452 | extent_buffer_get(right_path->nodes[right_level]); |
| 5453 | up_read(&left_root->fs_info->commit_root_sem); | ||
| 5465 | 5454 | ||
| 5466 | if (left_level == 0) | 5455 | if (left_level == 0) |
| 5467 | btrfs_item_key_to_cpu(left_path->nodes[left_level], | 5456 | btrfs_item_key_to_cpu(left_path->nodes[left_level], |
| @@ -5480,67 +5469,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
| 5480 | advance_left = advance_right = 0; | 5469 | advance_left = advance_right = 0; |
| 5481 | 5470 | ||
| 5482 | while (1) { | 5471 | while (1) { |
| 5483 | /* | ||
| 5484 | * We need to make sure the transaction does not get committed | ||
| 5485 | * while we do anything on commit roots. This means, we need to | ||
| 5486 | * join and leave transactions for every item that we process. | ||
| 5487 | */ | ||
| 5488 | if (trans && btrfs_should_end_transaction(trans, left_root)) { | ||
| 5489 | btrfs_release_path(left_path); | ||
| 5490 | btrfs_release_path(right_path); | ||
| 5491 | |||
| 5492 | ret = btrfs_end_transaction(trans, left_root); | ||
| 5493 | trans = NULL; | ||
| 5494 | if (ret < 0) | ||
| 5495 | goto out; | ||
| 5496 | } | ||
| 5497 | /* now rejoin the transaction */ | ||
| 5498 | if (!trans) { | ||
| 5499 | trans = btrfs_join_transaction(left_root); | ||
| 5500 | if (IS_ERR(trans)) { | ||
| 5501 | ret = PTR_ERR(trans); | ||
| 5502 | trans = NULL; | ||
| 5503 | goto out; | ||
| 5504 | } | ||
| 5505 | |||
| 5506 | spin_lock(&left_root->root_item_lock); | ||
| 5507 | ctransid = btrfs_root_ctransid(&left_root->root_item); | ||
| 5508 | spin_unlock(&left_root->root_item_lock); | ||
| 5509 | if (ctransid != left_start_ctransid) | ||
| 5510 | left_start_ctransid = 0; | ||
| 5511 | |||
| 5512 | spin_lock(&right_root->root_item_lock); | ||
| 5513 | ctransid = btrfs_root_ctransid(&right_root->root_item); | ||
| 5514 | spin_unlock(&right_root->root_item_lock); | ||
| 5515 | if (ctransid != right_start_ctransid) | ||
| 5516 | right_start_ctransid = 0; | ||
| 5517 | |||
| 5518 | if (!left_start_ctransid || !right_start_ctransid) { | ||
| 5519 | WARN(1, KERN_WARNING | ||
| 5520 | "BTRFS: btrfs_compare_tree detected " | ||
| 5521 | "a change in one of the trees while " | ||
| 5522 | "iterating. This is probably a " | ||
| 5523 | "bug.\n"); | ||
| 5524 | ret = -EIO; | ||
| 5525 | goto out; | ||
| 5526 | } | ||
| 5527 | |||
| 5528 | /* | ||
| 5529 | * the commit root may have changed, so start again | ||
| 5530 | * where we stopped | ||
| 5531 | */ | ||
| 5532 | left_path->lowest_level = left_level; | ||
| 5533 | right_path->lowest_level = right_level; | ||
| 5534 | ret = btrfs_search_slot(NULL, left_root, | ||
| 5535 | &left_key, left_path, 0, 0); | ||
| 5536 | if (ret < 0) | ||
| 5537 | goto out; | ||
| 5538 | ret = btrfs_search_slot(NULL, right_root, | ||
| 5539 | &right_key, right_path, 0, 0); | ||
| 5540 | if (ret < 0) | ||
| 5541 | goto out; | ||
| 5542 | } | ||
| 5543 | |||
| 5544 | if (advance_left && !left_end_reached) { | 5472 | if (advance_left && !left_end_reached) { |
| 5545 | ret = tree_advance(left_root, left_path, &left_level, | 5473 | ret = tree_advance(left_root, left_path, &left_level, |
| 5546 | left_root_level, | 5474 | left_root_level, |
| @@ -5640,7 +5568,14 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
| 5640 | right_blockptr = btrfs_node_blockptr( | 5568 | right_blockptr = btrfs_node_blockptr( |
| 5641 | right_path->nodes[right_level], | 5569 | right_path->nodes[right_level], |
| 5642 | right_path->slots[right_level]); | 5570 | right_path->slots[right_level]); |
| 5643 | if (left_blockptr == right_blockptr) { | 5571 | left_gen = btrfs_node_ptr_generation( |
| 5572 | left_path->nodes[left_level], | ||
| 5573 | left_path->slots[left_level]); | ||
| 5574 | right_gen = btrfs_node_ptr_generation( | ||
| 5575 | right_path->nodes[right_level], | ||
| 5576 | right_path->slots[right_level]); | ||
| 5577 | if (left_blockptr == right_blockptr && | ||
| 5578 | left_gen == right_gen) { | ||
| 5644 | /* | 5579 | /* |
| 5645 | * As we're on a shared block, don't | 5580 | * As we're on a shared block, don't |
| 5646 | * allow to go deeper. | 5581 | * allow to go deeper. |
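Note: without the long-lived joined transaction, an extent can be freed and its bytenr handed out again while the compare runs, so a matching block pointer alone no longer proves the trees share a block. A worked illustration with hypothetical numbers:

	/*
	 * left tree:  node ptr -> bytenr 4096, generation 10
	 * right tree: node ptr -> bytenr 4096, generation 12
	 *
	 * The bytenr matches only because the old block was freed and
	 * the address reused; the generations differ, so the compare
	 * must still descend rather than skip the subtree.
	 */
	if (left_blockptr == right_blockptr &&
	    left_gen == right_gen) {
		/* genuinely shared: safe to skip the whole subtree */
	}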
| @@ -5663,14 +5598,6 @@ out: | |||
| 5663 | btrfs_free_path(left_path); | 5598 | btrfs_free_path(left_path); |
| 5664 | btrfs_free_path(right_path); | 5599 | btrfs_free_path(right_path); |
| 5665 | kfree(tmp_buf); | 5600 | kfree(tmp_buf); |
| 5666 | |||
| 5667 | if (trans) { | ||
| 5668 | if (!ret) | ||
| 5669 | ret = btrfs_end_transaction(trans, left_root); | ||
| 5670 | else | ||
| 5671 | btrfs_end_transaction(trans, left_root); | ||
| 5672 | } | ||
| 5673 | |||
| 5674 | return ret; | 5601 | return ret; |
| 5675 | } | 5602 | } |
| 5676 | 5603 | ||
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2c1a42ca519f..4c48df572bd6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -351,6 +351,7 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) | |||
| 351 | #define BTRFS_FS_STATE_ERROR 0 | 351 | #define BTRFS_FS_STATE_ERROR 0 |
| 352 | #define BTRFS_FS_STATE_REMOUNTING 1 | 352 | #define BTRFS_FS_STATE_REMOUNTING 1 |
| 353 | #define BTRFS_FS_STATE_TRANS_ABORTED 2 | 353 | #define BTRFS_FS_STATE_TRANS_ABORTED 2 |
| 354 | #define BTRFS_FS_STATE_DEV_REPLACING 3 | ||
| 354 | 355 | ||
| 355 | /* Super block flags */ | 356 | /* Super block flags */ |
| 356 | /* Errors detected */ | 357 | /* Errors detected */ |
| @@ -608,6 +609,7 @@ struct btrfs_path { | |||
| 608 | unsigned int skip_locking:1; | 609 | unsigned int skip_locking:1; |
| 609 | unsigned int leave_spinning:1; | 610 | unsigned int leave_spinning:1; |
| 610 | unsigned int search_commit_root:1; | 611 | unsigned int search_commit_root:1; |
| 612 | unsigned int need_commit_sem:1; | ||
| 611 | }; | 613 | }; |
| 612 | 614 | ||
| 613 | /* | 615 | /* |
| @@ -985,7 +987,8 @@ struct btrfs_dev_replace_item { | |||
| 985 | #define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6) | 987 | #define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6) |
| 986 | #define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7) | 988 | #define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7) |
| 987 | #define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8) | 989 | #define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8) |
| 988 | #define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE | 990 | #define BTRFS_BLOCK_GROUP_RESERVED (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \ |
| 991 | BTRFS_SPACE_INFO_GLOBAL_RSV) | ||
| 989 | 992 | ||
| 990 | enum btrfs_raid_types { | 993 | enum btrfs_raid_types { |
| 991 | BTRFS_RAID_RAID10, | 994 | BTRFS_RAID_RAID10, |
| @@ -1017,6 +1020,12 @@ enum btrfs_raid_types { | |||
| 1017 | */ | 1020 | */ |
| 1018 | #define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48) | 1021 | #define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48) |
| 1019 | 1022 | ||
| 1023 | /* | ||
| 1024 | * A fake block group type that is used to communicate global block reserve | ||
| 1025 | * size to userspace via the SPACE_INFO ioctl. | ||
| 1026 | */ | ||
| 1027 | #define BTRFS_SPACE_INFO_GLOBAL_RSV (1ULL << 49) | ||
| 1028 | |||
| 1020 | #define BTRFS_EXTENDED_PROFILE_MASK (BTRFS_BLOCK_GROUP_PROFILE_MASK | \ | 1029 | #define BTRFS_EXTENDED_PROFILE_MASK (BTRFS_BLOCK_GROUP_PROFILE_MASK | \ |
| 1021 | BTRFS_AVAIL_ALLOC_BIT_SINGLE) | 1030 | BTRFS_AVAIL_ALLOC_BIT_SINGLE) |
| 1022 | 1031 | ||
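Note: bit 49 never reaches the disk format; it only tags one synthetic entry in the SPACE_INFO reply so tools can report the global reserve. A hypothetical userspace sketch, assuming a btrfs_ioctl_space_args buffer already filled by BTRFS_IOC_SPACE_INFO:

	struct btrfs_ioctl_space_args *sargs;	/* filled by the ioctl */
	__u64 i;

	for (i = 0; i < sargs->total_spaces; i++) {
		if (!(sargs->spaces[i].flags & BTRFS_SPACE_INFO_GLOBAL_RSV))
			continue;
		printf("GlobalReserve: total %llu, used %llu\n",
		       (unsigned long long)sargs->spaces[i].total_bytes,
		       (unsigned long long)sargs->spaces[i].used_bytes);
	}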
| @@ -1439,7 +1448,7 @@ struct btrfs_fs_info { | |||
| 1439 | */ | 1448 | */ |
| 1440 | struct mutex ordered_extent_flush_mutex; | 1449 | struct mutex ordered_extent_flush_mutex; |
| 1441 | 1450 | ||
| 1442 | struct rw_semaphore extent_commit_sem; | 1451 | struct rw_semaphore commit_root_sem; |
| 1443 | 1452 | ||
| 1444 | struct rw_semaphore cleanup_work_sem; | 1453 | struct rw_semaphore cleanup_work_sem; |
| 1445 | 1454 | ||
| @@ -1489,6 +1498,7 @@ struct btrfs_fs_info { | |||
| 1489 | */ | 1498 | */ |
| 1490 | struct list_head ordered_roots; | 1499 | struct list_head ordered_roots; |
| 1491 | 1500 | ||
| 1501 | struct mutex delalloc_root_mutex; | ||
| 1492 | spinlock_t delalloc_root_lock; | 1502 | spinlock_t delalloc_root_lock; |
| 1493 | /* all fs/file tree roots that have delalloc inodes. */ | 1503 | /* all fs/file tree roots that have delalloc inodes. */ |
| 1494 | struct list_head delalloc_roots; | 1504 | struct list_head delalloc_roots; |
| @@ -1503,28 +1513,27 @@ struct btrfs_fs_info { | |||
| 1503 | * A third pool does submit_bio to avoid deadlocking with the other | 1513 | * A third pool does submit_bio to avoid deadlocking with the other |
| 1504 | * two | 1514 | * two |
| 1505 | */ | 1515 | */ |
| 1506 | struct btrfs_workers generic_worker; | 1516 | struct btrfs_workqueue *workers; |
| 1507 | struct btrfs_workers workers; | 1517 | struct btrfs_workqueue *delalloc_workers; |
| 1508 | struct btrfs_workers delalloc_workers; | 1518 | struct btrfs_workqueue *flush_workers; |
| 1509 | struct btrfs_workers flush_workers; | 1519 | struct btrfs_workqueue *endio_workers; |
| 1510 | struct btrfs_workers endio_workers; | 1520 | struct btrfs_workqueue *endio_meta_workers; |
| 1511 | struct btrfs_workers endio_meta_workers; | 1521 | struct btrfs_workqueue *endio_raid56_workers; |
| 1512 | struct btrfs_workers endio_raid56_workers; | 1522 | struct btrfs_workqueue *rmw_workers; |
| 1513 | struct btrfs_workers rmw_workers; | 1523 | struct btrfs_workqueue *endio_meta_write_workers; |
| 1514 | struct btrfs_workers endio_meta_write_workers; | 1524 | struct btrfs_workqueue *endio_write_workers; |
| 1515 | struct btrfs_workers endio_write_workers; | 1525 | struct btrfs_workqueue *endio_freespace_worker; |
| 1516 | struct btrfs_workers endio_freespace_worker; | 1526 | struct btrfs_workqueue *submit_workers; |
| 1517 | struct btrfs_workers submit_workers; | 1527 | struct btrfs_workqueue *caching_workers; |
| 1518 | struct btrfs_workers caching_workers; | 1528 | struct btrfs_workqueue *readahead_workers; |
| 1519 | struct btrfs_workers readahead_workers; | ||
| 1520 | 1529 | ||
| 1521 | /* | 1530 | /* |
| 1522 | * fixup workers take dirty pages that didn't properly go through | 1531 | * fixup workers take dirty pages that didn't properly go through |
| 1523 | * the cow mechanism and make them safe to write. It happens | 1532 | * the cow mechanism and make them safe to write. It happens |
| 1524 | * for the sys_munmap function call path | 1533 | * for the sys_munmap function call path |
| 1525 | */ | 1534 | */ |
| 1526 | struct btrfs_workers fixup_workers; | 1535 | struct btrfs_workqueue *fixup_workers; |
| 1527 | struct btrfs_workers delayed_workers; | 1536 | struct btrfs_workqueue *delayed_workers; |
| 1528 | struct task_struct *transaction_kthread; | 1537 | struct task_struct *transaction_kthread; |
| 1529 | struct task_struct *cleaner_kthread; | 1538 | struct task_struct *cleaner_kthread; |
| 1530 | int thread_pool_size; | 1539 | int thread_pool_size; |
| @@ -1604,9 +1613,9 @@ struct btrfs_fs_info { | |||
| 1604 | atomic_t scrub_cancel_req; | 1613 | atomic_t scrub_cancel_req; |
| 1605 | wait_queue_head_t scrub_pause_wait; | 1614 | wait_queue_head_t scrub_pause_wait; |
| 1606 | int scrub_workers_refcnt; | 1615 | int scrub_workers_refcnt; |
| 1607 | struct btrfs_workers scrub_workers; | 1616 | struct btrfs_workqueue *scrub_workers; |
| 1608 | struct btrfs_workers scrub_wr_completion_workers; | 1617 | struct btrfs_workqueue *scrub_wr_completion_workers; |
| 1609 | struct btrfs_workers scrub_nocow_workers; | 1618 | struct btrfs_workqueue *scrub_nocow_workers; |
| 1610 | 1619 | ||
| 1611 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 1620 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
| 1612 | u32 check_integrity_print_mask; | 1621 | u32 check_integrity_print_mask; |
| @@ -1647,7 +1656,7 @@ struct btrfs_fs_info { | |||
| 1647 | /* qgroup rescan items */ | 1656 | /* qgroup rescan items */ |
| 1648 | struct mutex qgroup_rescan_lock; /* protects the progress item */ | 1657 | struct mutex qgroup_rescan_lock; /* protects the progress item */ |
| 1649 | struct btrfs_key qgroup_rescan_progress; | 1658 | struct btrfs_key qgroup_rescan_progress; |
| 1650 | struct btrfs_workers qgroup_rescan_workers; | 1659 | struct btrfs_workqueue *qgroup_rescan_workers; |
| 1651 | struct completion qgroup_rescan_completion; | 1660 | struct completion qgroup_rescan_completion; |
| 1652 | struct btrfs_work qgroup_rescan_work; | 1661 | struct btrfs_work qgroup_rescan_work; |
| 1653 | 1662 | ||
| @@ -1674,10 +1683,18 @@ struct btrfs_fs_info { | |||
| 1674 | 1683 | ||
| 1675 | atomic_t mutually_exclusive_operation_running; | 1684 | atomic_t mutually_exclusive_operation_running; |
| 1676 | 1685 | ||
| 1686 | struct percpu_counter bio_counter; | ||
| 1687 | wait_queue_head_t replace_wait; | ||
| 1688 | |||
| 1677 | struct semaphore uuid_tree_rescan_sem; | 1689 | struct semaphore uuid_tree_rescan_sem; |
| 1678 | unsigned int update_uuid_tree_gen:1; | 1690 | unsigned int update_uuid_tree_gen:1; |
| 1679 | }; | 1691 | }; |
| 1680 | 1692 | ||
| 1693 | struct btrfs_subvolume_writers { | ||
| 1694 | struct percpu_counter counter; | ||
| 1695 | wait_queue_head_t wait; | ||
| 1696 | }; | ||
| 1697 | |||
| 1681 | /* | 1698 | /* |
| 1682 | * in ram representation of the tree. extent_root is used for all allocations | 1699 | * in ram representation of the tree. extent_root is used for all allocations |
| 1683 | * and for the extent tree extent_root root. | 1700 | * and for the extent tree extent_root root. |
| @@ -1702,7 +1719,6 @@ struct btrfs_root { | |||
| 1702 | struct btrfs_block_rsv *block_rsv; | 1719 | struct btrfs_block_rsv *block_rsv; |
| 1703 | 1720 | ||
| 1704 | /* free ino cache stuff */ | 1721 | /* free ino cache stuff */ |
| 1705 | struct mutex fs_commit_mutex; | ||
| 1706 | struct btrfs_free_space_ctl *free_ino_ctl; | 1722 | struct btrfs_free_space_ctl *free_ino_ctl; |
| 1707 | enum btrfs_caching_type cached; | 1723 | enum btrfs_caching_type cached; |
| 1708 | spinlock_t cache_lock; | 1724 | spinlock_t cache_lock; |
| @@ -1714,11 +1730,15 @@ struct btrfs_root { | |||
| 1714 | struct mutex log_mutex; | 1730 | struct mutex log_mutex; |
| 1715 | wait_queue_head_t log_writer_wait; | 1731 | wait_queue_head_t log_writer_wait; |
| 1716 | wait_queue_head_t log_commit_wait[2]; | 1732 | wait_queue_head_t log_commit_wait[2]; |
| 1733 | struct list_head log_ctxs[2]; | ||
| 1717 | atomic_t log_writers; | 1734 | atomic_t log_writers; |
| 1718 | atomic_t log_commit[2]; | 1735 | atomic_t log_commit[2]; |
| 1719 | atomic_t log_batch; | 1736 | atomic_t log_batch; |
| 1720 | unsigned long log_transid; | 1737 | int log_transid; |
| 1721 | unsigned long last_log_commit; | 1738 | /* Updated whether or not the commit succeeds. */ |

| 1739 | int log_transid_committed; | ||
| 1740 | /* Only updated when the commit succeeds. */ | ||
| 1741 | int last_log_commit; | ||
| 1722 | pid_t log_start_pid; | 1742 | pid_t log_start_pid; |
| 1723 | bool log_multiple_pids; | 1743 | bool log_multiple_pids; |
| 1724 | 1744 | ||
| @@ -1793,6 +1813,7 @@ struct btrfs_root { | |||
| 1793 | spinlock_t root_item_lock; | 1813 | spinlock_t root_item_lock; |
| 1794 | atomic_t refs; | 1814 | atomic_t refs; |
| 1795 | 1815 | ||
| 1816 | struct mutex delalloc_mutex; | ||
| 1796 | spinlock_t delalloc_lock; | 1817 | spinlock_t delalloc_lock; |
| 1797 | /* | 1818 | /* |
| 1798 | * all of the inodes that have delalloc bytes. It is possible for | 1819 | * all of the inodes that have delalloc bytes. It is possible for |
| @@ -1802,6 +1823,8 @@ struct btrfs_root { | |||
| 1802 | struct list_head delalloc_inodes; | 1823 | struct list_head delalloc_inodes; |
| 1803 | struct list_head delalloc_root; | 1824 | struct list_head delalloc_root; |
| 1804 | u64 nr_delalloc_inodes; | 1825 | u64 nr_delalloc_inodes; |
| 1826 | |||
| 1827 | struct mutex ordered_extent_mutex; | ||
| 1805 | /* | 1828 | /* |
| 1806 | * this is used by the balancing code to wait for all the pending | 1829 | * this is used by the balancing code to wait for all the pending |
| 1807 | * ordered extents | 1830 | * ordered extents |
| @@ -1822,6 +1845,8 @@ struct btrfs_root { | |||
| 1822 | * manipulation with the read-only status via SUBVOL_SETFLAGS | 1845 | * manipulation with the read-only status via SUBVOL_SETFLAGS |
| 1823 | */ | 1846 | */ |
| 1824 | int send_in_progress; | 1847 | int send_in_progress; |
| 1848 | struct btrfs_subvolume_writers *subv_writers; | ||
| 1849 | atomic_t will_be_snapshoted; | ||
| 1825 | }; | 1850 | }; |
| 1826 | 1851 | ||
| 1827 | struct btrfs_ioctl_defrag_range_args { | 1852 | struct btrfs_ioctl_defrag_range_args { |
| @@ -3346,6 +3371,9 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info); | |||
| 3346 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, | 3371 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, |
| 3347 | struct btrfs_fs_info *fs_info); | 3372 | struct btrfs_fs_info *fs_info); |
| 3348 | int __get_raid_index(u64 flags); | 3373 | int __get_raid_index(u64 flags); |
| 3374 | |||
| 3375 | int btrfs_start_nocow_write(struct btrfs_root *root); | ||
| 3376 | void btrfs_end_nocow_write(struct btrfs_root *root); | ||
| 3349 | /* ctree.c */ | 3377 | /* ctree.c */ |
| 3350 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 3378 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
| 3351 | int level, int *slot); | 3379 | int level, int *slot); |
| @@ -3723,7 +3751,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
| 3723 | u32 min_type); | 3751 | u32 min_type); |
| 3724 | 3752 | ||
| 3725 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); | 3753 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); |
| 3726 | int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput); | 3754 | int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput, |
| 3755 | int nr); | ||
| 3727 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, | 3756 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, |
| 3728 | struct extent_state **cached_state); | 3757 | struct extent_state **cached_state); |
| 3729 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 3758 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
| @@ -4005,6 +4034,11 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info, | |||
| 4005 | int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, | 4034 | int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, |
| 4006 | struct btrfs_scrub_progress *progress); | 4035 | struct btrfs_scrub_progress *progress); |
| 4007 | 4036 | ||
| 4037 | /* dev-replace.c */ | ||
| 4038 | void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info); | ||
| 4039 | void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info); | ||
| 4040 | void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info); | ||
| 4041 | |||
| 4008 | /* reada.c */ | 4042 | /* reada.c */ |
| 4009 | struct reada_control { | 4043 | struct reada_control { |
| 4010 | struct btrfs_root *root; /* tree to prefetch */ | 4044 | struct btrfs_root *root; /* tree to prefetch */ |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 451b00c86f6c..33e561a84013 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
| @@ -1392,11 +1392,11 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, | |||
| 1392 | return -ENOMEM; | 1392 | return -ENOMEM; |
| 1393 | 1393 | ||
| 1394 | async_work->delayed_root = delayed_root; | 1394 | async_work->delayed_root = delayed_root; |
| 1395 | async_work->work.func = btrfs_async_run_delayed_root; | 1395 | btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, |
| 1396 | async_work->work.flags = 0; | 1396 | NULL, NULL); |
| 1397 | async_work->nr = nr; | 1397 | async_work->nr = nr; |
| 1398 | 1398 | ||
| 1399 | btrfs_queue_worker(&root->fs_info->delayed_workers, &async_work->work); | 1399 | btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work); |
| 1400 | return 0; | 1400 | return 0; |
| 1401 | } | 1401 | } |
| 1402 | 1402 | ||
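Note: this is the conversion pattern repeated across the series: open-coded work->func/work->flags assignments become btrfs_init_work(), and btrfs_queue_worker() on an embedded thread pool becomes btrfs_queue_work() on a workqueue pointer. Assumed helper signatures, inferred from the call sites in this diff:

	/* func runs unordered; ordered_func and then ordered_free run in
	 * queueing order once func has completed; both may be NULL, as in
	 * the delayed-node case above. */
	void btrfs_init_work(struct btrfs_work *work,
			     btrfs_func_t func,
			     btrfs_func_t ordered_func,
			     btrfs_func_t ordered_free);
	void btrfs_queue_work(struct btrfs_workqueue *wq,
			      struct btrfs_work *work);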
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index f3bff89eecf0..31299646024d 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
| @@ -199,44 +199,31 @@ static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root, | |||
| 199 | */ | 199 | */ |
| 200 | static struct btrfs_delayed_ref_head * | 200 | static struct btrfs_delayed_ref_head * |
| 201 | find_ref_head(struct rb_root *root, u64 bytenr, | 201 | find_ref_head(struct rb_root *root, u64 bytenr, |
| 202 | struct btrfs_delayed_ref_head **last, int return_bigger) | 202 | int return_bigger) |
| 203 | { | 203 | { |
| 204 | struct rb_node *n; | 204 | struct rb_node *n; |
| 205 | struct btrfs_delayed_ref_head *entry; | 205 | struct btrfs_delayed_ref_head *entry; |
| 206 | int cmp = 0; | ||
| 207 | 206 | ||
| 208 | again: | ||
| 209 | n = root->rb_node; | 207 | n = root->rb_node; |
| 210 | entry = NULL; | 208 | entry = NULL; |
| 211 | while (n) { | 209 | while (n) { |
| 212 | entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node); | 210 | entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node); |
| 213 | if (last) | ||
| 214 | *last = entry; | ||
| 215 | 211 | ||
| 216 | if (bytenr < entry->node.bytenr) | 212 | if (bytenr < entry->node.bytenr) |
| 217 | cmp = -1; | ||
| 218 | else if (bytenr > entry->node.bytenr) | ||
| 219 | cmp = 1; | ||
| 220 | else | ||
| 221 | cmp = 0; | ||
| 222 | |||
| 223 | if (cmp < 0) | ||
| 224 | n = n->rb_left; | 213 | n = n->rb_left; |
| 225 | else if (cmp > 0) | 214 | else if (bytenr > entry->node.bytenr) |
| 226 | n = n->rb_right; | 215 | n = n->rb_right; |
| 227 | else | 216 | else |
| 228 | return entry; | 217 | return entry; |
| 229 | } | 218 | } |
| 230 | if (entry && return_bigger) { | 219 | if (entry && return_bigger) { |
| 231 | if (cmp > 0) { | 220 | if (bytenr > entry->node.bytenr) { |
| 232 | n = rb_next(&entry->href_node); | 221 | n = rb_next(&entry->href_node); |
| 233 | if (!n) | 222 | if (!n) |
| 234 | n = rb_first(root); | 223 | n = rb_first(root); |
| 235 | entry = rb_entry(n, struct btrfs_delayed_ref_head, | 224 | entry = rb_entry(n, struct btrfs_delayed_ref_head, |
| 236 | href_node); | 225 | href_node); |
| 237 | bytenr = entry->node.bytenr; | 226 | return entry; |
| 238 | return_bigger = 0; | ||
| 239 | goto again; | ||
| 240 | } | 227 | } |
| 241 | return entry; | 228 | return entry; |
| 242 | } | 229 | } |
| @@ -415,12 +402,12 @@ btrfs_select_ref_head(struct btrfs_trans_handle *trans) | |||
| 415 | 402 | ||
| 416 | again: | 403 | again: |
| 417 | start = delayed_refs->run_delayed_start; | 404 | start = delayed_refs->run_delayed_start; |
| 418 | head = find_ref_head(&delayed_refs->href_root, start, NULL, 1); | 405 | head = find_ref_head(&delayed_refs->href_root, start, 1); |
| 419 | if (!head && !loop) { | 406 | if (!head && !loop) { |
| 420 | delayed_refs->run_delayed_start = 0; | 407 | delayed_refs->run_delayed_start = 0; |
| 421 | start = 0; | 408 | start = 0; |
| 422 | loop = true; | 409 | loop = true; |
| 423 | head = find_ref_head(&delayed_refs->href_root, start, NULL, 1); | 410 | head = find_ref_head(&delayed_refs->href_root, start, 1); |
| 424 | if (!head) | 411 | if (!head) |
| 425 | return NULL; | 412 | return NULL; |
| 426 | } else if (!head && loop) { | 413 | } else if (!head && loop) { |
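Note: dropping the '*last' out-parameter makes return_bigger a single-pass decision instead of a retry loop. A worked illustration with hypothetical delayed-ref heads at bytenrs 8192, 16384 and 32768:

	/*
	 *   find_ref_head(root, 16384, 0) -> head at 16384 (exact match)
	 *   find_ref_head(root, 20000, 1) -> head at 32768 (next bigger)
	 *   find_ref_head(root, 40000, 1) -> head at 8192  (wraps via rb_first)
	 *
	 * btrfs_select_ref_head() pairs the wrap-around with a one-shot
	 * restart from 0 ('loop') so a full scan still terminates.
	 */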
| @@ -508,6 +495,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, | |||
| 508 | ref = btrfs_delayed_node_to_head(update); | 495 | ref = btrfs_delayed_node_to_head(update); |
| 509 | BUG_ON(existing_ref->is_data != ref->is_data); | 496 | BUG_ON(existing_ref->is_data != ref->is_data); |
| 510 | 497 | ||
| 498 | spin_lock(&existing_ref->lock); | ||
| 511 | if (ref->must_insert_reserved) { | 499 | if (ref->must_insert_reserved) { |
| 512 | /* if the extent was freed and then | 500 | /* if the extent was freed and then |
| 513 | * reallocated before the delayed ref | 501 | * reallocated before the delayed ref |
| @@ -549,7 +537,6 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, | |||
| 549 | * only need the lock for this case cause we could be processing it | 537 | * only need the lock for this case cause we could be processing it |
| 550 | * currently, for refs we just added we know we're a-ok. | 538 | * currently, for refs we just added we know we're a-ok. |
| 551 | */ | 539 | */ |
| 552 | spin_lock(&existing_ref->lock); | ||
| 553 | existing->ref_mod += update->ref_mod; | 540 | existing->ref_mod += update->ref_mod; |
| 554 | spin_unlock(&existing_ref->lock); | 541 | spin_unlock(&existing_ref->lock); |
| 555 | } | 542 | } |
| @@ -898,7 +885,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr) | |||
| 898 | struct btrfs_delayed_ref_root *delayed_refs; | 885 | struct btrfs_delayed_ref_root *delayed_refs; |
| 899 | 886 | ||
| 900 | delayed_refs = &trans->transaction->delayed_refs; | 887 | delayed_refs = &trans->transaction->delayed_refs; |
| 901 | return find_ref_head(&delayed_refs->href_root, bytenr, NULL, 0); | 888 | return find_ref_head(&delayed_refs->href_root, bytenr, 0); |
| 902 | } | 889 | } |
| 903 | 890 | ||
| 904 | void btrfs_delayed_ref_exit(void) | 891 | void btrfs_delayed_ref_exit(void) |
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 564c92638b20..9f2290509aca 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c | |||
| @@ -431,6 +431,35 @@ leave_no_lock: | |||
| 431 | return ret; | 431 | return ret; |
| 432 | } | 432 | } |
| 433 | 433 | ||
| 434 | /* | ||
| 435 | * Block until all in-flight bios have finished. | ||
| 436 | */ | ||
| 437 | static void btrfs_rm_dev_replace_blocked(struct btrfs_fs_info *fs_info) | ||
| 438 | { | ||
| 439 | s64 writers; | ||
| 440 | DEFINE_WAIT(wait); | ||
| 441 | |||
| 442 | set_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state); | ||
| 443 | do { | ||
| 444 | prepare_to_wait(&fs_info->replace_wait, &wait, | ||
| 445 | TASK_UNINTERRUPTIBLE); | ||
| 446 | writers = percpu_counter_sum(&fs_info->bio_counter); | ||
| 447 | if (writers) | ||
| 448 | schedule(); | ||
| 449 | finish_wait(&fs_info->replace_wait, &wait); | ||
| 450 | } while (writers); | ||
| 451 | } | ||
| 452 | |||
| 453 | /* | ||
| 454 | * the target device has been removed, so it is safe to allow new bio requests. | ||
| 455 | */ | ||
| 456 | static void btrfs_rm_dev_replace_unblocked(struct btrfs_fs_info *fs_info) | ||
| 457 | { | ||
| 458 | clear_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state); | ||
| 459 | if (waitqueue_active(&fs_info->replace_wait)) | ||
| 460 | wake_up(&fs_info->replace_wait); | ||
| 461 | } | ||
| 462 | |||
| 434 | static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | 463 | static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, |
| 435 | int scrub_ret) | 464 | int scrub_ret) |
| 436 | { | 465 | { |
| @@ -458,17 +487,11 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
| 458 | src_device = dev_replace->srcdev; | 487 | src_device = dev_replace->srcdev; |
| 459 | btrfs_dev_replace_unlock(dev_replace); | 488 | btrfs_dev_replace_unlock(dev_replace); |
| 460 | 489 | ||
| 461 | /* replace old device with new one in mapping tree */ | ||
| 462 | if (!scrub_ret) | ||
| 463 | btrfs_dev_replace_update_device_in_mapping_tree(fs_info, | ||
| 464 | src_device, | ||
| 465 | tgt_device); | ||
| 466 | |||
| 467 | /* | 490 | /* |
| 468 | * flush all outstanding I/O and inode extent mappings before the | 491 | * flush all outstanding I/O and inode extent mappings before the |
| 469 | * copy operation is declared as being finished | 492 | * copy operation is declared as being finished |
| 470 | */ | 493 | */ |
| 471 | ret = btrfs_start_delalloc_roots(root->fs_info, 0); | 494 | ret = btrfs_start_delalloc_roots(root->fs_info, 0, -1); |
| 472 | if (ret) { | 495 | if (ret) { |
| 473 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | 496 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); |
| 474 | return ret; | 497 | return ret; |
| @@ -484,6 +507,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
| 484 | WARN_ON(ret); | 507 | WARN_ON(ret); |
| 485 | 508 | ||
| 486 | /* keep away write_all_supers() during the finishing procedure */ | 509 | /* keep away write_all_supers() during the finishing procedure */ |
| 510 | mutex_lock(&root->fs_info->chunk_mutex); | ||
| 487 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 511 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
| 488 | btrfs_dev_replace_lock(dev_replace); | 512 | btrfs_dev_replace_lock(dev_replace); |
| 489 | dev_replace->replace_state = | 513 | dev_replace->replace_state = |
| @@ -494,7 +518,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
| 494 | dev_replace->time_stopped = get_seconds(); | 518 | dev_replace->time_stopped = get_seconds(); |
| 495 | dev_replace->item_needs_writeback = 1; | 519 | dev_replace->item_needs_writeback = 1; |
| 496 | 520 | ||
| 497 | if (scrub_ret) { | 521 | /* replace old device with new one in mapping tree */ |
| 522 | if (!scrub_ret) { | ||
| 523 | btrfs_dev_replace_update_device_in_mapping_tree(fs_info, | ||
| 524 | src_device, | ||
| 525 | tgt_device); | ||
| 526 | } else { | ||
| 498 | printk_in_rcu(KERN_ERR | 527 | printk_in_rcu(KERN_ERR |
| 499 | "BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n", | 528 | "BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n", |
| 500 | src_device->missing ? "<missing disk>" : | 529 | src_device->missing ? "<missing disk>" : |
| @@ -503,6 +532,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
| 503 | rcu_str_deref(tgt_device->name), scrub_ret); | 532 | rcu_str_deref(tgt_device->name), scrub_ret); |
| 504 | btrfs_dev_replace_unlock(dev_replace); | 533 | btrfs_dev_replace_unlock(dev_replace); |
| 505 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 534 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
| 535 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
| 506 | if (tgt_device) | 536 | if (tgt_device) |
| 507 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); | 537 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); |
| 508 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | 538 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); |
| @@ -532,8 +562,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
| 532 | fs_info->fs_devices->latest_bdev = tgt_device->bdev; | 562 | fs_info->fs_devices->latest_bdev = tgt_device->bdev; |
| 533 | list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); | 563 | list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); |
| 534 | 564 | ||
| 565 | btrfs_rm_dev_replace_blocked(fs_info); | ||
| 566 | |||
| 535 | btrfs_rm_dev_replace_srcdev(fs_info, src_device); | 567 | btrfs_rm_dev_replace_srcdev(fs_info, src_device); |
| 536 | 568 | ||
| 569 | btrfs_rm_dev_replace_unblocked(fs_info); | ||
| 570 | |||
| 537 | /* | 571 | /* |
| 538 | * this is again a consistent state where no dev_replace procedure | 572 | * this is again a consistent state where no dev_replace procedure |
| 539 | * is running, the target device is part of the filesystem, the | 573 | * is running, the target device is part of the filesystem, the |
| @@ -543,6 +577,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
| 543 | */ | 577 | */ |
| 544 | btrfs_dev_replace_unlock(dev_replace); | 578 | btrfs_dev_replace_unlock(dev_replace); |
| 545 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 579 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
| 580 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
| 546 | 581 | ||
| 547 | /* write back the superblocks */ | 582 | /* write back the superblocks */ |
| 548 | trans = btrfs_start_transaction(root, 0); | 583 | trans = btrfs_start_transaction(root, 0); |
| @@ -862,3 +897,31 @@ void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace) | |||
| 862 | mutex_unlock(&dev_replace->lock_management_lock); | 897 | mutex_unlock(&dev_replace->lock_management_lock); |
| 863 | } | 898 | } |
| 864 | } | 899 | } |
| 900 | |||
| 901 | void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info) | ||
| 902 | { | ||
| 903 | percpu_counter_inc(&fs_info->bio_counter); | ||
| 904 | } | ||
| 905 | |||
| 906 | void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info) | ||
| 907 | { | ||
| 908 | percpu_counter_dec(&fs_info->bio_counter); | ||
| 909 | |||
| 910 | if (waitqueue_active(&fs_info->replace_wait)) | ||
| 911 | wake_up(&fs_info->replace_wait); | ||
| 912 | } | ||
| 913 | |||
| 914 | void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info) | ||
| 915 | { | ||
| 916 | DEFINE_WAIT(wait); | ||
| 917 | again: | ||
| 918 | percpu_counter_inc(&fs_info->bio_counter); | ||
| 919 | if (test_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state)) { | ||
| 920 | btrfs_bio_counter_dec(fs_info); | ||
| 921 | wait_event(fs_info->replace_wait, | ||
| 922 | !test_bit(BTRFS_FS_STATE_DEV_REPLACING, | ||
| 923 | &fs_info->fs_state)); | ||
| 924 | goto again; | ||
| 925 | } | ||
| 926 | |||
| 927 | } | ||
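Note: the percpu counter and the DEV_REPLACING bit form a simple gate: bio submitters hold a counter reference across each bio, while the replace-finish path sets the bit, drains the counter, swaps the device out, then clears the bit. A sketch of the submission side, using only the helpers added above (their call sites land in the volumes.c portion of this series):

	/* bio submission */
	btrfs_bio_counter_inc_blocked(fs_info);	/* sleeps while a replace
						   is swapping devices */
	/* ... map and submit the bio, wait for completion ... */
	btrfs_bio_counter_dec(fs_info);		/* may wake the draining
						   replace thread */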
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 81ea55314b1f..029d46c2e170 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -329,6 +329,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, | |||
| 329 | { | 329 | { |
| 330 | struct extent_state *cached_state = NULL; | 330 | struct extent_state *cached_state = NULL; |
| 331 | int ret; | 331 | int ret; |
| 332 | bool need_lock = (current->journal_info == | ||
| 333 | (void *)BTRFS_SEND_TRANS_STUB); | ||
| 332 | 334 | ||
| 333 | if (!parent_transid || btrfs_header_generation(eb) == parent_transid) | 335 | if (!parent_transid || btrfs_header_generation(eb) == parent_transid) |
| 334 | return 0; | 336 | return 0; |
| @@ -336,6 +338,11 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, | |||
| 336 | if (atomic) | 338 | if (atomic) |
| 337 | return -EAGAIN; | 339 | return -EAGAIN; |
| 338 | 340 | ||
| 341 | if (need_lock) { | ||
| 342 | btrfs_tree_read_lock(eb); | ||
| 343 | btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); | ||
| 344 | } | ||
| 345 | |||
| 339 | lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1, | 346 | lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1, |
| 340 | 0, &cached_state); | 347 | 0, &cached_state); |
| 341 | if (extent_buffer_uptodate(eb) && | 348 | if (extent_buffer_uptodate(eb) && |
| @@ -347,10 +354,22 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, | |||
| 347 | "found %llu\n", | 354 | "found %llu\n", |
| 348 | eb->start, parent_transid, btrfs_header_generation(eb)); | 355 | eb->start, parent_transid, btrfs_header_generation(eb)); |
| 349 | ret = 1; | 356 | ret = 1; |
| 350 | clear_extent_buffer_uptodate(eb); | 357 | |
| 358 | /* | ||
| 359 | * Things reading via commit roots that don't have normal protection, | ||
| 360 | * like send, can have a really old block in cache that may point at a | ||
| 361 | * block that has been freed and re-allocated. So don't clear uptodate | ||
| 362 | * if we find an eb that is under IO (dirty/writeback) because we could | ||
| 363 | * end up reading in the stale data and then writing it back out and | ||
| 364 | * making everybody very sad. | ||
| 365 | */ | ||
| 366 | if (!extent_buffer_under_io(eb)) | ||
| 367 | clear_extent_buffer_uptodate(eb); | ||
| 351 | out: | 368 | out: |
| 352 | unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1, | 369 | unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1, |
| 353 | &cached_state, GFP_NOFS); | 370 | &cached_state, GFP_NOFS); |
| 371 | if (need_lock) | ||
| 372 | btrfs_tree_read_unlock_blocking(eb); | ||
| 354 | return ret; | 373 | return ret; |
| 355 | } | 374 | } |
| 356 | 375 | ||
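Note: the unlock must stay conditional on need_lock, since only send-context readers took the eb read lock on entry. BTRFS_SEND_TRANS_STUB exists because send reads commit roots without a transaction, so the only way to recognize a send reader this deep in the read path is a marker on the task itself. A hedged sketch of the send-side setup this test assumes:

	/* send has no transaction handle; tag the task instead */
	current->journal_info = (void *)BTRFS_SEND_TRANS_STUB;
	/* ... walk commit-root blocks: verify_parent_transid() now takes
	 * the eb read lock before deciding whether to clear uptodate ... */
	current->journal_info = NULL;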
| @@ -678,32 +696,31 @@ static void end_workqueue_bio(struct bio *bio, int err) | |||
| 678 | 696 | ||
| 679 | fs_info = end_io_wq->info; | 697 | fs_info = end_io_wq->info; |
| 680 | end_io_wq->error = err; | 698 | end_io_wq->error = err; |
| 681 | end_io_wq->work.func = end_workqueue_fn; | 699 | btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL); |
| 682 | end_io_wq->work.flags = 0; | ||
| 683 | 700 | ||
| 684 | if (bio->bi_rw & REQ_WRITE) { | 701 | if (bio->bi_rw & REQ_WRITE) { |
| 685 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) | 702 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) |
| 686 | btrfs_queue_worker(&fs_info->endio_meta_write_workers, | 703 | btrfs_queue_work(fs_info->endio_meta_write_workers, |
| 687 | &end_io_wq->work); | 704 | &end_io_wq->work); |
| 688 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) | 705 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) |
| 689 | btrfs_queue_worker(&fs_info->endio_freespace_worker, | 706 | btrfs_queue_work(fs_info->endio_freespace_worker, |
| 690 | &end_io_wq->work); | 707 | &end_io_wq->work); |
| 691 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) | 708 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) |
| 692 | btrfs_queue_worker(&fs_info->endio_raid56_workers, | 709 | btrfs_queue_work(fs_info->endio_raid56_workers, |
| 693 | &end_io_wq->work); | 710 | &end_io_wq->work); |
| 694 | else | 711 | else |
| 695 | btrfs_queue_worker(&fs_info->endio_write_workers, | 712 | btrfs_queue_work(fs_info->endio_write_workers, |
| 696 | &end_io_wq->work); | 713 | &end_io_wq->work); |
| 697 | } else { | 714 | } else { |
| 698 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) | 715 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) |
| 699 | btrfs_queue_worker(&fs_info->endio_raid56_workers, | 716 | btrfs_queue_work(fs_info->endio_raid56_workers, |
| 700 | &end_io_wq->work); | 717 | &end_io_wq->work); |
| 701 | else if (end_io_wq->metadata) | 718 | else if (end_io_wq->metadata) |
| 702 | btrfs_queue_worker(&fs_info->endio_meta_workers, | 719 | btrfs_queue_work(fs_info->endio_meta_workers, |
| 703 | &end_io_wq->work); | 720 | &end_io_wq->work); |
| 704 | else | 721 | else |
| 705 | btrfs_queue_worker(&fs_info->endio_workers, | 722 | btrfs_queue_work(fs_info->endio_workers, |
| 706 | &end_io_wq->work); | 723 | &end_io_wq->work); |
| 707 | } | 724 | } |
| 708 | } | 725 | } |
| 709 | 726 | ||
| @@ -738,7 +755,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | |||
| 738 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) | 755 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) |
| 739 | { | 756 | { |
| 740 | unsigned long limit = min_t(unsigned long, | 757 | unsigned long limit = min_t(unsigned long, |
| 741 | info->workers.max_workers, | 758 | info->thread_pool_size, |
| 742 | info->fs_devices->open_devices); | 759 | info->fs_devices->open_devices); |
| 743 | return 256 * limit; | 760 | return 256 * limit; |
| 744 | } | 761 | } |
| @@ -811,11 +828,9 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
| 811 | async->submit_bio_start = submit_bio_start; | 828 | async->submit_bio_start = submit_bio_start; |
| 812 | async->submit_bio_done = submit_bio_done; | 829 | async->submit_bio_done = submit_bio_done; |
| 813 | 830 | ||
| 814 | async->work.func = run_one_async_start; | 831 | btrfs_init_work(&async->work, run_one_async_start, |
| 815 | async->work.ordered_func = run_one_async_done; | 832 | run_one_async_done, run_one_async_free); |
| 816 | async->work.ordered_free = run_one_async_free; | ||
| 817 | 833 | ||
| 818 | async->work.flags = 0; | ||
| 819 | async->bio_flags = bio_flags; | 834 | async->bio_flags = bio_flags; |
| 820 | async->bio_offset = bio_offset; | 835 | async->bio_offset = bio_offset; |
| 821 | 836 | ||
| @@ -824,9 +839,9 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
| 824 | atomic_inc(&fs_info->nr_async_submits); | 839 | atomic_inc(&fs_info->nr_async_submits); |
| 825 | 840 | ||
| 826 | if (rw & REQ_SYNC) | 841 | if (rw & REQ_SYNC) |
| 827 | btrfs_set_work_high_prio(&async->work); | 842 | btrfs_set_work_high_priority(&async->work); |
| 828 | 843 | ||
| 829 | btrfs_queue_worker(&fs_info->workers, &async->work); | 844 | btrfs_queue_work(fs_info->workers, &async->work); |
| 830 | 845 | ||
| 831 | while (atomic_read(&fs_info->async_submit_draining) && | 846 | while (atomic_read(&fs_info->async_submit_draining) && |
| 832 | atomic_read(&fs_info->nr_async_submits)) { | 847 | atomic_read(&fs_info->nr_async_submits)) { |
| @@ -1149,6 +1164,32 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 1149 | } | 1164 | } |
| 1150 | } | 1165 | } |
| 1151 | 1166 | ||
| 1167 | static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void) | ||
| 1168 | { | ||
| 1169 | struct btrfs_subvolume_writers *writers; | ||
| 1170 | int ret; | ||
| 1171 | |||
| 1172 | writers = kmalloc(sizeof(*writers), GFP_NOFS); | ||
| 1173 | if (!writers) | ||
| 1174 | return ERR_PTR(-ENOMEM); | ||
| 1175 | |||
| 1176 | ret = percpu_counter_init(&writers->counter, 0); | ||
| 1177 | if (ret < 0) { | ||
| 1178 | kfree(writers); | ||
| 1179 | return ERR_PTR(ret); | ||
| 1180 | } | ||
| 1181 | |||
| 1182 | init_waitqueue_head(&writers->wait); | ||
| 1183 | return writers; | ||
| 1184 | } | ||
| 1185 | |||
| 1186 | static void | ||
| 1187 | btrfs_free_subvolume_writers(struct btrfs_subvolume_writers *writers) | ||
| 1188 | { | ||
| 1189 | percpu_counter_destroy(&writers->counter); | ||
| 1190 | kfree(writers); | ||
| 1191 | } | ||
| 1192 | |||
| 1152 | static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | 1193 | static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, |
| 1153 | u32 stripesize, struct btrfs_root *root, | 1194 | u32 stripesize, struct btrfs_root *root, |
| 1154 | struct btrfs_fs_info *fs_info, | 1195 | struct btrfs_fs_info *fs_info, |
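Note: subv_writers backs the btrfs_start_nocow_write()/btrfs_end_nocow_write() pair declared in the ctree.h hunk above: NOCOW writers take a drainable per-subvolume reference that snapshot creation waits out. A hedged usage sketch; the implementation itself is not part of this excerpt and do_cow_write() is hypothetical:

	/* NOCOW write path */
	if (!btrfs_start_nocow_write(root))
		/* a snapshot is pending; fall back to ordinary COW */
		return do_cow_write(inode, pos, len);	/* hypothetical */

	/* ... perform the NOCOW write; a concurrent snapshot now waits
	 * for subv_writers->counter to drain ... */
	btrfs_end_nocow_write(root);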
| @@ -1194,16 +1235,22 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 1194 | spin_lock_init(&root->log_extents_lock[1]); | 1235 | spin_lock_init(&root->log_extents_lock[1]); |
| 1195 | mutex_init(&root->objectid_mutex); | 1236 | mutex_init(&root->objectid_mutex); |
| 1196 | mutex_init(&root->log_mutex); | 1237 | mutex_init(&root->log_mutex); |
| 1238 | mutex_init(&root->ordered_extent_mutex); | ||
| 1239 | mutex_init(&root->delalloc_mutex); | ||
| 1197 | init_waitqueue_head(&root->log_writer_wait); | 1240 | init_waitqueue_head(&root->log_writer_wait); |
| 1198 | init_waitqueue_head(&root->log_commit_wait[0]); | 1241 | init_waitqueue_head(&root->log_commit_wait[0]); |
| 1199 | init_waitqueue_head(&root->log_commit_wait[1]); | 1242 | init_waitqueue_head(&root->log_commit_wait[1]); |
| 1243 | INIT_LIST_HEAD(&root->log_ctxs[0]); | ||
| 1244 | INIT_LIST_HEAD(&root->log_ctxs[1]); | ||
| 1200 | atomic_set(&root->log_commit[0], 0); | 1245 | atomic_set(&root->log_commit[0], 0); |
| 1201 | atomic_set(&root->log_commit[1], 0); | 1246 | atomic_set(&root->log_commit[1], 0); |
| 1202 | atomic_set(&root->log_writers, 0); | 1247 | atomic_set(&root->log_writers, 0); |
| 1203 | atomic_set(&root->log_batch, 0); | 1248 | atomic_set(&root->log_batch, 0); |
| 1204 | atomic_set(&root->orphan_inodes, 0); | 1249 | atomic_set(&root->orphan_inodes, 0); |
| 1205 | atomic_set(&root->refs, 1); | 1250 | atomic_set(&root->refs, 1); |
| 1251 | atomic_set(&root->will_be_snapshoted, 0); | ||
| 1206 | root->log_transid = 0; | 1252 | root->log_transid = 0; |
| 1253 | root->log_transid_committed = -1; | ||
| 1207 | root->last_log_commit = 0; | 1254 | root->last_log_commit = 0; |
| 1208 | if (fs_info) | 1255 | if (fs_info) |
| 1209 | extent_io_tree_init(&root->dirty_log_pages, | 1256 | extent_io_tree_init(&root->dirty_log_pages, |
| @@ -1417,6 +1464,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | |||
| 1417 | WARN_ON(root->log_root); | 1464 | WARN_ON(root->log_root); |
| 1418 | root->log_root = log_root; | 1465 | root->log_root = log_root; |
| 1419 | root->log_transid = 0; | 1466 | root->log_transid = 0; |
| 1467 | root->log_transid_committed = -1; | ||
| 1420 | root->last_log_commit = 0; | 1468 | root->last_log_commit = 0; |
| 1421 | return 0; | 1469 | return 0; |
| 1422 | } | 1470 | } |
| @@ -1498,6 +1546,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, | |||
| 1498 | int btrfs_init_fs_root(struct btrfs_root *root) | 1546 | int btrfs_init_fs_root(struct btrfs_root *root) |
| 1499 | { | 1547 | { |
| 1500 | int ret; | 1548 | int ret; |
| 1549 | struct btrfs_subvolume_writers *writers; | ||
| 1501 | 1550 | ||
| 1502 | root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); | 1551 | root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); |
| 1503 | root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), | 1552 | root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), |
| @@ -1507,15 +1556,24 @@ int btrfs_init_fs_root(struct btrfs_root *root) | |||
| 1507 | goto fail; | 1556 | goto fail; |
| 1508 | } | 1557 | } |
| 1509 | 1558 | ||
| 1559 | writers = btrfs_alloc_subvolume_writers(); | ||
| 1560 | if (IS_ERR(writers)) { | ||
| 1561 | ret = PTR_ERR(writers); | ||
| 1562 | goto fail; | ||
| 1563 | } | ||
| 1564 | root->subv_writers = writers; | ||
| 1565 | |||
| 1510 | btrfs_init_free_ino_ctl(root); | 1566 | btrfs_init_free_ino_ctl(root); |
| 1511 | mutex_init(&root->fs_commit_mutex); | ||
| 1512 | spin_lock_init(&root->cache_lock); | 1567 | spin_lock_init(&root->cache_lock); |
| 1513 | init_waitqueue_head(&root->cache_wait); | 1568 | init_waitqueue_head(&root->cache_wait); |
| 1514 | 1569 | ||
| 1515 | ret = get_anon_bdev(&root->anon_dev); | 1570 | ret = get_anon_bdev(&root->anon_dev); |
| 1516 | if (ret) | 1571 | if (ret) |
| 1517 | goto fail; | 1572 | goto free_writers; |
| 1518 | return 0; | 1573 | return 0; |
| 1574 | |||
| 1575 | free_writers: | ||
| 1576 | btrfs_free_subvolume_writers(root->subv_writers); | ||
| 1519 | fail: | 1577 | fail: |
| 1520 | kfree(root->free_ino_ctl); | 1578 | kfree(root->free_ino_ctl); |
| 1521 | kfree(root->free_ino_pinned); | 1579 | kfree(root->free_ino_pinned); |
| @@ -1990,23 +2048,22 @@ static noinline int next_root_backup(struct btrfs_fs_info *info, | |||
| 1990 | /* helper to cleanup workers */ | 2048 | /* helper to cleanup workers */ |
| 1991 | static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) | 2049 | static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) |
| 1992 | { | 2050 | { |
| 1993 | btrfs_stop_workers(&fs_info->generic_worker); | 2051 | btrfs_destroy_workqueue(fs_info->fixup_workers); |
| 1994 | btrfs_stop_workers(&fs_info->fixup_workers); | 2052 | btrfs_destroy_workqueue(fs_info->delalloc_workers); |
| 1995 | btrfs_stop_workers(&fs_info->delalloc_workers); | 2053 | btrfs_destroy_workqueue(fs_info->workers); |
| 1996 | btrfs_stop_workers(&fs_info->workers); | 2054 | btrfs_destroy_workqueue(fs_info->endio_workers); |
| 1997 | btrfs_stop_workers(&fs_info->endio_workers); | 2055 | btrfs_destroy_workqueue(fs_info->endio_meta_workers); |
| 1998 | btrfs_stop_workers(&fs_info->endio_meta_workers); | 2056 | btrfs_destroy_workqueue(fs_info->endio_raid56_workers); |
| 1999 | btrfs_stop_workers(&fs_info->endio_raid56_workers); | 2057 | btrfs_destroy_workqueue(fs_info->rmw_workers); |
| 2000 | btrfs_stop_workers(&fs_info->rmw_workers); | 2058 | btrfs_destroy_workqueue(fs_info->endio_meta_write_workers); |
| 2001 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2059 | btrfs_destroy_workqueue(fs_info->endio_write_workers); |
| 2002 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2060 | btrfs_destroy_workqueue(fs_info->endio_freespace_worker); |
| 2003 | btrfs_stop_workers(&fs_info->endio_freespace_worker); | 2061 | btrfs_destroy_workqueue(fs_info->submit_workers); |
| 2004 | btrfs_stop_workers(&fs_info->submit_workers); | 2062 | btrfs_destroy_workqueue(fs_info->delayed_workers); |
| 2005 | btrfs_stop_workers(&fs_info->delayed_workers); | 2063 | btrfs_destroy_workqueue(fs_info->caching_workers); |
| 2006 | btrfs_stop_workers(&fs_info->caching_workers); | 2064 | btrfs_destroy_workqueue(fs_info->readahead_workers); |
| 2007 | btrfs_stop_workers(&fs_info->readahead_workers); | 2065 | btrfs_destroy_workqueue(fs_info->flush_workers); |
| 2008 | btrfs_stop_workers(&fs_info->flush_workers); | 2066 | btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers); |
| 2009 | btrfs_stop_workers(&fs_info->qgroup_rescan_workers); | ||
| 2010 | } | 2067 | } |
| 2011 | 2068 | ||
| 2012 | static void free_root_extent_buffers(struct btrfs_root *root) | 2069 | static void free_root_extent_buffers(struct btrfs_root *root) |
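[Editor's note] Worker shutdown now funnels through btrfs_destroy_workqueue() for every pool, including teardown after a partially failed open_ctree(). That style is safest when destroy accepts NULL for a queue that was never allocated; whether the btrfs helper does is not shown in this diff, so the sketch below assumes that convention (the struct layout is illustrative only):

    #include <stdlib.h>

    struct wq { int dummy; };   /* illustrative stand-in for btrfs_workqueue */

    /* Accepting NULL lets callers destroy every queue unconditionally,
     * even after a setup path that allocated only some of them. */
    static void destroy_wq(struct wq *q)
    {
            if (!q)
                    return;
            /* a real implementation would drain pending work first */
            free(q);
    }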
| @@ -2097,6 +2154,8 @@ int open_ctree(struct super_block *sb, | |||
| 2097 | int err = -EINVAL; | 2154 | int err = -EINVAL; |
| 2098 | int num_backups_tried = 0; | 2155 | int num_backups_tried = 0; |
| 2099 | int backup_index = 0; | 2156 | int backup_index = 0; |
| 2157 | int max_active; | ||
| 2158 | int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND; | ||
| 2100 | bool create_uuid_tree; | 2159 | bool create_uuid_tree; |
| 2101 | bool check_uuid_tree; | 2160 | bool check_uuid_tree; |
| 2102 | 2161 | ||
| @@ -2133,10 +2192,16 @@ int open_ctree(struct super_block *sb, | |||
| 2133 | goto fail_dirty_metadata_bytes; | 2192 | goto fail_dirty_metadata_bytes; |
| 2134 | } | 2193 | } |
| 2135 | 2194 | ||
| 2195 | ret = percpu_counter_init(&fs_info->bio_counter, 0); | ||
| 2196 | if (ret) { | ||
| 2197 | err = ret; | ||
| 2198 | goto fail_delalloc_bytes; | ||
| 2199 | } | ||
| 2200 | |||
| 2136 | fs_info->btree_inode = new_inode(sb); | 2201 | fs_info->btree_inode = new_inode(sb); |
| 2137 | if (!fs_info->btree_inode) { | 2202 | if (!fs_info->btree_inode) { |
| 2138 | err = -ENOMEM; | 2203 | err = -ENOMEM; |
| 2139 | goto fail_delalloc_bytes; | 2204 | goto fail_bio_counter; |
| 2140 | } | 2205 | } |
| 2141 | 2206 | ||
| 2142 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); | 2207 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); |
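[Editor's note] The new bio_counter joins dirty_metadata_bytes and delalloc_bytes as a percpu counter, with its own unwind label added below. The point of the percpu flavor is cheap updates from many CPUs at the cost of an approximate read. A rough userspace analogue using C11 thread-local batching; the names and the batch size are made up, and the real percpu_counter API differs:

    #include <stdatomic.h>

    #define BATCH 32

    static _Atomic long global_count;
    static _Thread_local long local_count;

    static void counter_inc(void)
    {
            /* most increments touch only this thread's cache line */
            if (++local_count >= BATCH) {
                    atomic_fetch_add(&global_count, local_count);
                    local_count = 0;
            }
    }

    static long counter_read_approx(void)
    {
            /* ignores unflushed per-thread deltas, hence approximate */
            return atomic_load(&global_count);
    }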
| @@ -2159,6 +2224,7 @@ int open_ctree(struct super_block *sb, | |||
| 2159 | spin_lock_init(&fs_info->buffer_lock); | 2224 | spin_lock_init(&fs_info->buffer_lock); |
| 2160 | rwlock_init(&fs_info->tree_mod_log_lock); | 2225 | rwlock_init(&fs_info->tree_mod_log_lock); |
| 2161 | mutex_init(&fs_info->reloc_mutex); | 2226 | mutex_init(&fs_info->reloc_mutex); |
| 2227 | mutex_init(&fs_info->delalloc_root_mutex); | ||
| 2162 | seqlock_init(&fs_info->profiles_lock); | 2228 | seqlock_init(&fs_info->profiles_lock); |
| 2163 | 2229 | ||
| 2164 | init_completion(&fs_info->kobj_unregister); | 2230 | init_completion(&fs_info->kobj_unregister); |
| @@ -2211,6 +2277,7 @@ int open_ctree(struct super_block *sb, | |||
| 2211 | atomic_set(&fs_info->scrub_pause_req, 0); | 2277 | atomic_set(&fs_info->scrub_pause_req, 0); |
| 2212 | atomic_set(&fs_info->scrubs_paused, 0); | 2278 | atomic_set(&fs_info->scrubs_paused, 0); |
| 2213 | atomic_set(&fs_info->scrub_cancel_req, 0); | 2279 | atomic_set(&fs_info->scrub_cancel_req, 0); |
| 2280 | init_waitqueue_head(&fs_info->replace_wait); | ||
| 2214 | init_waitqueue_head(&fs_info->scrub_pause_wait); | 2281 | init_waitqueue_head(&fs_info->scrub_pause_wait); |
| 2215 | fs_info->scrub_workers_refcnt = 0; | 2282 | fs_info->scrub_workers_refcnt = 0; |
| 2216 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 2283 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
| @@ -2274,7 +2341,7 @@ int open_ctree(struct super_block *sb, | |||
| 2274 | mutex_init(&fs_info->transaction_kthread_mutex); | 2341 | mutex_init(&fs_info->transaction_kthread_mutex); |
| 2275 | mutex_init(&fs_info->cleaner_mutex); | 2342 | mutex_init(&fs_info->cleaner_mutex); |
| 2276 | mutex_init(&fs_info->volume_mutex); | 2343 | mutex_init(&fs_info->volume_mutex); |
| 2277 | init_rwsem(&fs_info->extent_commit_sem); | 2344 | init_rwsem(&fs_info->commit_root_sem); |
| 2278 | init_rwsem(&fs_info->cleanup_work_sem); | 2345 | init_rwsem(&fs_info->cleanup_work_sem); |
| 2279 | init_rwsem(&fs_info->subvol_sem); | 2346 | init_rwsem(&fs_info->subvol_sem); |
| 2280 | sema_init(&fs_info->uuid_tree_rescan_sem, 1); | 2347 | sema_init(&fs_info->uuid_tree_rescan_sem, 1); |
| @@ -2458,104 +2525,68 @@ int open_ctree(struct super_block *sb, | |||
| 2458 | goto fail_alloc; | 2525 | goto fail_alloc; |
| 2459 | } | 2526 | } |
| 2460 | 2527 | ||
| 2461 | btrfs_init_workers(&fs_info->generic_worker, | 2528 | max_active = fs_info->thread_pool_size; |
| 2462 | "genwork", 1, NULL); | ||
| 2463 | |||
| 2464 | btrfs_init_workers(&fs_info->workers, "worker", | ||
| 2465 | fs_info->thread_pool_size, | ||
| 2466 | &fs_info->generic_worker); | ||
| 2467 | 2529 | ||
| 2468 | btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", | 2530 | fs_info->workers = |
| 2469 | fs_info->thread_pool_size, NULL); | 2531 | btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI, |
| 2532 | max_active, 16); | ||
| 2470 | 2533 | ||
| 2471 | btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc", | 2534 | fs_info->delalloc_workers = |
| 2472 | fs_info->thread_pool_size, NULL); | 2535 | btrfs_alloc_workqueue("delalloc", flags, max_active, 2); |
| 2473 | 2536 | ||
| 2474 | btrfs_init_workers(&fs_info->submit_workers, "submit", | 2537 | fs_info->flush_workers = |
| 2475 | min_t(u64, fs_devices->num_devices, | 2538 | btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0); |
| 2476 | fs_info->thread_pool_size), NULL); | ||
| 2477 | 2539 | ||
| 2478 | btrfs_init_workers(&fs_info->caching_workers, "cache", | 2540 | fs_info->caching_workers = |
| 2479 | fs_info->thread_pool_size, NULL); | 2541 | btrfs_alloc_workqueue("cache", flags, max_active, 0); |
| 2480 | 2542 | ||
| 2481 | /* a higher idle thresh on the submit workers makes it much more | 2543 | /* |
| 2544 | * a higher idle thresh on the submit workers makes it much more | ||
| 2482 | * likely that bios will be sent down in a sane order to the | 2545 | * likely that bios will be sent down in a sane order to the |
| 2483 | * devices | 2546 | * devices |
| 2484 | */ | 2547 | */ |
| 2485 | fs_info->submit_workers.idle_thresh = 64; | 2548 | fs_info->submit_workers = |
| 2486 | 2549 | btrfs_alloc_workqueue("submit", flags, | |
| 2487 | fs_info->workers.idle_thresh = 16; | 2550 | min_t(u64, fs_devices->num_devices, |
| 2488 | fs_info->workers.ordered = 1; | 2551 | max_active), 64); |
| 2489 | 2552 | ||
| 2490 | fs_info->delalloc_workers.idle_thresh = 2; | 2553 | fs_info->fixup_workers = |
| 2491 | fs_info->delalloc_workers.ordered = 1; | 2554 | btrfs_alloc_workqueue("fixup", flags, 1, 0); |
| 2492 | |||
| 2493 | btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1, | ||
| 2494 | &fs_info->generic_worker); | ||
| 2495 | btrfs_init_workers(&fs_info->endio_workers, "endio", | ||
| 2496 | fs_info->thread_pool_size, | ||
| 2497 | &fs_info->generic_worker); | ||
| 2498 | btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta", | ||
| 2499 | fs_info->thread_pool_size, | ||
| 2500 | &fs_info->generic_worker); | ||
| 2501 | btrfs_init_workers(&fs_info->endio_meta_write_workers, | ||
| 2502 | "endio-meta-write", fs_info->thread_pool_size, | ||
| 2503 | &fs_info->generic_worker); | ||
| 2504 | btrfs_init_workers(&fs_info->endio_raid56_workers, | ||
| 2505 | "endio-raid56", fs_info->thread_pool_size, | ||
| 2506 | &fs_info->generic_worker); | ||
| 2507 | btrfs_init_workers(&fs_info->rmw_workers, | ||
| 2508 | "rmw", fs_info->thread_pool_size, | ||
| 2509 | &fs_info->generic_worker); | ||
| 2510 | btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", | ||
| 2511 | fs_info->thread_pool_size, | ||
| 2512 | &fs_info->generic_worker); | ||
| 2513 | btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write", | ||
| 2514 | 1, &fs_info->generic_worker); | ||
| 2515 | btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta", | ||
| 2516 | fs_info->thread_pool_size, | ||
| 2517 | &fs_info->generic_worker); | ||
| 2518 | btrfs_init_workers(&fs_info->readahead_workers, "readahead", | ||
| 2519 | fs_info->thread_pool_size, | ||
| 2520 | &fs_info->generic_worker); | ||
| 2521 | btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1, | ||
| 2522 | &fs_info->generic_worker); | ||
| 2523 | 2555 | ||
| 2524 | /* | 2556 | /* |
| 2525 | * endios are largely parallel and should have a very | 2557 | * endios are largely parallel and should have a very |
| 2526 | * low idle thresh | 2558 | * low idle thresh |
| 2527 | */ | 2559 | */ |
| 2528 | fs_info->endio_workers.idle_thresh = 4; | 2560 | fs_info->endio_workers = |
| 2529 | fs_info->endio_meta_workers.idle_thresh = 4; | 2561 | btrfs_alloc_workqueue("endio", flags, max_active, 4); |
| 2530 | fs_info->endio_raid56_workers.idle_thresh = 4; | 2562 | fs_info->endio_meta_workers = |
| 2531 | fs_info->rmw_workers.idle_thresh = 2; | 2563 | btrfs_alloc_workqueue("endio-meta", flags, max_active, 4); |
| 2532 | 2564 | fs_info->endio_meta_write_workers = | |
| 2533 | fs_info->endio_write_workers.idle_thresh = 2; | 2565 | btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2); |
| 2534 | fs_info->endio_meta_write_workers.idle_thresh = 2; | 2566 | fs_info->endio_raid56_workers = |
| 2535 | fs_info->readahead_workers.idle_thresh = 2; | 2567 | btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4); |
| 2536 | 2568 | fs_info->rmw_workers = | |
| 2537 | /* | 2569 | btrfs_alloc_workqueue("rmw", flags, max_active, 2); |
| 2538 | * btrfs_start_workers can really only fail because of ENOMEM so just | 2570 | fs_info->endio_write_workers = |
| 2539 | * return -ENOMEM if any of these fail. | 2571 | btrfs_alloc_workqueue("endio-write", flags, max_active, 2); |
| 2540 | */ | 2572 | fs_info->endio_freespace_worker = |
| 2541 | ret = btrfs_start_workers(&fs_info->workers); | 2573 | btrfs_alloc_workqueue("freespace-write", flags, max_active, 0); |
| 2542 | ret |= btrfs_start_workers(&fs_info->generic_worker); | 2574 | fs_info->delayed_workers = |
| 2543 | ret |= btrfs_start_workers(&fs_info->submit_workers); | 2575 | btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0); |
| 2544 | ret |= btrfs_start_workers(&fs_info->delalloc_workers); | 2576 | fs_info->readahead_workers = |
| 2545 | ret |= btrfs_start_workers(&fs_info->fixup_workers); | 2577 | btrfs_alloc_workqueue("readahead", flags, max_active, 2); |
| 2546 | ret |= btrfs_start_workers(&fs_info->endio_workers); | 2578 | fs_info->qgroup_rescan_workers = |
| 2547 | ret |= btrfs_start_workers(&fs_info->endio_meta_workers); | 2579 | btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0); |
| 2548 | ret |= btrfs_start_workers(&fs_info->rmw_workers); | 2580 | |
| 2549 | ret |= btrfs_start_workers(&fs_info->endio_raid56_workers); | 2581 | if (!(fs_info->workers && fs_info->delalloc_workers && |
| 2550 | ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers); | 2582 | fs_info->submit_workers && fs_info->flush_workers && |
| 2551 | ret |= btrfs_start_workers(&fs_info->endio_write_workers); | 2583 | fs_info->endio_workers && fs_info->endio_meta_workers && |
| 2552 | ret |= btrfs_start_workers(&fs_info->endio_freespace_worker); | 2584 | fs_info->endio_meta_write_workers && |
| 2553 | ret |= btrfs_start_workers(&fs_info->delayed_workers); | 2585 | fs_info->endio_write_workers && fs_info->endio_raid56_workers && |
| 2554 | ret |= btrfs_start_workers(&fs_info->caching_workers); | 2586 | fs_info->endio_freespace_worker && fs_info->rmw_workers && |
| 2555 | ret |= btrfs_start_workers(&fs_info->readahead_workers); | 2587 | fs_info->caching_workers && fs_info->readahead_workers && |
| 2556 | ret |= btrfs_start_workers(&fs_info->flush_workers); | 2588 | fs_info->fixup_workers && fs_info->delayed_workers && |
| 2557 | ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers); | 2589 | fs_info->qgroup_rescan_workers)) { |
| 2558 | if (ret) { | ||
| 2559 | err = -ENOMEM; | 2590 | err = -ENOMEM; |
| 2560 | goto fail_sb_buffer; | 2591 | goto fail_sb_buffer; |
| 2561 | } | 2592 | } |
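[Editor's note] With the queues coming from btrfs_alloc_workqueue(), startup failure is now detected by one combined NULL test instead of OR-ing btrfs_start_workers() return codes together. A self-contained sketch of the allocate-then-check-as-a-group shape; alloc_wq() is hypothetical, and free(NULL) being a no-op plays the role of the NULL-tolerant destroy sketched earlier:

    #include <stdlib.h>

    struct wq { const char *name; int thresh; };

    static struct wq *alloc_wq(const char *name, int thresh)
    {
            struct wq *q = malloc(sizeof(*q));
            if (q) {
                    q->name = name;
                    q->thresh = thresh;
            }
            return q;
    }

    static void destroy_wq(struct wq *q) { free(q); } /* free(NULL) is a no-op */

    static int start_pools(struct wq **workers, struct wq **endio)
    {
            *workers = alloc_wq("worker", 16);
            *endio   = alloc_wq("endio", 4);

            if (!(*workers && *endio)) {   /* one NULL test for the whole group */
                    destroy_wq(*workers);  /* safe even for the one that failed */
                    destroy_wq(*endio);
                    return -1;
            }
            return 0;
    }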
| @@ -2963,6 +2994,8 @@ fail_iput: | |||
| 2963 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2994 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
| 2964 | 2995 | ||
| 2965 | iput(fs_info->btree_inode); | 2996 | iput(fs_info->btree_inode); |
| 2997 | fail_bio_counter: | ||
| 2998 | percpu_counter_destroy(&fs_info->bio_counter); | ||
| 2966 | fail_delalloc_bytes: | 2999 | fail_delalloc_bytes: |
| 2967 | percpu_counter_destroy(&fs_info->delalloc_bytes); | 3000 | percpu_counter_destroy(&fs_info->delalloc_bytes); |
| 2968 | fail_dirty_metadata_bytes: | 3001 | fail_dirty_metadata_bytes: |
| @@ -3244,6 +3277,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
| 3244 | /* send down all the barriers */ | 3277 | /* send down all the barriers */ |
| 3245 | head = &info->fs_devices->devices; | 3278 | head = &info->fs_devices->devices; |
| 3246 | list_for_each_entry_rcu(dev, head, dev_list) { | 3279 | list_for_each_entry_rcu(dev, head, dev_list) { |
| 3280 | if (dev->missing) | ||
| 3281 | continue; | ||
| 3247 | if (!dev->bdev) { | 3282 | if (!dev->bdev) { |
| 3248 | errors_send++; | 3283 | errors_send++; |
| 3249 | continue; | 3284 | continue; |
| @@ -3258,6 +3293,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
| 3258 | 3293 | ||
| 3259 | /* wait for all the barriers */ | 3294 | /* wait for all the barriers */ |
| 3260 | list_for_each_entry_rcu(dev, head, dev_list) { | 3295 | list_for_each_entry_rcu(dev, head, dev_list) { |
| 3296 | if (dev->missing) | ||
| 3297 | continue; | ||
| 3261 | if (!dev->bdev) { | 3298 | if (!dev->bdev) { |
| 3262 | errors_wait++; | 3299 | errors_wait++; |
| 3263 | continue; | 3300 | continue; |
| @@ -3477,6 +3514,8 @@ static void free_fs_root(struct btrfs_root *root) | |||
| 3477 | root->orphan_block_rsv = NULL; | 3514 | root->orphan_block_rsv = NULL; |
| 3478 | if (root->anon_dev) | 3515 | if (root->anon_dev) |
| 3479 | free_anon_bdev(root->anon_dev); | 3516 | free_anon_bdev(root->anon_dev); |
| 3517 | if (root->subv_writers) | ||
| 3518 | btrfs_free_subvolume_writers(root->subv_writers); | ||
| 3480 | free_extent_buffer(root->node); | 3519 | free_extent_buffer(root->node); |
| 3481 | free_extent_buffer(root->commit_root); | 3520 | free_extent_buffer(root->commit_root); |
| 3482 | kfree(root->free_ino_ctl); | 3521 | kfree(root->free_ino_ctl); |
| @@ -3610,6 +3649,7 @@ int close_ctree(struct btrfs_root *root) | |||
| 3610 | 3649 | ||
| 3611 | percpu_counter_destroy(&fs_info->dirty_metadata_bytes); | 3650 | percpu_counter_destroy(&fs_info->dirty_metadata_bytes); |
| 3612 | percpu_counter_destroy(&fs_info->delalloc_bytes); | 3651 | percpu_counter_destroy(&fs_info->delalloc_bytes); |
| 3652 | percpu_counter_destroy(&fs_info->bio_counter); | ||
| 3613 | bdi_destroy(&fs_info->bdi); | 3653 | bdi_destroy(&fs_info->bdi); |
| 3614 | cleanup_srcu_struct(&fs_info->subvol_srcu); | 3654 | cleanup_srcu_struct(&fs_info->subvol_srcu); |
| 3615 | 3655 | ||
| @@ -3791,9 +3831,11 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) | |||
| 3791 | list_move_tail(&root->ordered_root, | 3831 | list_move_tail(&root->ordered_root, |
| 3792 | &fs_info->ordered_roots); | 3832 | &fs_info->ordered_roots); |
| 3793 | 3833 | ||
| 3834 | spin_unlock(&fs_info->ordered_root_lock); | ||
| 3794 | btrfs_destroy_ordered_extents(root); | 3835 | btrfs_destroy_ordered_extents(root); |
| 3795 | 3836 | ||
| 3796 | cond_resched_lock(&fs_info->ordered_root_lock); | 3837 | cond_resched(); |
| 3838 | spin_lock(&fs_info->ordered_root_lock); | ||
| 3797 | } | 3839 | } |
| 3798 | spin_unlock(&fs_info->ordered_root_lock); | 3840 | spin_unlock(&fs_info->ordered_root_lock); |
| 3799 | } | 3841 | } |
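[Editor's note] In the hunk above, cond_resched_lock() becomes an explicit unlock/cond_resched()/re-lock sequence, because btrfs_destroy_ordered_extents() can block and the spinlock must not be held across a blocking call. A pthread-flavored sketch of that shape, with a plain counter standing in for the ordered_roots list:

    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static int pending = 3;        /* stand-in for the ordered_roots list */

    static void process_one(void)
    {
            /* may block; must not be called with the lock held */
    }

    static void drain_all(void)
    {
            pthread_mutex_lock(&lock);
            while (pending > 0) {
                    pending--;
                    pthread_mutex_unlock(&lock); /* drop before blocking work */
                    process_one();
                    pthread_mutex_lock(&lock);   /* re-take, recheck shared state */
            }
            pthread_mutex_unlock(&lock);
    }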
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 32312e09f0f5..1306487c82cf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -419,7 +419,7 @@ static noinline void caching_thread(struct btrfs_work *work) | |||
| 419 | again: | 419 | again: |
| 420 | mutex_lock(&caching_ctl->mutex); | 420 | mutex_lock(&caching_ctl->mutex); |
| 421 | /* need to make sure the commit_root doesn't disappear */ | 421 | /* need to make sure the commit_root doesn't disappear */ |
| 422 | down_read(&fs_info->extent_commit_sem); | 422 | down_read(&fs_info->commit_root_sem); |
| 423 | 423 | ||
| 424 | next: | 424 | next: |
| 425 | ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); | 425 | ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); |
| @@ -443,10 +443,10 @@ next: | |||
| 443 | break; | 443 | break; |
| 444 | 444 | ||
| 445 | if (need_resched() || | 445 | if (need_resched() || |
| 446 | rwsem_is_contended(&fs_info->extent_commit_sem)) { | 446 | rwsem_is_contended(&fs_info->commit_root_sem)) { |
| 447 | caching_ctl->progress = last; | 447 | caching_ctl->progress = last; |
| 448 | btrfs_release_path(path); | 448 | btrfs_release_path(path); |
| 449 | up_read(&fs_info->extent_commit_sem); | 449 | up_read(&fs_info->commit_root_sem); |
| 450 | mutex_unlock(&caching_ctl->mutex); | 450 | mutex_unlock(&caching_ctl->mutex); |
| 451 | cond_resched(); | 451 | cond_resched(); |
| 452 | goto again; | 452 | goto again; |
| @@ -513,7 +513,7 @@ next: | |||
| 513 | 513 | ||
| 514 | err: | 514 | err: |
| 515 | btrfs_free_path(path); | 515 | btrfs_free_path(path); |
| 516 | up_read(&fs_info->extent_commit_sem); | 516 | up_read(&fs_info->commit_root_sem); |
| 517 | 517 | ||
| 518 | free_excluded_extents(extent_root, block_group); | 518 | free_excluded_extents(extent_root, block_group); |
| 519 | 519 | ||
| @@ -549,7 +549,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
| 549 | caching_ctl->block_group = cache; | 549 | caching_ctl->block_group = cache; |
| 550 | caching_ctl->progress = cache->key.objectid; | 550 | caching_ctl->progress = cache->key.objectid; |
| 551 | atomic_set(&caching_ctl->count, 1); | 551 | atomic_set(&caching_ctl->count, 1); |
| 552 | caching_ctl->work.func = caching_thread; | 552 | btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL); |
| 553 | 553 | ||
| 554 | spin_lock(&cache->lock); | 554 | spin_lock(&cache->lock); |
| 555 | /* | 555 | /* |
| @@ -633,14 +633,14 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
| 633 | return 0; | 633 | return 0; |
| 634 | } | 634 | } |
| 635 | 635 | ||
| 636 | down_write(&fs_info->extent_commit_sem); | 636 | down_write(&fs_info->commit_root_sem); |
| 637 | atomic_inc(&caching_ctl->count); | 637 | atomic_inc(&caching_ctl->count); |
| 638 | list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); | 638 | list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); |
| 639 | up_write(&fs_info->extent_commit_sem); | 639 | up_write(&fs_info->commit_root_sem); |
| 640 | 640 | ||
| 641 | btrfs_get_block_group(cache); | 641 | btrfs_get_block_group(cache); |
| 642 | 642 | ||
| 643 | btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work); | 643 | btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work); |
| 644 | 644 | ||
| 645 | return ret; | 645 | return ret; |
| 646 | } | 646 | } |
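[Editor's note] caching_thread is now attached with btrfs_init_work() and dispatched via btrfs_queue_work(), but the underlying shape is unchanged: the work item is embedded in its owning object, and the handler recovers the owner by pointer arithmetic. A self-contained sketch of that embedding; names are illustrative:

    #include <stddef.h>
    #include <stdio.h>

    struct work {
            void (*func)(struct work *w);
    };

    struct caching_ctl {
            int progress;
            struct work work;       /* embedded, not pointed-to */
    };

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    static void caching_fn(struct work *w)
    {
            /* recover the enclosing object from the embedded member */
            struct caching_ctl *ctl = container_of(w, struct caching_ctl, work);
            printf("caching from %d\n", ctl->progress);
    }

    int main(void)
    {
            struct caching_ctl ctl = {
                    .progress = 7,
                    .work = { .func = caching_fn },
            };
            ctl.work.func(&ctl.work); /* a real queue would defer this call */
            return 0;
    }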
| @@ -2444,7 +2444,8 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
| 2444 | spin_unlock(&locked_ref->lock); | 2444 | spin_unlock(&locked_ref->lock); |
| 2445 | spin_lock(&delayed_refs->lock); | 2445 | spin_lock(&delayed_refs->lock); |
| 2446 | spin_lock(&locked_ref->lock); | 2446 | spin_lock(&locked_ref->lock); |
| 2447 | if (rb_first(&locked_ref->ref_root)) { | 2447 | if (rb_first(&locked_ref->ref_root) || |
| 2448 | locked_ref->extent_op) { | ||
| 2448 | spin_unlock(&locked_ref->lock); | 2449 | spin_unlock(&locked_ref->lock); |
| 2449 | spin_unlock(&delayed_refs->lock); | 2450 | spin_unlock(&delayed_refs->lock); |
| 2450 | continue; | 2451 | continue; |
| @@ -3971,7 +3972,7 @@ static int can_overcommit(struct btrfs_root *root, | |||
| 3971 | } | 3972 | } |
| 3972 | 3973 | ||
| 3973 | static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, | 3974 | static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, |
| 3974 | unsigned long nr_pages) | 3975 | unsigned long nr_pages, int nr_items) |
| 3975 | { | 3976 | { |
| 3976 | struct super_block *sb = root->fs_info->sb; | 3977 | struct super_block *sb = root->fs_info->sb; |
| 3977 | 3978 | ||
| @@ -3986,9 +3987,9 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, | |||
| 3986 | * the filesystem is readonly (all dirty pages are written to | 3987 | * the filesystem is readonly (all dirty pages are written to |
| 3987 | * the disk). | 3988 | * the disk). |
| 3988 | */ | 3989 | */ |
| 3989 | btrfs_start_delalloc_roots(root->fs_info, 0); | 3990 | btrfs_start_delalloc_roots(root->fs_info, 0, nr_items); |
| 3990 | if (!current->journal_info) | 3991 | if (!current->journal_info) |
| 3991 | btrfs_wait_ordered_roots(root->fs_info, -1); | 3992 | btrfs_wait_ordered_roots(root->fs_info, nr_items); |
| 3992 | } | 3993 | } |
| 3993 | } | 3994 | } |
| 3994 | 3995 | ||
| @@ -4045,7 +4046,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
| 4045 | while (delalloc_bytes && loops < 3) { | 4046 | while (delalloc_bytes && loops < 3) { |
| 4046 | max_reclaim = min(delalloc_bytes, to_reclaim); | 4047 | max_reclaim = min(delalloc_bytes, to_reclaim); |
| 4047 | nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; | 4048 | nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; |
| 4048 | btrfs_writeback_inodes_sb_nr(root, nr_pages); | 4049 | btrfs_writeback_inodes_sb_nr(root, nr_pages, items); |
| 4049 | /* | 4050 | /* |
| 4050 | * We need to wait for the async pages to actually start before | 4051 | * We need to wait for the async pages to actually start before |
| 4051 | * we do anything. | 4052 | * we do anything. |
| @@ -4112,13 +4113,9 @@ static int may_commit_transaction(struct btrfs_root *root, | |||
| 4112 | goto commit; | 4113 | goto commit; |
| 4113 | 4114 | ||
| 4114 | /* See if there is enough pinned space to make this reservation */ | 4115 | /* See if there is enough pinned space to make this reservation */ |
| 4115 | spin_lock(&space_info->lock); | ||
| 4116 | if (percpu_counter_compare(&space_info->total_bytes_pinned, | 4116 | if (percpu_counter_compare(&space_info->total_bytes_pinned, |
| 4117 | bytes) >= 0) { | 4117 | bytes) >= 0) |
| 4118 | spin_unlock(&space_info->lock); | ||
| 4119 | goto commit; | 4118 | goto commit; |
| 4120 | } | ||
| 4121 | spin_unlock(&space_info->lock); | ||
| 4122 | 4119 | ||
| 4123 | /* | 4120 | /* |
| 4124 | * See if there is some space in the delayed insertion reservation for | 4121 | * See if there is some space in the delayed insertion reservation for |
| @@ -4127,16 +4124,13 @@ static int may_commit_transaction(struct btrfs_root *root, | |||
| 4127 | if (space_info != delayed_rsv->space_info) | 4124 | if (space_info != delayed_rsv->space_info) |
| 4128 | return -ENOSPC; | 4125 | return -ENOSPC; |
| 4129 | 4126 | ||
| 4130 | spin_lock(&space_info->lock); | ||
| 4131 | spin_lock(&delayed_rsv->lock); | 4127 | spin_lock(&delayed_rsv->lock); |
| 4132 | if (percpu_counter_compare(&space_info->total_bytes_pinned, | 4128 | if (percpu_counter_compare(&space_info->total_bytes_pinned, |
| 4133 | bytes - delayed_rsv->size) >= 0) { | 4129 | bytes - delayed_rsv->size) >= 0) { |
| 4134 | spin_unlock(&delayed_rsv->lock); | 4130 | spin_unlock(&delayed_rsv->lock); |
| 4135 | spin_unlock(&space_info->lock); | ||
| 4136 | return -ENOSPC; | 4131 | return -ENOSPC; |
| 4137 | } | 4132 | } |
| 4138 | spin_unlock(&delayed_rsv->lock); | 4133 | spin_unlock(&delayed_rsv->lock); |
| 4139 | spin_unlock(&space_info->lock); | ||
| 4140 | 4134 | ||
| 4141 | commit: | 4135 | commit: |
| 4142 | trans = btrfs_join_transaction(root); | 4136 | trans = btrfs_join_transaction(root); |
| @@ -4181,7 +4175,7 @@ static int flush_space(struct btrfs_root *root, | |||
| 4181 | break; | 4175 | break; |
| 4182 | case FLUSH_DELALLOC: | 4176 | case FLUSH_DELALLOC: |
| 4183 | case FLUSH_DELALLOC_WAIT: | 4177 | case FLUSH_DELALLOC_WAIT: |
| 4184 | shrink_delalloc(root, num_bytes, orig_bytes, | 4178 | shrink_delalloc(root, num_bytes * 2, orig_bytes, |
| 4185 | state == FLUSH_DELALLOC_WAIT); | 4179 | state == FLUSH_DELALLOC_WAIT); |
| 4186 | break; | 4180 | break; |
| 4187 | case ALLOC_CHUNK: | 4181 | case ALLOC_CHUNK: |
| @@ -5477,7 +5471,7 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | |||
| 5477 | struct btrfs_block_group_cache *cache; | 5471 | struct btrfs_block_group_cache *cache; |
| 5478 | struct btrfs_space_info *space_info; | 5472 | struct btrfs_space_info *space_info; |
| 5479 | 5473 | ||
| 5480 | down_write(&fs_info->extent_commit_sem); | 5474 | down_write(&fs_info->commit_root_sem); |
| 5481 | 5475 | ||
| 5482 | list_for_each_entry_safe(caching_ctl, next, | 5476 | list_for_each_entry_safe(caching_ctl, next, |
| 5483 | &fs_info->caching_block_groups, list) { | 5477 | &fs_info->caching_block_groups, list) { |
| @@ -5496,7 +5490,7 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | |||
| 5496 | else | 5490 | else |
| 5497 | fs_info->pinned_extents = &fs_info->freed_extents[0]; | 5491 | fs_info->pinned_extents = &fs_info->freed_extents[0]; |
| 5498 | 5492 | ||
| 5499 | up_write(&fs_info->extent_commit_sem); | 5493 | up_write(&fs_info->commit_root_sem); |
| 5500 | 5494 | ||
| 5501 | list_for_each_entry_rcu(space_info, &fs_info->space_info, list) | 5495 | list_for_each_entry_rcu(space_info, &fs_info->space_info, list) |
| 5502 | percpu_counter_set(&space_info->total_bytes_pinned, 0); | 5496 | percpu_counter_set(&space_info->total_bytes_pinned, 0); |
| @@ -5751,6 +5745,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5751 | "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu", | 5745 | "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu", |
| 5752 | bytenr, parent, root_objectid, owner_objectid, | 5746 | bytenr, parent, root_objectid, owner_objectid, |
| 5753 | owner_offset); | 5747 | owner_offset); |
| 5748 | btrfs_abort_transaction(trans, extent_root, ret); | ||
| 5749 | goto out; | ||
| 5754 | } else { | 5750 | } else { |
| 5755 | btrfs_abort_transaction(trans, extent_root, ret); | 5751 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5756 | goto out; | 5752 | goto out; |
| @@ -8262,14 +8258,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
| 8262 | struct btrfs_caching_control *caching_ctl; | 8258 | struct btrfs_caching_control *caching_ctl; |
| 8263 | struct rb_node *n; | 8259 | struct rb_node *n; |
| 8264 | 8260 | ||
| 8265 | down_write(&info->extent_commit_sem); | 8261 | down_write(&info->commit_root_sem); |
| 8266 | while (!list_empty(&info->caching_block_groups)) { | 8262 | while (!list_empty(&info->caching_block_groups)) { |
| 8267 | caching_ctl = list_entry(info->caching_block_groups.next, | 8263 | caching_ctl = list_entry(info->caching_block_groups.next, |
| 8268 | struct btrfs_caching_control, list); | 8264 | struct btrfs_caching_control, list); |
| 8269 | list_del(&caching_ctl->list); | 8265 | list_del(&caching_ctl->list); |
| 8270 | put_caching_control(caching_ctl); | 8266 | put_caching_control(caching_ctl); |
| 8271 | } | 8267 | } |
| 8272 | up_write(&info->extent_commit_sem); | 8268 | up_write(&info->commit_root_sem); |
| 8273 | 8269 | ||
| 8274 | spin_lock(&info->block_group_cache_lock); | 8270 | spin_lock(&info->block_group_cache_lock); |
| 8275 | while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { | 8271 | while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { |
| @@ -8343,9 +8339,15 @@ static void __link_block_group(struct btrfs_space_info *space_info, | |||
| 8343 | struct btrfs_block_group_cache *cache) | 8339 | struct btrfs_block_group_cache *cache) |
| 8344 | { | 8340 | { |
| 8345 | int index = get_block_group_index(cache); | 8341 | int index = get_block_group_index(cache); |
| 8342 | bool first = false; | ||
| 8346 | 8343 | ||
| 8347 | down_write(&space_info->groups_sem); | 8344 | down_write(&space_info->groups_sem); |
| 8348 | if (list_empty(&space_info->block_groups[index])) { | 8345 | if (list_empty(&space_info->block_groups[index])) |
| 8346 | first = true; | ||
| 8347 | list_add_tail(&cache->list, &space_info->block_groups[index]); | ||
| 8348 | up_write(&space_info->groups_sem); | ||
| 8349 | |||
| 8350 | if (first) { | ||
| 8349 | struct kobject *kobj = &space_info->block_group_kobjs[index]; | 8351 | struct kobject *kobj = &space_info->block_group_kobjs[index]; |
| 8350 | int ret; | 8352 | int ret; |
| 8351 | 8353 | ||
| @@ -8357,8 +8359,6 @@ static void __link_block_group(struct btrfs_space_info *space_info, | |||
| 8357 | kobject_put(&space_info->kobj); | 8359 | kobject_put(&space_info->kobj); |
| 8358 | } | 8360 | } |
| 8359 | } | 8361 | } |
| 8360 | list_add_tail(&cache->list, &space_info->block_groups[index]); | ||
| 8361 | up_write(&space_info->groups_sem); | ||
| 8362 | } | 8362 | } |
| 8363 | 8363 | ||
| 8364 | static struct btrfs_block_group_cache * | 8364 | static struct btrfs_block_group_cache * |
| @@ -8938,3 +8938,38 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | |||
| 8938 | range->len = trimmed; | 8938 | range->len = trimmed; |
| 8939 | return ret; | 8939 | return ret; |
| 8940 | } | 8940 | } |
| 8941 | |||
| 8942 | /* | ||
| 8943 | * btrfs_{start,end}_nocow_write() is similar to mnt_{want,drop}_write(), | ||
| 8944 | * they are used to prevent tasks from writing data into the page cache | ||
| 8945 | * via nocow before the subvolume is snapshotted and then flushing that | ||
| 8946 | * data to disk only after the snapshot creation. | ||
| 8947 | */ | ||
| 8948 | void btrfs_end_nocow_write(struct btrfs_root *root) | ||
| 8949 | { | ||
| 8950 | percpu_counter_dec(&root->subv_writers->counter); | ||
| 8951 | /* | ||
| 8952 | * Make sure counter is updated before we wake up | ||
| 8953 | * waiters. | ||
| 8954 | */ | ||
| 8955 | smp_mb(); | ||
| 8956 | if (waitqueue_active(&root->subv_writers->wait)) | ||
| 8957 | wake_up(&root->subv_writers->wait); | ||
| 8958 | } | ||
| 8959 | |||
| 8960 | int btrfs_start_nocow_write(struct btrfs_root *root) | ||
| 8961 | { | ||
| 8962 | if (unlikely(atomic_read(&root->will_be_snapshoted))) | ||
| 8963 | return 0; | ||
| 8964 | |||
| 8965 | percpu_counter_inc(&root->subv_writers->counter); | ||
| 8966 | /* | ||
| 8967 | * Make sure counter is updated before we check for snapshot creation. | ||
| 8968 | */ | ||
| 8969 | smp_mb(); | ||
| 8970 | if (unlikely(atomic_read(&root->will_be_snapshoted))) { | ||
| 8971 | btrfs_end_nocow_write(root); | ||
| 8972 | return 0; | ||
| 8973 | } | ||
| 8974 | return 1; | ||
| 8975 | } | ||
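[Editor's note] btrfs_start_nocow_write() uses a check/publish/re-check handshake against will_be_snapshoted: the writer publishes itself in the counter, then re-reads the flag, with smp_mb() ordering the two. A C11-atomics sketch of the same handshake under sequentially consistent operations, which subsume the kernel barrier; the wakeup side is elided:

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_int writers;
    static atomic_bool will_snapshot;

    static bool start_nocow_write(void)
    {
            if (atomic_load(&will_snapshot))
                    return false;
            atomic_fetch_add(&writers, 1);  /* publish before the re-check */
            if (atomic_load(&will_snapshot)) {
                    /* snapshot started between the two reads; back out */
                    atomic_fetch_sub(&writers, 1);
                    return false;
            }
            return true;
    }

    static void end_nocow_write(void)
    {
            atomic_fetch_sub(&writers, 1);
            /* a real implementation would wake the waiting snapshotter here */
    }

Whichever of the two sides observes the other's update first wins: either the writer sees the flag and backs out, or the snapshotter sees a nonzero counter and waits for it to drain.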
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 85bbd01f1271..3955e475ceec 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -229,12 +229,14 @@ void free_extent_state(struct extent_state *state) | |||
| 229 | } | 229 | } |
| 230 | } | 230 | } |
| 231 | 231 | ||
| 232 | static struct rb_node *tree_insert(struct rb_root *root, u64 offset, | 232 | static struct rb_node *tree_insert(struct rb_root *root, |
| 233 | struct rb_node *search_start, | ||
| 234 | u64 offset, | ||
| 233 | struct rb_node *node, | 235 | struct rb_node *node, |
| 234 | struct rb_node ***p_in, | 236 | struct rb_node ***p_in, |
| 235 | struct rb_node **parent_in) | 237 | struct rb_node **parent_in) |
| 236 | { | 238 | { |
| 237 | struct rb_node **p = &root->rb_node; | 239 | struct rb_node **p; |
| 238 | struct rb_node *parent = NULL; | 240 | struct rb_node *parent = NULL; |
| 239 | struct tree_entry *entry; | 241 | struct tree_entry *entry; |
| 240 | 242 | ||
| @@ -244,6 +246,7 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, | |||
| 244 | goto do_insert; | 246 | goto do_insert; |
| 245 | } | 247 | } |
| 246 | 248 | ||
| 249 | p = search_start ? &search_start : &root->rb_node; | ||
| 247 | while (*p) { | 250 | while (*p) { |
| 248 | parent = *p; | 251 | parent = *p; |
| 249 | entry = rb_entry(parent, struct tree_entry, rb_node); | 252 | entry = rb_entry(parent, struct tree_entry, rb_node); |
| @@ -430,7 +433,7 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 430 | 433 | ||
| 431 | set_state_bits(tree, state, bits); | 434 | set_state_bits(tree, state, bits); |
| 432 | 435 | ||
| 433 | node = tree_insert(&tree->state, end, &state->rb_node, p, parent); | 436 | node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent); |
| 434 | if (node) { | 437 | if (node) { |
| 435 | struct extent_state *found; | 438 | struct extent_state *found; |
| 436 | found = rb_entry(node, struct extent_state, rb_node); | 439 | found = rb_entry(node, struct extent_state, rb_node); |
| @@ -477,8 +480,8 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | |||
| 477 | prealloc->state = orig->state; | 480 | prealloc->state = orig->state; |
| 478 | orig->start = split; | 481 | orig->start = split; |
| 479 | 482 | ||
| 480 | node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node, | 483 | node = tree_insert(&tree->state, &orig->rb_node, prealloc->end, |
| 481 | NULL, NULL); | 484 | &prealloc->rb_node, NULL, NULL); |
| 482 | if (node) { | 485 | if (node) { |
| 483 | free_extent_state(prealloc); | 486 | free_extent_state(prealloc); |
| 484 | return -EEXIST; | 487 | return -EEXIST; |
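[Editor's note] tree_insert() now takes a search_start hint so that split_state() can begin the descent at the node it just split rather than at the root, since the new entry is known to land right next to it. A list-based sketch of hinted insertion under that assumption; the real code walks an rbtree:

    struct node { long key; struct node *next; };

    /* Start from a neighbor the caller already holds; with a good hint
     * the scan is O(1) instead of a full search from the head/root. */
    static void insert_with_hint(struct node *hint, struct node *n)
    {
            while (hint->next && hint->next->key < n->key)
                    hint = hint->next;
            n->next = hint->next;
            hint->next = n;
    }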
| @@ -746,6 +749,7 @@ again: | |||
| 746 | * our range starts | 749 | * our range starts |
| 747 | */ | 750 | */ |
| 748 | node = tree_search(tree, start); | 751 | node = tree_search(tree, start); |
| 752 | process_node: | ||
| 749 | if (!node) | 753 | if (!node) |
| 750 | break; | 754 | break; |
| 751 | 755 | ||
| @@ -766,7 +770,10 @@ again: | |||
| 766 | if (start > end) | 770 | if (start > end) |
| 767 | break; | 771 | break; |
| 768 | 772 | ||
| 769 | cond_resched_lock(&tree->lock); | 773 | if (!cond_resched_lock(&tree->lock)) { |
| 774 | node = rb_next(node); | ||
| 775 | goto process_node; | ||
| 776 | } | ||
| 770 | } | 777 | } |
| 771 | out: | 778 | out: |
| 772 | spin_unlock(&tree->lock); | 779 | spin_unlock(&tree->lock); |
| @@ -2757,7 +2764,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset, | |||
| 2757 | 2764 | ||
| 2758 | if (em_cached && *em_cached) { | 2765 | if (em_cached && *em_cached) { |
| 2759 | em = *em_cached; | 2766 | em = *em_cached; |
| 2760 | if (em->in_tree && start >= em->start && | 2767 | if (extent_map_in_tree(em) && start >= em->start && |
| 2761 | start < extent_map_end(em)) { | 2768 | start < extent_map_end(em)) { |
| 2762 | atomic_inc(&em->refs); | 2769 | atomic_inc(&em->refs); |
| 2763 | return em; | 2770 | return em; |
| @@ -4303,7 +4310,7 @@ static void __free_extent_buffer(struct extent_buffer *eb) | |||
| 4303 | kmem_cache_free(extent_buffer_cache, eb); | 4310 | kmem_cache_free(extent_buffer_cache, eb); |
| 4304 | } | 4311 | } |
| 4305 | 4312 | ||
| 4306 | static int extent_buffer_under_io(struct extent_buffer *eb) | 4313 | int extent_buffer_under_io(struct extent_buffer *eb) |
| 4307 | { | 4314 | { |
| 4308 | return (atomic_read(&eb->io_pages) || | 4315 | return (atomic_read(&eb->io_pages) || |
| 4309 | test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) || | 4316 | test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) || |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 58b27e5ab521..c488b45237bf 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
| @@ -320,6 +320,7 @@ int set_extent_buffer_dirty(struct extent_buffer *eb); | |||
| 320 | int set_extent_buffer_uptodate(struct extent_buffer *eb); | 320 | int set_extent_buffer_uptodate(struct extent_buffer *eb); |
| 321 | int clear_extent_buffer_uptodate(struct extent_buffer *eb); | 321 | int clear_extent_buffer_uptodate(struct extent_buffer *eb); |
| 322 | int extent_buffer_uptodate(struct extent_buffer *eb); | 322 | int extent_buffer_uptodate(struct extent_buffer *eb); |
| 323 | int extent_buffer_under_io(struct extent_buffer *eb); | ||
| 323 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, | 324 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, |
| 324 | unsigned long min_len, char **map, | 325 | unsigned long min_len, char **map, |
| 325 | unsigned long *map_start, | 326 | unsigned long *map_start, |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 996ad56b57db..1874aee69c86 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
| @@ -51,7 +51,7 @@ struct extent_map *alloc_extent_map(void) | |||
| 51 | em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS); | 51 | em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS); |
| 52 | if (!em) | 52 | if (!em) |
| 53 | return NULL; | 53 | return NULL; |
| 54 | em->in_tree = 0; | 54 | RB_CLEAR_NODE(&em->rb_node); |
| 55 | em->flags = 0; | 55 | em->flags = 0; |
| 56 | em->compress_type = BTRFS_COMPRESS_NONE; | 56 | em->compress_type = BTRFS_COMPRESS_NONE; |
| 57 | em->generation = 0; | 57 | em->generation = 0; |
| @@ -73,7 +73,7 @@ void free_extent_map(struct extent_map *em) | |||
| 73 | return; | 73 | return; |
| 74 | WARN_ON(atomic_read(&em->refs) == 0); | 74 | WARN_ON(atomic_read(&em->refs) == 0); |
| 75 | if (atomic_dec_and_test(&em->refs)) { | 75 | if (atomic_dec_and_test(&em->refs)) { |
| 76 | WARN_ON(em->in_tree); | 76 | WARN_ON(extent_map_in_tree(em)); |
| 77 | WARN_ON(!list_empty(&em->list)); | 77 | WARN_ON(!list_empty(&em->list)); |
| 78 | kmem_cache_free(extent_map_cache, em); | 78 | kmem_cache_free(extent_map_cache, em); |
| 79 | } | 79 | } |
| @@ -99,8 +99,6 @@ static int tree_insert(struct rb_root *root, struct extent_map *em) | |||
| 99 | parent = *p; | 99 | parent = *p; |
| 100 | entry = rb_entry(parent, struct extent_map, rb_node); | 100 | entry = rb_entry(parent, struct extent_map, rb_node); |
| 101 | 101 | ||
| 102 | WARN_ON(!entry->in_tree); | ||
| 103 | |||
| 104 | if (em->start < entry->start) | 102 | if (em->start < entry->start) |
| 105 | p = &(*p)->rb_left; | 103 | p = &(*p)->rb_left; |
| 106 | else if (em->start >= extent_map_end(entry)) | 104 | else if (em->start >= extent_map_end(entry)) |
| @@ -128,7 +126,6 @@ static int tree_insert(struct rb_root *root, struct extent_map *em) | |||
| 128 | if (end > entry->start && em->start < extent_map_end(entry)) | 126 | if (end > entry->start && em->start < extent_map_end(entry)) |
| 129 | return -EEXIST; | 127 | return -EEXIST; |
| 130 | 128 | ||
| 131 | em->in_tree = 1; | ||
| 132 | rb_link_node(&em->rb_node, orig_parent, p); | 129 | rb_link_node(&em->rb_node, orig_parent, p); |
| 133 | rb_insert_color(&em->rb_node, root); | 130 | rb_insert_color(&em->rb_node, root); |
| 134 | return 0; | 131 | return 0; |
| @@ -153,8 +150,6 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, | |||
| 153 | prev = n; | 150 | prev = n; |
| 154 | prev_entry = entry; | 151 | prev_entry = entry; |
| 155 | 152 | ||
| 156 | WARN_ON(!entry->in_tree); | ||
| 157 | |||
| 158 | if (offset < entry->start) | 153 | if (offset < entry->start) |
| 159 | n = n->rb_left; | 154 | n = n->rb_left; |
| 160 | else if (offset >= extent_map_end(entry)) | 155 | else if (offset >= extent_map_end(entry)) |
| @@ -240,12 +235,12 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) | |||
| 240 | em->len += merge->len; | 235 | em->len += merge->len; |
| 241 | em->block_len += merge->block_len; | 236 | em->block_len += merge->block_len; |
| 242 | em->block_start = merge->block_start; | 237 | em->block_start = merge->block_start; |
| 243 | merge->in_tree = 0; | ||
| 244 | em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; | 238 | em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; |
| 245 | em->mod_start = merge->mod_start; | 239 | em->mod_start = merge->mod_start; |
| 246 | em->generation = max(em->generation, merge->generation); | 240 | em->generation = max(em->generation, merge->generation); |
| 247 | 241 | ||
| 248 | rb_erase(&merge->rb_node, &tree->map); | 242 | rb_erase(&merge->rb_node, &tree->map); |
| 243 | RB_CLEAR_NODE(&merge->rb_node); | ||
| 249 | free_extent_map(merge); | 244 | free_extent_map(merge); |
| 250 | } | 245 | } |
| 251 | } | 246 | } |
| @@ -257,7 +252,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) | |||
| 257 | em->len += merge->len; | 252 | em->len += merge->len; |
| 258 | em->block_len += merge->block_len; | 253 | em->block_len += merge->block_len; |
| 259 | rb_erase(&merge->rb_node, &tree->map); | 254 | rb_erase(&merge->rb_node, &tree->map); |
| 260 | merge->in_tree = 0; | 255 | RB_CLEAR_NODE(&merge->rb_node); |
| 261 | em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; | 256 | em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; |
| 262 | em->generation = max(em->generation, merge->generation); | 257 | em->generation = max(em->generation, merge->generation); |
| 263 | free_extent_map(merge); | 258 | free_extent_map(merge); |
| @@ -319,7 +314,21 @@ out: | |||
| 319 | void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em) | 314 | void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em) |
| 320 | { | 315 | { |
| 321 | clear_bit(EXTENT_FLAG_LOGGING, &em->flags); | 316 | clear_bit(EXTENT_FLAG_LOGGING, &em->flags); |
| 322 | if (em->in_tree) | 317 | if (extent_map_in_tree(em)) |
| 318 | try_merge_map(tree, em); | ||
| 319 | } | ||
| 320 | |||
| 321 | static inline void setup_extent_mapping(struct extent_map_tree *tree, | ||
| 322 | struct extent_map *em, | ||
| 323 | int modified) | ||
| 324 | { | ||
| 325 | atomic_inc(&em->refs); | ||
| 326 | em->mod_start = em->start; | ||
| 327 | em->mod_len = em->len; | ||
| 328 | |||
| 329 | if (modified) | ||
| 330 | list_move(&em->list, &tree->modified_extents); | ||
| 331 | else | ||
| 323 | try_merge_map(tree, em); | 332 | try_merge_map(tree, em); |
| 324 | } | 333 | } |
| 325 | 334 | ||
| @@ -342,15 +351,7 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
| 342 | if (ret) | 351 | if (ret) |
| 343 | goto out; | 352 | goto out; |
| 344 | 353 | ||
| 345 | atomic_inc(&em->refs); | 354 | setup_extent_mapping(tree, em, modified); |
| 346 | |||
| 347 | em->mod_start = em->start; | ||
| 348 | em->mod_len = em->len; | ||
| 349 | |||
| 350 | if (modified) | ||
| 351 | list_move(&em->list, &tree->modified_extents); | ||
| 352 | else | ||
| 353 | try_merge_map(tree, em); | ||
| 354 | out: | 355 | out: |
| 355 | return ret; | 356 | return ret; |
| 356 | } | 357 | } |
| @@ -434,6 +435,21 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) | |||
| 434 | rb_erase(&em->rb_node, &tree->map); | 435 | rb_erase(&em->rb_node, &tree->map); |
| 435 | if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) | 436 | if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) |
| 436 | list_del_init(&em->list); | 437 | list_del_init(&em->list); |
| 437 | em->in_tree = 0; | 438 | RB_CLEAR_NODE(&em->rb_node); |
| 438 | return ret; | 439 | return ret; |
| 439 | } | 440 | } |
| 441 | |||
| 442 | void replace_extent_mapping(struct extent_map_tree *tree, | ||
| 443 | struct extent_map *cur, | ||
| 444 | struct extent_map *new, | ||
| 445 | int modified) | ||
| 446 | { | ||
| 447 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags)); | ||
| 448 | ASSERT(extent_map_in_tree(cur)); | ||
| 449 | if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags)) | ||
| 450 | list_del_init(&cur->list); | ||
| 451 | rb_replace_node(&cur->rb_node, &new->rb_node, &tree->map); | ||
| 452 | RB_CLEAR_NODE(&cur->rb_node); | ||
| 453 | |||
| 454 | setup_extent_mapping(tree, new, modified); | ||
| 455 | } | ||
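[Editor's note] replace_extent_mapping() swaps one node for another with rb_replace_node(), which is valid because the split extent occupies the same key position as the mapping it replaces: the links are moved over with no search and no rebalancing. A doubly-linked-list analogue of that pointer swap, with the RB_CLEAR_NODE step mirrored on the victim:

    struct dnode { struct dnode *prev, *next; };

    static void replace_node(struct dnode *old, struct dnode *new)
    {
            new->prev = old->prev;
            new->next = old->next;
            if (new->prev)
                    new->prev->next = new;
            if (new->next)
                    new->next->prev = new;
            old->prev = old->next = NULL;  /* mark the victim "not in tree" */
    }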
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 93fba716d7f8..e7fd8a56a140 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
| @@ -33,7 +33,6 @@ struct extent_map { | |||
| 33 | unsigned long flags; | 33 | unsigned long flags; |
| 34 | struct block_device *bdev; | 34 | struct block_device *bdev; |
| 35 | atomic_t refs; | 35 | atomic_t refs; |
| 36 | unsigned int in_tree; | ||
| 37 | unsigned int compress_type; | 36 | unsigned int compress_type; |
| 38 | struct list_head list; | 37 | struct list_head list; |
| 39 | }; | 38 | }; |
| @@ -44,6 +43,11 @@ struct extent_map_tree { | |||
| 44 | rwlock_t lock; | 43 | rwlock_t lock; |
| 45 | }; | 44 | }; |
| 46 | 45 | ||
| 46 | static inline int extent_map_in_tree(const struct extent_map *em) | ||
| 47 | { | ||
| 48 | return !RB_EMPTY_NODE(&em->rb_node); | ||
| 49 | } | ||
| 50 | |||
| 47 | static inline u64 extent_map_end(struct extent_map *em) | 51 | static inline u64 extent_map_end(struct extent_map *em) |
| 48 | { | 52 | { |
| 49 | if (em->start + em->len < em->start) | 53 | if (em->start + em->len < em->start) |
| @@ -64,6 +68,10 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | |||
| 64 | int add_extent_mapping(struct extent_map_tree *tree, | 68 | int add_extent_mapping(struct extent_map_tree *tree, |
| 65 | struct extent_map *em, int modified); | 69 | struct extent_map *em, int modified); |
| 66 | int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); | 70 | int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); |
| 71 | void replace_extent_mapping(struct extent_map_tree *tree, | ||
| 72 | struct extent_map *cur, | ||
| 73 | struct extent_map *new, | ||
| 74 | int modified); | ||
| 67 | 75 | ||
| 68 | struct extent_map *alloc_extent_map(void); | 76 | struct extent_map *alloc_extent_map(void); |
| 69 | void free_extent_map(struct extent_map *em); | 77 | void free_extent_map(struct extent_map *em); |
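[Editor's note] With the in_tree field gone, membership is encoded in the rb_node itself: RB_CLEAR_NODE() points a node's parent link at itself, and RB_EMPTY_NODE() tests for exactly that, so extent_map_in_tree() needs no extra state. A minimal sketch of the idiom with a simplified node layout; the kernel packs color bits into the parent pointer:

    struct rbn { struct rbn *parent; };

    #define RBN_CLEAR(n)  ((n)->parent = (n))   /* mirrors RB_CLEAR_NODE */
    #define RBN_EMPTY(n)  ((n)->parent == (n))  /* mirrors RB_EMPTY_NODE */

    static int map_in_tree(struct rbn *n)
    {
            return !RBN_EMPTY(n);
    }

The self-pointer can never occur for a linked node, so no legal tree state is ever misread as "not in tree".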
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0165b8672f09..eb742c07e7a4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -425,13 +425,8 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | |||
| 425 | struct page *page = prepared_pages[pg]; | 425 | struct page *page = prepared_pages[pg]; |
| 426 | /* | 426 | /* |
| 427 | * Copy data from userspace to the current page | 427 | * Copy data from userspace to the current page |
| 428 | * | ||
| 429 | * Disable pagefault to avoid recursive lock since | ||
| 430 | * the pages are already locked | ||
| 431 | */ | 428 | */ |
| 432 | pagefault_disable(); | ||
| 433 | copied = iov_iter_copy_from_user_atomic(page, i, offset, count); | 429 | copied = iov_iter_copy_from_user_atomic(page, i, offset, count); |
| 434 | pagefault_enable(); | ||
| 435 | 430 | ||
| 436 | /* Flush processor's dcache for this page */ | 431 | /* Flush processor's dcache for this page */ |
| 437 | flush_dcache_page(page); | 432 | flush_dcache_page(page); |
| @@ -591,7 +586,6 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 591 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | 586 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 592 | clear_bit(EXTENT_FLAG_LOGGING, &flags); | 587 | clear_bit(EXTENT_FLAG_LOGGING, &flags); |
| 593 | modified = !list_empty(&em->list); | 588 | modified = !list_empty(&em->list); |
| 594 | remove_extent_mapping(em_tree, em); | ||
| 595 | if (no_splits) | 589 | if (no_splits) |
| 596 | goto next; | 590 | goto next; |
| 597 | 591 | ||
| @@ -622,8 +616,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 622 | split->bdev = em->bdev; | 616 | split->bdev = em->bdev; |
| 623 | split->flags = flags; | 617 | split->flags = flags; |
| 624 | split->compress_type = em->compress_type; | 618 | split->compress_type = em->compress_type; |
| 625 | ret = add_extent_mapping(em_tree, split, modified); | 619 | replace_extent_mapping(em_tree, em, split, modified); |
| 626 | BUG_ON(ret); /* Logic error */ | ||
| 627 | free_extent_map(split); | 620 | free_extent_map(split); |
| 628 | split = split2; | 621 | split = split2; |
| 629 | split2 = NULL; | 622 | split2 = NULL; |
| @@ -661,12 +654,20 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 661 | split->orig_block_len = 0; | 654 | split->orig_block_len = 0; |
| 662 | } | 655 | } |
| 663 | 656 | ||
| 664 | ret = add_extent_mapping(em_tree, split, modified); | 657 | if (extent_map_in_tree(em)) { |
| 665 | BUG_ON(ret); /* Logic error */ | 658 | replace_extent_mapping(em_tree, em, split, |
| 659 | modified); | ||
| 660 | } else { | ||
| 661 | ret = add_extent_mapping(em_tree, split, | ||
| 662 | modified); | ||
| 663 | ASSERT(ret == 0); /* Logic error */ | ||
| 664 | } | ||
| 666 | free_extent_map(split); | 665 | free_extent_map(split); |
| 667 | split = NULL; | 666 | split = NULL; |
| 668 | } | 667 | } |
| 669 | next: | 668 | next: |
| 669 | if (extent_map_in_tree(em)) | ||
| 670 | remove_extent_mapping(em_tree, em); | ||
| 670 | write_unlock(&em_tree->lock); | 671 | write_unlock(&em_tree->lock); |
| 671 | 672 | ||
| 672 | /* once for us */ | 673 | /* once for us */ |
| @@ -720,7 +721,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
| 720 | if (drop_cache) | 721 | if (drop_cache) |
| 721 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | 722 | btrfs_drop_extent_cache(inode, start, end - 1, 0); |
| 722 | 723 | ||
| 723 | if (start >= BTRFS_I(inode)->disk_i_size) | 724 | if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent) |
| 724 | modify_tree = 0; | 725 | modify_tree = 0; |
| 725 | 726 | ||
| 726 | while (1) { | 727 | while (1) { |
| @@ -798,7 +799,10 @@ next_slot: | |||
| 798 | */ | 799 | */ |
| 799 | if (start > key.offset && end < extent_end) { | 800 | if (start > key.offset && end < extent_end) { |
| 800 | BUG_ON(del_nr > 0); | 801 | BUG_ON(del_nr > 0); |
| 801 | BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); | 802 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
| 803 | ret = -EINVAL; | ||
| 804 | break; | ||
| 805 | } | ||
| 802 | 806 | ||
| 803 | memcpy(&new_key, &key, sizeof(new_key)); | 807 | memcpy(&new_key, &key, sizeof(new_key)); |
| 804 | new_key.offset = start; | 808 | new_key.offset = start; |
| @@ -841,7 +845,10 @@ next_slot: | |||
| 841 | * | -------- extent -------- | | 845 | * | -------- extent -------- | |
| 842 | */ | 846 | */ |
| 843 | if (start <= key.offset && end < extent_end) { | 847 | if (start <= key.offset && end < extent_end) { |
| 844 | BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); | 848 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
| 849 | ret = -EINVAL; | ||
| 850 | break; | ||
| 851 | } | ||
| 845 | 852 | ||
| 846 | memcpy(&new_key, &key, sizeof(new_key)); | 853 | memcpy(&new_key, &key, sizeof(new_key)); |
| 847 | new_key.offset = end; | 854 | new_key.offset = end; |
| @@ -864,7 +871,10 @@ next_slot: | |||
| 864 | */ | 871 | */ |
| 865 | if (start > key.offset && end >= extent_end) { | 872 | if (start > key.offset && end >= extent_end) { |
| 866 | BUG_ON(del_nr > 0); | 873 | BUG_ON(del_nr > 0); |
| 867 | BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); | 874 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
| 875 | ret = -EINVAL; | ||
| 876 | break; | ||
| 877 | } | ||
| 868 | 878 | ||
| 869 | btrfs_set_file_extent_num_bytes(leaf, fi, | 879 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 870 | start - key.offset); | 880 | start - key.offset); |
| @@ -938,34 +948,42 @@ next_slot: | |||
| 938 | * Set path->slots[0] to first slot, so that after the delete | 948 | * Set path->slots[0] to first slot, so that after the delete |
| 939 | * if items are moved off from our leaf to its immediate left or | 949 | * if items are moved off from our leaf to its immediate left or |
| 940 | * right neighbor leafs, we end up with a correct and adjusted | 950 | * right neighbor leafs, we end up with a correct and adjusted |
| 941 | * path->slots[0] for our insertion. | 951 | * path->slots[0] for our insertion (if replace_extent != 0). |
| 942 | */ | 952 | */ |
| 943 | path->slots[0] = del_slot; | 953 | path->slots[0] = del_slot; |
| 944 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); | 954 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); |
| 945 | if (ret) | 955 | if (ret) |
| 946 | btrfs_abort_transaction(trans, root, ret); | 956 | btrfs_abort_transaction(trans, root, ret); |
| 957 | } | ||
| 947 | 958 | ||
| 948 | leaf = path->nodes[0]; | 959 | leaf = path->nodes[0]; |
| 949 | /* | 960 | /* |
| 950 | * leaf eb has flag EXTENT_BUFFER_STALE if it was deleted (that | 961 | * If btrfs_del_items() was called, it might have deleted a leaf, in |
| 951 | * is, its contents got pushed to its neighbors), in which case | 962 | * which case it unlocked our path, so check path->locks[0] matches a |
| 952 | * it means path->locks[0] == 0 | 963 | * write lock. |
| 953 | */ | 964 | */ |
| 954 | if (!ret && replace_extent && leafs_visited == 1 && | 965 | if (!ret && replace_extent && leafs_visited == 1 && |
| 955 | path->locks[0] && | 966 | (path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING || |
| 956 | btrfs_leaf_free_space(root, leaf) >= | 967 | path->locks[0] == BTRFS_WRITE_LOCK) && |
| 957 | sizeof(struct btrfs_item) + extent_item_size) { | 968 | btrfs_leaf_free_space(root, leaf) >= |
| 958 | 969 | sizeof(struct btrfs_item) + extent_item_size) { | |
| 959 | key.objectid = ino; | 970 | |
| 960 | key.type = BTRFS_EXTENT_DATA_KEY; | 971 | key.objectid = ino; |
| 961 | key.offset = start; | 972 | key.type = BTRFS_EXTENT_DATA_KEY; |
| 962 | setup_items_for_insert(root, path, &key, | 973 | key.offset = start; |
| 963 | &extent_item_size, | 974 | if (!del_nr && path->slots[0] < btrfs_header_nritems(leaf)) { |
| 964 | extent_item_size, | 975 | struct btrfs_key slot_key; |
| 965 | sizeof(struct btrfs_item) + | 976 | |
| 966 | extent_item_size, 1); | 977 | btrfs_item_key_to_cpu(leaf, &slot_key, path->slots[0]); |
| 967 | *key_inserted = 1; | 978 | if (btrfs_comp_cpu_keys(&key, &slot_key) > 0) |
| 979 | path->slots[0]++; | ||
| 968 | } | 980 | } |
| 981 | setup_items_for_insert(root, path, &key, | ||
| 982 | &extent_item_size, | ||
| 983 | extent_item_size, | ||
| 984 | sizeof(struct btrfs_item) + | ||
| 985 | extent_item_size, 1); | ||
| 986 | *key_inserted = 1; | ||
| 969 | } | 987 | } |
| 970 | 988 | ||
| 971 | if (!replace_extent || !(*key_inserted)) | 989 | if (!replace_extent || !(*key_inserted)) |
| @@ -1346,11 +1364,11 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, | |||
| 1346 | struct btrfs_ordered_extent *ordered; | 1364 | struct btrfs_ordered_extent *ordered; |
| 1347 | lock_extent_bits(&BTRFS_I(inode)->io_tree, | 1365 | lock_extent_bits(&BTRFS_I(inode)->io_tree, |
| 1348 | start_pos, last_pos, 0, cached_state); | 1366 | start_pos, last_pos, 0, cached_state); |
| 1349 | ordered = btrfs_lookup_first_ordered_extent(inode, last_pos); | 1367 | ordered = btrfs_lookup_ordered_range(inode, start_pos, |
| 1368 | last_pos - start_pos + 1); | ||
| 1350 | if (ordered && | 1369 | if (ordered && |
| 1351 | ordered->file_offset + ordered->len > start_pos && | 1370 | ordered->file_offset + ordered->len > start_pos && |
| 1352 | ordered->file_offset <= last_pos) { | 1371 | ordered->file_offset <= last_pos) { |
| 1353 | btrfs_put_ordered_extent(ordered); | ||
| 1354 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | 1372 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, |
| 1355 | start_pos, last_pos, | 1373 | start_pos, last_pos, |
| 1356 | cached_state, GFP_NOFS); | 1374 | cached_state, GFP_NOFS); |
| @@ -1358,12 +1376,9 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, | |||
| 1358 | unlock_page(pages[i]); | 1376 | unlock_page(pages[i]); |
| 1359 | page_cache_release(pages[i]); | 1377 | page_cache_release(pages[i]); |
| 1360 | } | 1378 | } |
| 1361 | ret = btrfs_wait_ordered_range(inode, start_pos, | 1379 | btrfs_start_ordered_extent(inode, ordered, 1); |
| 1362 | last_pos - start_pos + 1); | 1380 | btrfs_put_ordered_extent(ordered); |
| 1363 | if (ret) | 1381 | return -EAGAIN; |
| 1364 | return ret; | ||
| 1365 | else | ||
| 1366 | return -EAGAIN; | ||
| 1367 | } | 1382 | } |
| 1368 | if (ordered) | 1383 | if (ordered) |
| 1369 | btrfs_put_ordered_extent(ordered); | 1384 | btrfs_put_ordered_extent(ordered); |
| @@ -1396,8 +1411,12 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, | |||
| 1396 | u64 num_bytes; | 1411 | u64 num_bytes; |
| 1397 | int ret; | 1412 | int ret; |
| 1398 | 1413 | ||
| 1414 | ret = btrfs_start_nocow_write(root); | ||
| 1415 | if (!ret) | ||
| 1416 | return -ENOSPC; | ||
| 1417 | |||
| 1399 | lockstart = round_down(pos, root->sectorsize); | 1418 | lockstart = round_down(pos, root->sectorsize); |
| 1400 | lockend = lockstart + round_up(*write_bytes, root->sectorsize) - 1; | 1419 | lockend = round_up(pos + *write_bytes, root->sectorsize) - 1; |
| 1401 | 1420 | ||
| 1402 | while (1) { | 1421 | while (1) { |
| 1403 | lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); | 1422 | lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); |
| @@ -1415,12 +1434,10 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, | |||
| 1415 | ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); | 1434 | ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); |
| 1416 | if (ret <= 0) { | 1435 | if (ret <= 0) { |
| 1417 | ret = 0; | 1436 | ret = 0; |
| 1437 | btrfs_end_nocow_write(root); | ||
| 1418 | } else { | 1438 | } else { |
| 1419 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, | 1439 | *write_bytes = min_t(size_t, *write_bytes , |
| 1420 | EXTENT_DIRTY | EXTENT_DELALLOC | | 1440 | num_bytes - pos + lockstart); |
| 1421 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, | ||
| 1422 | NULL, GFP_NOFS); | ||
| 1423 | *write_bytes = min_t(size_t, *write_bytes, num_bytes); | ||
| 1424 | } | 1441 | } |
| 1425 | 1442 | ||
| 1426 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); | 1443 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); |
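The corrected lockend matters whenever pos is not sector-aligned: the old expression rounded only the length, so a write whose tail crossed into the next sector could end past the locked range. A minimal userspace sketch of the arithmetic (sector size and offsets are made-up illustration values, not from the patch):

    #include <stdio.h>
    #include <stdint.h>

    #define RDOWN(x, a) ((x) & ~((uint64_t)(a) - 1))
    #define RUP(x, a)   RDOWN((x) + (a) - 1, (a))

    int main(void)
    {
            uint64_t sectorsize = 4096, pos = 4100, len = 4096;
            uint64_t lockstart = RDOWN(pos, sectorsize);             /* 4096  */
            uint64_t write_end = pos + len - 1;                      /* 8195  */

            /* old: rounds the length alone; the tail sector escapes the lock */
            uint64_t old_end = lockstart + RUP(len, sectorsize) - 1; /* 8191  */
            /* new: rounds the end offset itself; covers the whole write */
            uint64_t new_end = RUP(pos + len, sectorsize) - 1;       /* 12287 */

            printf("write ends %llu, old lockend %llu, new lockend %llu\n",
                   (unsigned long long)write_end, (unsigned long long)old_end,
                   (unsigned long long)new_end);
            return 0;
    }

With the old form, bytes 8192..8195 of this write fell outside the locked extent range.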
| @@ -1510,6 +1527,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
| 1510 | if (!only_release_metadata) | 1527 | if (!only_release_metadata) |
| 1511 | btrfs_free_reserved_data_space(inode, | 1528 | btrfs_free_reserved_data_space(inode, |
| 1512 | reserve_bytes); | 1529 | reserve_bytes); |
| 1530 | else | ||
| 1531 | btrfs_end_nocow_write(root); | ||
| 1513 | break; | 1532 | break; |
| 1514 | } | 1533 | } |
| 1515 | 1534 | ||
| @@ -1598,6 +1617,9 @@ again: | |||
| 1598 | } | 1617 | } |
| 1599 | 1618 | ||
| 1600 | release_bytes = 0; | 1619 | release_bytes = 0; |
| 1620 | if (only_release_metadata) | ||
| 1621 | btrfs_end_nocow_write(root); | ||
| 1622 | |||
| 1601 | if (only_release_metadata && copied > 0) { | 1623 | if (only_release_metadata && copied > 0) { |
| 1602 | u64 lockstart = round_down(pos, root->sectorsize); | 1624 | u64 lockstart = round_down(pos, root->sectorsize); |
| 1603 | u64 lockend = lockstart + | 1625 | u64 lockend = lockstart + |
| @@ -1624,10 +1646,12 @@ again: | |||
| 1624 | kfree(pages); | 1646 | kfree(pages); |
| 1625 | 1647 | ||
| 1626 | if (release_bytes) { | 1648 | if (release_bytes) { |
| 1627 | if (only_release_metadata) | 1649 | if (only_release_metadata) { |
| 1650 | btrfs_end_nocow_write(root); | ||
| 1628 | btrfs_delalloc_release_metadata(inode, release_bytes); | 1651 | btrfs_delalloc_release_metadata(inode, release_bytes); |
| 1629 | else | 1652 | } else { |
| 1630 | btrfs_delalloc_release_space(inode, release_bytes); | 1653 | btrfs_delalloc_release_space(inode, release_bytes); |
| 1654 | } | ||
| 1631 | } | 1655 | } |
| 1632 | 1656 | ||
| 1633 | return num_written ? num_written : ret; | 1657 | return num_written ? num_written : ret; |
| @@ -1636,7 +1660,7 @@ again: | |||
| 1636 | static ssize_t __btrfs_direct_write(struct kiocb *iocb, | 1660 | static ssize_t __btrfs_direct_write(struct kiocb *iocb, |
| 1637 | const struct iovec *iov, | 1661 | const struct iovec *iov, |
| 1638 | unsigned long nr_segs, loff_t pos, | 1662 | unsigned long nr_segs, loff_t pos, |
| 1639 | loff_t *ppos, size_t count, size_t ocount) | 1663 | size_t count, size_t ocount) |
| 1640 | { | 1664 | { |
| 1641 | struct file *file = iocb->ki_filp; | 1665 | struct file *file = iocb->ki_filp; |
| 1642 | struct iov_iter i; | 1666 | struct iov_iter i; |
| @@ -1645,7 +1669,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, | |||
| 1645 | loff_t endbyte; | 1669 | loff_t endbyte; |
| 1646 | int err; | 1670 | int err; |
| 1647 | 1671 | ||
| 1648 | written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos, | 1672 | written = generic_file_direct_write(iocb, iov, &nr_segs, pos, |
| 1649 | count, ocount); | 1673 | count, ocount); |
| 1650 | 1674 | ||
| 1651 | if (written < 0 || written == count) | 1675 | if (written < 0 || written == count) |
| @@ -1664,7 +1688,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, | |||
| 1664 | if (err) | 1688 | if (err) |
| 1665 | goto out; | 1689 | goto out; |
| 1666 | written += written_buffered; | 1690 | written += written_buffered; |
| 1667 | *ppos = pos + written_buffered; | 1691 | iocb->ki_pos = pos + written_buffered; |
| 1668 | invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT, | 1692 | invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT, |
| 1669 | endbyte >> PAGE_CACHE_SHIFT); | 1693 | endbyte >> PAGE_CACHE_SHIFT); |
| 1670 | out: | 1694 | out: |
| @@ -1696,8 +1720,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1696 | struct file *file = iocb->ki_filp; | 1720 | struct file *file = iocb->ki_filp; |
| 1697 | struct inode *inode = file_inode(file); | 1721 | struct inode *inode = file_inode(file); |
| 1698 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1722 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1699 | loff_t *ppos = &iocb->ki_pos; | ||
| 1700 | u64 start_pos; | 1723 | u64 start_pos; |
| 1724 | u64 end_pos; | ||
| 1701 | ssize_t num_written = 0; | 1725 | ssize_t num_written = 0; |
| 1702 | ssize_t err = 0; | 1726 | ssize_t err = 0; |
| 1703 | size_t count, ocount; | 1727 | size_t count, ocount; |
| @@ -1752,7 +1776,9 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1752 | 1776 | ||
| 1753 | start_pos = round_down(pos, root->sectorsize); | 1777 | start_pos = round_down(pos, root->sectorsize); |
| 1754 | if (start_pos > i_size_read(inode)) { | 1778 | if (start_pos > i_size_read(inode)) { |
| 1755 | err = btrfs_cont_expand(inode, i_size_read(inode), start_pos); | 1779 | /* Expand hole size to cover write data, preventing empty gap */ |
| 1780 | end_pos = round_up(pos + iov->iov_len, root->sectorsize); | ||
| 1781 | err = btrfs_cont_expand(inode, i_size_read(inode), end_pos); | ||
| 1756 | if (err) { | 1782 | if (err) { |
| 1757 | mutex_unlock(&inode->i_mutex); | 1783 | mutex_unlock(&inode->i_mutex); |
| 1758 | goto out; | 1784 | goto out; |
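Expanding only to start_pos left everything between the old EOF hole and the end of the write uncovered; rounding end_pos up past the write makes btrfs_cont_expand() span the data about to land. Worked numbers (made up, sectorsize = 4096):

    /*
     * i_size = 1000, write of 3000 bytes at pos = 10000
     *
     *   start_pos = round_down(10000, 4096)      =  8192  (> i_size: expand)
     *   old target:                                 8192  -- hole stops short of the data
     *   end_pos   = round_up(10000 + 3000, 4096) = 16384  -- hole spans the whole write
     */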
| @@ -1764,7 +1790,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1764 | 1790 | ||
| 1765 | if (unlikely(file->f_flags & O_DIRECT)) { | 1791 | if (unlikely(file->f_flags & O_DIRECT)) { |
| 1766 | num_written = __btrfs_direct_write(iocb, iov, nr_segs, | 1792 | num_written = __btrfs_direct_write(iocb, iov, nr_segs, |
| 1767 | pos, ppos, count, ocount); | 1793 | pos, count, ocount); |
| 1768 | } else { | 1794 | } else { |
| 1769 | struct iov_iter i; | 1795 | struct iov_iter i; |
| 1770 | 1796 | ||
| @@ -1772,7 +1798,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1772 | 1798 | ||
| 1773 | num_written = __btrfs_buffered_write(file, &i, pos); | 1799 | num_written = __btrfs_buffered_write(file, &i, pos); |
| 1774 | if (num_written > 0) | 1800 | if (num_written > 0) |
| 1775 | *ppos = pos + num_written; | 1801 | iocb->ki_pos = pos + num_written; |
| 1776 | } | 1802 | } |
| 1777 | 1803 | ||
| 1778 | mutex_unlock(&inode->i_mutex); | 1804 | mutex_unlock(&inode->i_mutex); |
| @@ -1797,7 +1823,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1797 | BTRFS_I(inode)->last_sub_trans = root->log_transid; | 1823 | BTRFS_I(inode)->last_sub_trans = root->log_transid; |
| 1798 | if (num_written > 0) { | 1824 | if (num_written > 0) { |
| 1799 | err = generic_write_sync(file, pos, num_written); | 1825 | err = generic_write_sync(file, pos, num_written); |
| 1800 | if (err < 0 && num_written > 0) | 1826 | if (err < 0) |
| 1801 | num_written = err; | 1827 | num_written = err; |
| 1802 | } | 1828 | } |
| 1803 | 1829 | ||
| @@ -1856,8 +1882,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 1856 | struct dentry *dentry = file->f_path.dentry; | 1882 | struct dentry *dentry = file->f_path.dentry; |
| 1857 | struct inode *inode = dentry->d_inode; | 1883 | struct inode *inode = dentry->d_inode; |
| 1858 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1884 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1859 | int ret = 0; | ||
| 1860 | struct btrfs_trans_handle *trans; | 1885 | struct btrfs_trans_handle *trans; |
| 1886 | struct btrfs_log_ctx ctx; | ||
| 1887 | int ret = 0; | ||
| 1861 | bool full_sync = 0; | 1888 | bool full_sync = 0; |
| 1862 | 1889 | ||
| 1863 | trace_btrfs_sync_file(file, datasync); | 1890 | trace_btrfs_sync_file(file, datasync); |
| @@ -1951,7 +1978,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 1951 | } | 1978 | } |
| 1952 | trans->sync = true; | 1979 | trans->sync = true; |
| 1953 | 1980 | ||
| 1954 | ret = btrfs_log_dentry_safe(trans, root, dentry); | 1981 | btrfs_init_log_ctx(&ctx); |
| 1982 | |||
| 1983 | ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx); | ||
| 1955 | if (ret < 0) { | 1984 | if (ret < 0) { |
| 1956 | /* Fallthrough and commit/free transaction. */ | 1985 | /* Fallthrough and commit/free transaction. */ |
| 1957 | ret = 1; | 1986 | ret = 1; |
| @@ -1971,7 +2000,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 1971 | 2000 | ||
| 1972 | if (ret != BTRFS_NO_LOG_SYNC) { | 2001 | if (ret != BTRFS_NO_LOG_SYNC) { |
| 1973 | if (!ret) { | 2002 | if (!ret) { |
| 1974 | ret = btrfs_sync_log(trans, root); | 2003 | ret = btrfs_sync_log(trans, root, &ctx); |
| 1975 | if (!ret) { | 2004 | if (!ret) { |
| 1976 | ret = btrfs_end_transaction(trans, root); | 2005 | ret = btrfs_end_transaction(trans, root); |
| 1977 | goto out; | 2006 | goto out; |
| @@ -1993,6 +2022,7 @@ out: | |||
| 1993 | 2022 | ||
| 1994 | static const struct vm_operations_struct btrfs_file_vm_ops = { | 2023 | static const struct vm_operations_struct btrfs_file_vm_ops = { |
| 1995 | .fault = filemap_fault, | 2024 | .fault = filemap_fault, |
| 2025 | .map_pages = filemap_map_pages, | ||
| 1996 | .page_mkwrite = btrfs_page_mkwrite, | 2026 | .page_mkwrite = btrfs_page_mkwrite, |
| 1997 | .remap_pages = generic_file_remap_pages, | 2027 | .remap_pages = generic_file_remap_pages, |
| 1998 | }; | 2028 | }; |
| @@ -2157,6 +2187,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2157 | bool same_page = ((offset >> PAGE_CACHE_SHIFT) == | 2187 | bool same_page = ((offset >> PAGE_CACHE_SHIFT) == |
| 2158 | ((offset + len - 1) >> PAGE_CACHE_SHIFT)); | 2188 | ((offset + len - 1) >> PAGE_CACHE_SHIFT)); |
| 2159 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); | 2189 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); |
| 2190 | u64 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE); | ||
| 2160 | 2191 | ||
| 2161 | ret = btrfs_wait_ordered_range(inode, offset, len); | 2192 | ret = btrfs_wait_ordered_range(inode, offset, len); |
| 2162 | if (ret) | 2193 | if (ret) |
| @@ -2172,14 +2203,14 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2172 | * entire page. | 2203 | * entire page. |
| 2173 | */ | 2204 | */ |
| 2174 | if (same_page && len < PAGE_CACHE_SIZE) { | 2205 | if (same_page && len < PAGE_CACHE_SIZE) { |
| 2175 | if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE)) | 2206 | if (offset < ino_size) |
| 2176 | ret = btrfs_truncate_page(inode, offset, len, 0); | 2207 | ret = btrfs_truncate_page(inode, offset, len, 0); |
| 2177 | mutex_unlock(&inode->i_mutex); | 2208 | mutex_unlock(&inode->i_mutex); |
| 2178 | return ret; | 2209 | return ret; |
| 2179 | } | 2210 | } |
| 2180 | 2211 | ||
| 2181 | /* zero back part of the first page */ | 2212 | /* zero back part of the first page */ |
| 2182 | if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE)) { | 2213 | if (offset < ino_size) { |
| 2183 | ret = btrfs_truncate_page(inode, offset, 0, 0); | 2214 | ret = btrfs_truncate_page(inode, offset, 0, 0); |
| 2184 | if (ret) { | 2215 | if (ret) { |
| 2185 | mutex_unlock(&inode->i_mutex); | 2216 | mutex_unlock(&inode->i_mutex); |
| @@ -2188,7 +2219,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2188 | } | 2219 | } |
| 2189 | 2220 | ||
| 2190 | /* zero the front end of the last page */ | 2221 | /* zero the front end of the last page */ |
| 2191 | if (offset + len < round_up(inode->i_size, PAGE_CACHE_SIZE)) { | 2222 | if (offset + len < ino_size) { |
| 2192 | ret = btrfs_truncate_page(inode, offset + len, 0, 1); | 2223 | ret = btrfs_truncate_page(inode, offset + len, 0, 1); |
| 2193 | if (ret) { | 2224 | if (ret) { |
| 2194 | mutex_unlock(&inode->i_mutex); | 2225 | mutex_unlock(&inode->i_mutex); |
| @@ -2277,10 +2308,13 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2277 | 2308 | ||
| 2278 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 2309 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
| 2279 | 2310 | ||
| 2280 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | 2311 | if (cur_offset < ino_size) { |
| 2281 | if (ret) { | 2312 | ret = fill_holes(trans, inode, path, cur_offset, |
| 2282 | err = ret; | 2313 | drop_end); |
| 2283 | break; | 2314 | if (ret) { |
| 2315 | err = ret; | ||
| 2316 | break; | ||
| 2317 | } | ||
| 2284 | } | 2318 | } |
| 2285 | 2319 | ||
| 2286 | cur_offset = drop_end; | 2320 | cur_offset = drop_end; |
| @@ -2313,10 +2347,12 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 2313 | } | 2347 | } |
| 2314 | 2348 | ||
| 2315 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 2349 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
| 2316 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | 2350 | if (cur_offset < ino_size) { |
| 2317 | if (ret) { | 2351 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); |
| 2318 | err = ret; | 2352 | if (ret) { |
| 2319 | goto out_trans; | 2353 | err = ret; |
| 2354 | goto out_trans; | ||
| 2355 | } | ||
| 2320 | } | 2356 | } |
| 2321 | 2357 | ||
| 2322 | out_trans: | 2358 | out_trans: |
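Besides hoisting the repeated round_up() into ino_size, the punch-hole path now calls fill_holes() only while cur_offset < ino_size: hole file-extent items describe ranges inside EOF, so a punch extending past EOF must not insert them beyond the page-rounded i_size. The invariant, as a sketch with a hypothetical helper name:

    /* hypothetical helper, illustration only */
    static inline bool hole_items_needed(u64 cur_offset, u64 ino_size)
    {
            /* ino_size = round_up(i_size, PAGE_CACHE_SIZE); nothing at or
             * past EOF needs a hole extent item */
            return cur_offset < ino_size;
    }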
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index ab485e57b6fe..cc8ca193d830 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
| @@ -55,7 +55,7 @@ static int caching_kthread(void *data) | |||
| 55 | key.type = BTRFS_INODE_ITEM_KEY; | 55 | key.type = BTRFS_INODE_ITEM_KEY; |
| 56 | again: | 56 | again: |
| 57 | /* need to make sure the commit_root doesn't disappear */ | 57 | /* need to make sure the commit_root doesn't disappear */ |
| 58 | mutex_lock(&root->fs_commit_mutex); | 58 | down_read(&fs_info->commit_root_sem); |
| 59 | 59 | ||
| 60 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 60 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
| 61 | if (ret < 0) | 61 | if (ret < 0) |
| @@ -88,7 +88,7 @@ again: | |||
| 88 | btrfs_item_key_to_cpu(leaf, &key, 0); | 88 | btrfs_item_key_to_cpu(leaf, &key, 0); |
| 89 | btrfs_release_path(path); | 89 | btrfs_release_path(path); |
| 90 | root->cache_progress = last; | 90 | root->cache_progress = last; |
| 91 | mutex_unlock(&root->fs_commit_mutex); | 91 | up_read(&fs_info->commit_root_sem); |
| 92 | schedule_timeout(1); | 92 | schedule_timeout(1); |
| 93 | goto again; | 93 | goto again; |
| 94 | } else | 94 | } else |
| @@ -127,7 +127,7 @@ next: | |||
| 127 | btrfs_unpin_free_ino(root); | 127 | btrfs_unpin_free_ino(root); |
| 128 | out: | 128 | out: |
| 129 | wake_up(&root->cache_wait); | 129 | wake_up(&root->cache_wait); |
| 130 | mutex_unlock(&root->fs_commit_mutex); | 130 | up_read(&fs_info->commit_root_sem); |
| 131 | 131 | ||
| 132 | btrfs_free_path(path); | 132 | btrfs_free_path(path); |
| 133 | 133 | ||
| @@ -223,11 +223,11 @@ again: | |||
| 223 | * or the caching work is done. | 223 | * or the caching work is done. |
| 224 | */ | 224 | */ |
| 225 | 225 | ||
| 226 | mutex_lock(&root->fs_commit_mutex); | 226 | down_write(&root->fs_info->commit_root_sem); |
| 227 | spin_lock(&root->cache_lock); | 227 | spin_lock(&root->cache_lock); |
| 228 | if (root->cached == BTRFS_CACHE_FINISHED) { | 228 | if (root->cached == BTRFS_CACHE_FINISHED) { |
| 229 | spin_unlock(&root->cache_lock); | 229 | spin_unlock(&root->cache_lock); |
| 230 | mutex_unlock(&root->fs_commit_mutex); | 230 | up_write(&root->fs_info->commit_root_sem); |
| 231 | goto again; | 231 | goto again; |
| 232 | } | 232 | } |
| 233 | spin_unlock(&root->cache_lock); | 233 | spin_unlock(&root->cache_lock); |
| @@ -240,7 +240,7 @@ again: | |||
| 240 | else | 240 | else |
| 241 | __btrfs_add_free_space(pinned, objectid, 1); | 241 | __btrfs_add_free_space(pinned, objectid, 1); |
| 242 | 242 | ||
| 243 | mutex_unlock(&root->fs_commit_mutex); | 243 | up_write(&root->fs_info->commit_root_sem); |
| 244 | } | 244 | } |
| 245 | } | 245 | } |
| 246 | 246 | ||
| @@ -250,7 +250,7 @@ again: | |||
| 250 | * and others will just be dropped, because the commit root we were | 250 | * and others will just be dropped, because the commit root we were |
| 251 | * searching has changed. | 251 | * searching has changed. |
| 252 | * | 252 | * |
| 253 | * Must be called with root->fs_commit_mutex held | 253 | * Must be called with root->fs_info->commit_root_sem held |
| 254 | */ | 254 | */ |
| 255 | void btrfs_unpin_free_ino(struct btrfs_root *root) | 255 | void btrfs_unpin_free_ino(struct btrfs_root *root) |
| 256 | { | 256 | { |
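The per-root fs_commit_mutex is replaced by the fs-wide commit_root_sem. The caching kthread only reads the commit root, so it takes the semaphore shared and can run concurrently with other readers; the find-free-ino path takes it exclusive while it checks the cache state and pins inos (presumably transaction commit also takes it exclusive when swapping commit roots, but that side is outside these hunks). Sketch of the split, assuming ordinary rwsem semantics:

    /* reader: caching kthread, concurrent with other readers */
    down_read(&fs_info->commit_root_sem);
    ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    /* ... walk the commit-root leaves ... */
    up_read(&fs_info->commit_root_sem);

    /* writer: excludes all readers while the cache state and
     * pinned inos are manipulated */
    down_write(&fs_info->commit_root_sem);
    /* ... check root->cached, __btrfs_add_free_space() ... */
    up_write(&fs_info->commit_root_sem);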
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d3d44486290b..5f805bc944fa 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -394,6 +394,14 @@ static noinline int compress_file_range(struct inode *inode, | |||
| 394 | (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) | 394 | (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) |
| 395 | btrfs_add_inode_defrag(NULL, inode); | 395 | btrfs_add_inode_defrag(NULL, inode); |
| 396 | 396 | ||
| 397 | /* | ||
| 398 | * skip compression for a small file range (<= blocksize) that | ||
| 399 | * isn't an inline extent, since it doesn't save disk space at all. | ||
| 400 | */ | ||
| 401 | if ((end - start + 1) <= blocksize && | ||
| 402 | (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) | ||
| 403 | goto cleanup_and_bail_uncompressed; | ||
| 404 | |||
| 397 | actual_end = min_t(u64, isize, end + 1); | 405 | actual_end = min_t(u64, isize, end + 1); |
| 398 | again: | 406 | again: |
| 399 | will_compress = 0; | 407 | will_compress = 0; |
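The added bail-out can be made concrete: disk allocation is block-granular, so a non-inline range that already fits in one block occupies one block no matter how well it compresses. Runnable illustration (all values made up):

    #include <stdio.h>

    int main(void)
    {
            unsigned blocksize = 4096;
            unsigned len = 3000;        /* dirty range, <= one block */
            unsigned compressed = 300;  /* even a 10:1 ratio         */

            unsigned blocks_raw  = (len + blocksize - 1) / blocksize;
            unsigned blocks_comp = (compressed + blocksize - 1) / blocksize;

            /* prints "1 1": compression burned CPU and saved nothing */
            printf("%u %u\n", blocks_raw, blocks_comp);
            return 0;
    }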
| @@ -864,7 +872,8 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 864 | 872 | ||
| 865 | if (btrfs_is_free_space_inode(inode)) { | 873 | if (btrfs_is_free_space_inode(inode)) { |
| 866 | WARN_ON_ONCE(1); | 874 | WARN_ON_ONCE(1); |
| 867 | return -EINVAL; | 875 | ret = -EINVAL; |
| 876 | goto out_unlock; | ||
| 868 | } | 877 | } |
| 869 | 878 | ||
| 870 | num_bytes = ALIGN(end - start + 1, blocksize); | 879 | num_bytes = ALIGN(end - start + 1, blocksize); |
| @@ -1075,17 +1084,15 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
| 1075 | async_cow->end = cur_end; | 1084 | async_cow->end = cur_end; |
| 1076 | INIT_LIST_HEAD(&async_cow->extents); | 1085 | INIT_LIST_HEAD(&async_cow->extents); |
| 1077 | 1086 | ||
| 1078 | async_cow->work.func = async_cow_start; | 1087 | btrfs_init_work(&async_cow->work, async_cow_start, |
| 1079 | async_cow->work.ordered_func = async_cow_submit; | 1088 | async_cow_submit, async_cow_free); |
| 1080 | async_cow->work.ordered_free = async_cow_free; | ||
| 1081 | async_cow->work.flags = 0; | ||
| 1082 | 1089 | ||
| 1083 | nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> | 1090 | nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> |
| 1084 | PAGE_CACHE_SHIFT; | 1091 | PAGE_CACHE_SHIFT; |
| 1085 | atomic_add(nr_pages, &root->fs_info->async_delalloc_pages); | 1092 | atomic_add(nr_pages, &root->fs_info->async_delalloc_pages); |
| 1086 | 1093 | ||
| 1087 | btrfs_queue_worker(&root->fs_info->delalloc_workers, | 1094 | btrfs_queue_work(root->fs_info->delalloc_workers, |
| 1088 | &async_cow->work); | 1095 | &async_cow->work); |
| 1089 | 1096 | ||
| 1090 | if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) { | 1097 | if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) { |
| 1091 | wait_event(root->fs_info->async_submit_wait, | 1098 | wait_event(root->fs_info->async_submit_wait, |
| @@ -1272,6 +1279,15 @@ next_slot: | |||
| 1272 | disk_bytenr += cur_offset - found_key.offset; | 1279 | disk_bytenr += cur_offset - found_key.offset; |
| 1273 | num_bytes = min(end + 1, extent_end) - cur_offset; | 1280 | num_bytes = min(end + 1, extent_end) - cur_offset; |
| 1274 | /* | 1281 | /* |
| 1282 | * if there are pending snapshots for this root, | ||
| 1283 | * we fall back to the common COW path. | ||
| 1284 | */ | ||
| 1285 | if (!nolock) { | ||
| 1286 | err = btrfs_start_nocow_write(root); | ||
| 1287 | if (!err) | ||
| 1288 | goto out_check; | ||
| 1289 | } | ||
| 1290 | /* | ||
| 1275 | * force cow if csum exists in the range. | 1291 | * force cow if csum exists in the range. |
| 1276 | * this ensure that csum for a given extent are | 1292 | * this ensure that csum for a given extent are |
| 1277 | * either valid or do not exist. | 1293 | * either valid or do not exist. |
| @@ -1290,6 +1306,8 @@ next_slot: | |||
| 1290 | out_check: | 1306 | out_check: |
| 1291 | if (extent_end <= start) { | 1307 | if (extent_end <= start) { |
| 1292 | path->slots[0]++; | 1308 | path->slots[0]++; |
| 1309 | if (!nolock && nocow) | ||
| 1310 | btrfs_end_nocow_write(root); | ||
| 1293 | goto next_slot; | 1311 | goto next_slot; |
| 1294 | } | 1312 | } |
| 1295 | if (!nocow) { | 1313 | if (!nocow) { |
| @@ -1307,8 +1325,11 @@ out_check: | |||
| 1307 | ret = cow_file_range(inode, locked_page, | 1325 | ret = cow_file_range(inode, locked_page, |
| 1308 | cow_start, found_key.offset - 1, | 1326 | cow_start, found_key.offset - 1, |
| 1309 | page_started, nr_written, 1); | 1327 | page_started, nr_written, 1); |
| 1310 | if (ret) | 1328 | if (ret) { |
| 1329 | if (!nolock && nocow) | ||
| 1330 | btrfs_end_nocow_write(root); | ||
| 1311 | goto error; | 1331 | goto error; |
| 1332 | } | ||
| 1312 | cow_start = (u64)-1; | 1333 | cow_start = (u64)-1; |
| 1313 | } | 1334 | } |
| 1314 | 1335 | ||
| @@ -1355,8 +1376,11 @@ out_check: | |||
| 1355 | BTRFS_DATA_RELOC_TREE_OBJECTID) { | 1376 | BTRFS_DATA_RELOC_TREE_OBJECTID) { |
| 1356 | ret = btrfs_reloc_clone_csums(inode, cur_offset, | 1377 | ret = btrfs_reloc_clone_csums(inode, cur_offset, |
| 1357 | num_bytes); | 1378 | num_bytes); |
| 1358 | if (ret) | 1379 | if (ret) { |
| 1380 | if (!nolock && nocow) | ||
| 1381 | btrfs_end_nocow_write(root); | ||
| 1359 | goto error; | 1382 | goto error; |
| 1383 | } | ||
| 1360 | } | 1384 | } |
| 1361 | 1385 | ||
| 1362 | extent_clear_unlock_delalloc(inode, cur_offset, | 1386 | extent_clear_unlock_delalloc(inode, cur_offset, |
| @@ -1364,6 +1388,8 @@ out_check: | |||
| 1364 | locked_page, EXTENT_LOCKED | | 1388 | locked_page, EXTENT_LOCKED | |
| 1365 | EXTENT_DELALLOC, PAGE_UNLOCK | | 1389 | EXTENT_DELALLOC, PAGE_UNLOCK | |
| 1366 | PAGE_SET_PRIVATE2); | 1390 | PAGE_SET_PRIVATE2); |
| 1391 | if (!nolock && nocow) | ||
| 1392 | btrfs_end_nocow_write(root); | ||
| 1367 | cur_offset = extent_end; | 1393 | cur_offset = extent_end; |
| 1368 | if (cur_offset > end) | 1394 | if (cur_offset > end) |
| 1369 | break; | 1395 | break; |
| @@ -1843,9 +1869,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) | |||
| 1843 | 1869 | ||
| 1844 | SetPageChecked(page); | 1870 | SetPageChecked(page); |
| 1845 | page_cache_get(page); | 1871 | page_cache_get(page); |
| 1846 | fixup->work.func = btrfs_writepage_fixup_worker; | 1872 | btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); |
| 1847 | fixup->page = page; | 1873 | fixup->page = page; |
| 1848 | btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work); | 1874 | btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work); |
| 1849 | return -EBUSY; | 1875 | return -EBUSY; |
| 1850 | } | 1876 | } |
| 1851 | 1877 | ||
| @@ -2239,6 +2265,11 @@ static noinline int relink_extent_backref(struct btrfs_path *path, | |||
| 2239 | return PTR_ERR(root); | 2265 | return PTR_ERR(root); |
| 2240 | } | 2266 | } |
| 2241 | 2267 | ||
| 2268 | if (btrfs_root_readonly(root)) { | ||
| 2269 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
| 2270 | return 0; | ||
| 2271 | } | ||
| 2272 | |||
| 2242 | /* step 2: get inode */ | 2273 | /* step 2: get inode */ |
| 2243 | key.objectid = backref->inum; | 2274 | key.objectid = backref->inum; |
| 2244 | key.type = BTRFS_INODE_ITEM_KEY; | 2275 | key.type = BTRFS_INODE_ITEM_KEY; |
| @@ -2759,7 +2790,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
| 2759 | struct inode *inode = page->mapping->host; | 2790 | struct inode *inode = page->mapping->host; |
| 2760 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2791 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 2761 | struct btrfs_ordered_extent *ordered_extent = NULL; | 2792 | struct btrfs_ordered_extent *ordered_extent = NULL; |
| 2762 | struct btrfs_workers *workers; | 2793 | struct btrfs_workqueue *workers; |
| 2763 | 2794 | ||
| 2764 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); | 2795 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); |
| 2765 | 2796 | ||
| @@ -2768,14 +2799,13 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
| 2768 | end - start + 1, uptodate)) | 2799 | end - start + 1, uptodate)) |
| 2769 | return 0; | 2800 | return 0; |
| 2770 | 2801 | ||
| 2771 | ordered_extent->work.func = finish_ordered_fn; | 2802 | btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL); |
| 2772 | ordered_extent->work.flags = 0; | ||
| 2773 | 2803 | ||
| 2774 | if (btrfs_is_free_space_inode(inode)) | 2804 | if (btrfs_is_free_space_inode(inode)) |
| 2775 | workers = &root->fs_info->endio_freespace_worker; | 2805 | workers = root->fs_info->endio_freespace_worker; |
| 2776 | else | 2806 | else |
| 2777 | workers = &root->fs_info->endio_write_workers; | 2807 | workers = root->fs_info->endio_write_workers; |
| 2778 | btrfs_queue_worker(workers, &ordered_extent->work); | 2808 | btrfs_queue_work(workers, &ordered_extent->work); |
| 2779 | 2809 | ||
| 2780 | return 0; | 2810 | return 0; |
| 2781 | } | 2811 | } |
| @@ -4593,7 +4623,7 @@ static void evict_inode_truncate_pages(struct inode *inode) | |||
| 4593 | struct rb_node *node; | 4623 | struct rb_node *node; |
| 4594 | 4624 | ||
| 4595 | ASSERT(inode->i_state & I_FREEING); | 4625 | ASSERT(inode->i_state & I_FREEING); |
| 4596 | truncate_inode_pages(&inode->i_data, 0); | 4626 | truncate_inode_pages_final(&inode->i_data); |
| 4597 | 4627 | ||
| 4598 | write_lock(&map_tree->lock); | 4628 | write_lock(&map_tree->lock); |
| 4599 | while (!RB_EMPTY_ROOT(&map_tree->map)) { | 4629 | while (!RB_EMPTY_ROOT(&map_tree->map)) { |
| @@ -4924,7 +4954,8 @@ void btrfs_invalidate_inodes(struct btrfs_root *root) | |||
| 4924 | struct inode *inode; | 4954 | struct inode *inode; |
| 4925 | u64 objectid = 0; | 4955 | u64 objectid = 0; |
| 4926 | 4956 | ||
| 4927 | WARN_ON(btrfs_root_refs(&root->root_item) != 0); | 4957 | if (!test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) |
| 4958 | WARN_ON(btrfs_root_refs(&root->root_item) != 0); | ||
| 4928 | 4959 | ||
| 4929 | spin_lock(&root->inode_lock); | 4960 | spin_lock(&root->inode_lock); |
| 4930 | again: | 4961 | again: |
| @@ -5799,6 +5830,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
| 5799 | } | 5830 | } |
| 5800 | out_unlock: | 5831 | out_unlock: |
| 5801 | btrfs_end_transaction(trans, root); | 5832 | btrfs_end_transaction(trans, root); |
| 5833 | btrfs_balance_delayed_items(root); | ||
| 5802 | btrfs_btree_balance_dirty(root); | 5834 | btrfs_btree_balance_dirty(root); |
| 5803 | if (drop_inode) { | 5835 | if (drop_inode) { |
| 5804 | inode_dec_link_count(inode); | 5836 | inode_dec_link_count(inode); |
| @@ -5872,6 +5904,7 @@ out_unlock: | |||
| 5872 | inode_dec_link_count(inode); | 5904 | inode_dec_link_count(inode); |
| 5873 | iput(inode); | 5905 | iput(inode); |
| 5874 | } | 5906 | } |
| 5907 | btrfs_balance_delayed_items(root); | ||
| 5875 | btrfs_btree_balance_dirty(root); | 5908 | btrfs_btree_balance_dirty(root); |
| 5876 | return err; | 5909 | return err; |
| 5877 | } | 5910 | } |
| @@ -5930,6 +5963,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 5930 | } | 5963 | } |
| 5931 | 5964 | ||
| 5932 | btrfs_end_transaction(trans, root); | 5965 | btrfs_end_transaction(trans, root); |
| 5966 | btrfs_balance_delayed_items(root); | ||
| 5933 | fail: | 5967 | fail: |
| 5934 | if (drop_inode) { | 5968 | if (drop_inode) { |
| 5935 | inode_dec_link_count(inode); | 5969 | inode_dec_link_count(inode); |
| @@ -5996,6 +6030,7 @@ out_fail: | |||
| 5996 | btrfs_end_transaction(trans, root); | 6030 | btrfs_end_transaction(trans, root); |
| 5997 | if (drop_on_err) | 6031 | if (drop_on_err) |
| 5998 | iput(inode); | 6032 | iput(inode); |
| 6033 | btrfs_balance_delayed_items(root); | ||
| 5999 | btrfs_btree_balance_dirty(root); | 6034 | btrfs_btree_balance_dirty(root); |
| 6000 | return err; | 6035 | return err; |
| 6001 | } | 6036 | } |
| @@ -6550,6 +6585,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, | |||
| 6550 | int ret; | 6585 | int ret; |
| 6551 | struct extent_buffer *leaf; | 6586 | struct extent_buffer *leaf; |
| 6552 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6587 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 6588 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 6553 | struct btrfs_file_extent_item *fi; | 6589 | struct btrfs_file_extent_item *fi; |
| 6554 | struct btrfs_key key; | 6590 | struct btrfs_key key; |
| 6555 | u64 disk_bytenr; | 6591 | u64 disk_bytenr; |
| @@ -6626,6 +6662,20 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, | |||
| 6626 | 6662 | ||
| 6627 | if (btrfs_extent_readonly(root, disk_bytenr)) | 6663 | if (btrfs_extent_readonly(root, disk_bytenr)) |
| 6628 | goto out; | 6664 | goto out; |
| 6665 | |||
| 6666 | num_bytes = min(offset + *len, extent_end) - offset; | ||
| 6667 | if (!nocow && found_type == BTRFS_FILE_EXTENT_PREALLOC) { | ||
| 6668 | u64 range_end; | ||
| 6669 | |||
| 6670 | range_end = round_up(offset + num_bytes, root->sectorsize) - 1; | ||
| 6671 | ret = test_range_bit(io_tree, offset, range_end, | ||
| 6672 | EXTENT_DELALLOC, 0, NULL); | ||
| 6673 | if (ret) { | ||
| 6674 | ret = -EAGAIN; | ||
| 6675 | goto out; | ||
| 6676 | } | ||
| 6677 | } | ||
| 6678 | |||
| 6629 | btrfs_release_path(path); | 6679 | btrfs_release_path(path); |
| 6630 | 6680 | ||
| 6631 | /* | 6681 | /* |
| @@ -6654,7 +6704,6 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, | |||
| 6654 | */ | 6704 | */ |
| 6655 | disk_bytenr += backref_offset; | 6705 | disk_bytenr += backref_offset; |
| 6656 | disk_bytenr += offset - key.offset; | 6706 | disk_bytenr += offset - key.offset; |
| 6657 | num_bytes = min(offset + *len, extent_end) - offset; | ||
| 6658 | if (csum_exist_in_range(root, disk_bytenr, num_bytes)) | 6707 | if (csum_exist_in_range(root, disk_bytenr, num_bytes)) |
| 6659 | goto out; | 6708 | goto out; |
| 6660 | /* | 6709 | /* |
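can_nocow_extent() now also returns -EAGAIN when a preallocated extent still has delalloc pending inside the (sector-rounded) range, since flushing those buffered writes will claim the same prealloc space. A hedged sketch of how a caller treats the tri-state result (the caller is not part of this diff, so its shape here is an assumption):

    ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL);
    if (ret == 1) {
            /* safe: write in place into the existing/prealloc extent */
    } else {
            /* 0 or -EAGAIN: fall back to ordinary COW allocation */
    }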
| @@ -7024,10 +7073,9 @@ again: | |||
| 7024 | if (!ret) | 7073 | if (!ret) |
| 7025 | goto out_test; | 7074 | goto out_test; |
| 7026 | 7075 | ||
| 7027 | ordered->work.func = finish_ordered_fn; | 7076 | btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL); |
| 7028 | ordered->work.flags = 0; | 7077 | btrfs_queue_work(root->fs_info->endio_write_workers, |
| 7029 | btrfs_queue_worker(&root->fs_info->endio_write_workers, | 7078 | &ordered->work); |
| 7030 | &ordered->work); | ||
| 7031 | out_test: | 7079 | out_test: |
| 7032 | /* | 7080 | /* |
| 7033 | * our bio might span multiple ordered extents. If we haven't | 7081 | * our bio might span multiple ordered extents. If we haven't |
| @@ -7404,15 +7452,15 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
| 7404 | smp_mb__after_atomic_inc(); | 7452 | smp_mb__after_atomic_inc(); |
| 7405 | 7453 | ||
| 7406 | /* | 7454 | /* |
| 7407 | * The generic stuff only does filemap_write_and_wait_range, which isn't | 7455 | * The generic stuff only does filemap_write_and_wait_range, which |
| 7408 | * enough if we've written compressed pages to this area, so we need to | 7456 | * isn't enough if we've written compressed pages to this area, so |
| 7409 | * call btrfs_wait_ordered_range to make absolutely sure that any | 7457 | * we need to flush the dirty pages again to make absolutely sure |
| 7410 | * outstanding dirty pages are on disk. | 7458 | * that any outstanding dirty pages are on disk. |
| 7411 | */ | 7459 | */ |
| 7412 | count = iov_length(iov, nr_segs); | 7460 | count = iov_length(iov, nr_segs); |
| 7413 | ret = btrfs_wait_ordered_range(inode, offset, count); | 7461 | if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, |
| 7414 | if (ret) | 7462 | &BTRFS_I(inode)->runtime_flags)) |
| 7415 | return ret; | 7463 | filemap_fdatawrite_range(inode->i_mapping, offset, count); |
| 7416 | 7464 | ||
| 7417 | if (rw & WRITE) { | 7465 | if (rw & WRITE) { |
| 7418 | /* | 7466 | /* |
| @@ -8404,7 +8452,7 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, | |||
| 8404 | work->inode = inode; | 8452 | work->inode = inode; |
| 8405 | work->wait = wait; | 8453 | work->wait = wait; |
| 8406 | work->delay_iput = delay_iput; | 8454 | work->delay_iput = delay_iput; |
| 8407 | work->work.func = btrfs_run_delalloc_work; | 8455 | btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL); |
| 8408 | 8456 | ||
| 8409 | return work; | 8457 | return work; |
| 8410 | } | 8458 | } |
| @@ -8419,7 +8467,8 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work) | |||
| 8419 | * some fairly slow code that needs optimization. This walks the list | 8467 | * some fairly slow code that needs optimization. This walks the list |
| 8420 | * of all the inodes with pending delalloc and forces them to disk. | 8468 | * of all the inodes with pending delalloc and forces them to disk. |
| 8421 | */ | 8469 | */ |
| 8422 | static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | 8470 | static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput, |
| 8471 | int nr) | ||
| 8423 | { | 8472 | { |
| 8424 | struct btrfs_inode *binode; | 8473 | struct btrfs_inode *binode; |
| 8425 | struct inode *inode; | 8474 | struct inode *inode; |
| @@ -8431,6 +8480,7 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 8431 | INIT_LIST_HEAD(&works); | 8480 | INIT_LIST_HEAD(&works); |
| 8432 | INIT_LIST_HEAD(&splice); | 8481 | INIT_LIST_HEAD(&splice); |
| 8433 | 8482 | ||
| 8483 | mutex_lock(&root->delalloc_mutex); | ||
| 8434 | spin_lock(&root->delalloc_lock); | 8484 | spin_lock(&root->delalloc_lock); |
| 8435 | list_splice_init(&root->delalloc_inodes, &splice); | 8485 | list_splice_init(&root->delalloc_inodes, &splice); |
| 8436 | while (!list_empty(&splice)) { | 8486 | while (!list_empty(&splice)) { |
| @@ -8456,19 +8506,16 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 8456 | goto out; | 8506 | goto out; |
| 8457 | } | 8507 | } |
| 8458 | list_add_tail(&work->list, &works); | 8508 | list_add_tail(&work->list, &works); |
| 8459 | btrfs_queue_worker(&root->fs_info->flush_workers, | 8509 | btrfs_queue_work(root->fs_info->flush_workers, |
| 8460 | &work->work); | 8510 | &work->work); |
| 8461 | 8511 | ret++; | |
| 8512 | if (nr != -1 && ret >= nr) | ||
| 8513 | goto out; | ||
| 8462 | cond_resched(); | 8514 | cond_resched(); |
| 8463 | spin_lock(&root->delalloc_lock); | 8515 | spin_lock(&root->delalloc_lock); |
| 8464 | } | 8516 | } |
| 8465 | spin_unlock(&root->delalloc_lock); | 8517 | spin_unlock(&root->delalloc_lock); |
| 8466 | 8518 | ||
| 8467 | list_for_each_entry_safe(work, next, &works, list) { | ||
| 8468 | list_del_init(&work->list); | ||
| 8469 | btrfs_wait_and_free_delalloc_work(work); | ||
| 8470 | } | ||
| 8471 | return 0; | ||
| 8472 | out: | 8519 | out: |
| 8473 | list_for_each_entry_safe(work, next, &works, list) { | 8520 | list_for_each_entry_safe(work, next, &works, list) { |
| 8474 | list_del_init(&work->list); | 8521 | list_del_init(&work->list); |
| @@ -8480,6 +8527,7 @@ out: | |||
| 8480 | list_splice_tail(&splice, &root->delalloc_inodes); | 8527 | list_splice_tail(&splice, &root->delalloc_inodes); |
| 8481 | spin_unlock(&root->delalloc_lock); | 8528 | spin_unlock(&root->delalloc_lock); |
| 8482 | } | 8529 | } |
| 8530 | mutex_unlock(&root->delalloc_mutex); | ||
| 8483 | return ret; | 8531 | return ret; |
| 8484 | } | 8532 | } |
| 8485 | 8533 | ||
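__start_delalloc_inodes() now takes an nr budget and returns the number of inodes it queued rather than waiting on the work items, with -1 meaning no limit; btrfs_start_delalloc_roots() grows the same parameter. Hedged usage sketch:

    /* flush at most 16 delalloc inodes across all roots */
    ret = btrfs_start_delalloc_roots(fs_info, 0 /* delay_iput */, 16);

    /* previous behaviour: flush everything */
    ret = btrfs_start_delalloc_roots(fs_info, 0, -1);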
| @@ -8490,7 +8538,9 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 8490 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) | 8538 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) |
| 8491 | return -EROFS; | 8539 | return -EROFS; |
| 8492 | 8540 | ||
| 8493 | ret = __start_delalloc_inodes(root, delay_iput); | 8541 | ret = __start_delalloc_inodes(root, delay_iput, -1); |
| 8542 | if (ret > 0) | ||
| 8543 | ret = 0; | ||
| 8494 | /* | 8544 | /* |
| 8495 | * the filemap_flush will queue IO into the worker threads, but | 8545 | * the filemap_flush will queue IO into the worker threads, but |
| 8496 | * we have to make sure the IO is actually started and that | 8546 | * we have to make sure the IO is actually started and that |
| @@ -8507,7 +8557,8 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 8507 | return ret; | 8557 | return ret; |
| 8508 | } | 8558 | } |
| 8509 | 8559 | ||
| 8510 | int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) | 8560 | int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput, |
| 8561 | int nr) | ||
| 8511 | { | 8562 | { |
| 8512 | struct btrfs_root *root; | 8563 | struct btrfs_root *root; |
| 8513 | struct list_head splice; | 8564 | struct list_head splice; |
| @@ -8518,9 +8569,10 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) | |||
| 8518 | 8569 | ||
| 8519 | INIT_LIST_HEAD(&splice); | 8570 | INIT_LIST_HEAD(&splice); |
| 8520 | 8571 | ||
| 8572 | mutex_lock(&fs_info->delalloc_root_mutex); | ||
| 8521 | spin_lock(&fs_info->delalloc_root_lock); | 8573 | spin_lock(&fs_info->delalloc_root_lock); |
| 8522 | list_splice_init(&fs_info->delalloc_roots, &splice); | 8574 | list_splice_init(&fs_info->delalloc_roots, &splice); |
| 8523 | while (!list_empty(&splice)) { | 8575 | while (!list_empty(&splice) && nr) { |
| 8524 | root = list_first_entry(&splice, struct btrfs_root, | 8576 | root = list_first_entry(&splice, struct btrfs_root, |
| 8525 | delalloc_root); | 8577 | delalloc_root); |
| 8526 | root = btrfs_grab_fs_root(root); | 8578 | root = btrfs_grab_fs_root(root); |
| @@ -8529,15 +8581,20 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) | |||
| 8529 | &fs_info->delalloc_roots); | 8581 | &fs_info->delalloc_roots); |
| 8530 | spin_unlock(&fs_info->delalloc_root_lock); | 8582 | spin_unlock(&fs_info->delalloc_root_lock); |
| 8531 | 8583 | ||
| 8532 | ret = __start_delalloc_inodes(root, delay_iput); | 8584 | ret = __start_delalloc_inodes(root, delay_iput, nr); |
| 8533 | btrfs_put_fs_root(root); | 8585 | btrfs_put_fs_root(root); |
| 8534 | if (ret) | 8586 | if (ret < 0) |
| 8535 | goto out; | 8587 | goto out; |
| 8536 | 8588 | ||
| 8589 | if (nr != -1) { | ||
| 8590 | nr -= ret; | ||
| 8591 | WARN_ON(nr < 0); | ||
| 8592 | } | ||
| 8537 | spin_lock(&fs_info->delalloc_root_lock); | 8593 | spin_lock(&fs_info->delalloc_root_lock); |
| 8538 | } | 8594 | } |
| 8539 | spin_unlock(&fs_info->delalloc_root_lock); | 8595 | spin_unlock(&fs_info->delalloc_root_lock); |
| 8540 | 8596 | ||
| 8597 | ret = 0; | ||
| 8541 | atomic_inc(&fs_info->async_submit_draining); | 8598 | atomic_inc(&fs_info->async_submit_draining); |
| 8542 | while (atomic_read(&fs_info->nr_async_submits) || | 8599 | while (atomic_read(&fs_info->nr_async_submits) || |
| 8543 | atomic_read(&fs_info->async_delalloc_pages)) { | 8600 | atomic_read(&fs_info->async_delalloc_pages)) { |
| @@ -8546,13 +8603,13 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) | |||
| 8546 | atomic_read(&fs_info->async_delalloc_pages) == 0)); | 8603 | atomic_read(&fs_info->async_delalloc_pages) == 0)); |
| 8547 | } | 8604 | } |
| 8548 | atomic_dec(&fs_info->async_submit_draining); | 8605 | atomic_dec(&fs_info->async_submit_draining); |
| 8549 | return 0; | ||
| 8550 | out: | 8606 | out: |
| 8551 | if (!list_empty_careful(&splice)) { | 8607 | if (!list_empty_careful(&splice)) { |
| 8552 | spin_lock(&fs_info->delalloc_root_lock); | 8608 | spin_lock(&fs_info->delalloc_root_lock); |
| 8553 | list_splice_tail(&splice, &fs_info->delalloc_roots); | 8609 | list_splice_tail(&splice, &fs_info->delalloc_roots); |
| 8554 | spin_unlock(&fs_info->delalloc_root_lock); | 8610 | spin_unlock(&fs_info->delalloc_root_lock); |
| 8555 | } | 8611 | } |
| 8612 | mutex_unlock(&fs_info->delalloc_root_mutex); | ||
| 8556 | return ret; | 8613 | return ret; |
| 8557 | } | 8614 | } |
| 8558 | 8615 | ||
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a6d8efa46bfe..e79ff6b90cb7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -59,6 +59,32 @@ | |||
| 59 | #include "props.h" | 59 | #include "props.h" |
| 60 | #include "sysfs.h" | 60 | #include "sysfs.h" |
| 61 | 61 | ||
| 62 | #ifdef CONFIG_64BIT | ||
| 63 | /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI | ||
| 64 | * structures are incorrect, as the timespec structure from userspace | ||
| 65 | * is 4 bytes too small. We define these alternatives here to teach | ||
| 66 | * the kernel about the 32-bit struct packing. | ||
| 67 | */ | ||
| 68 | struct btrfs_ioctl_timespec_32 { | ||
| 69 | __u64 sec; | ||
| 70 | __u32 nsec; | ||
| 71 | } __attribute__ ((__packed__)); | ||
| 72 | |||
| 73 | struct btrfs_ioctl_received_subvol_args_32 { | ||
| 74 | char uuid[BTRFS_UUID_SIZE]; /* in */ | ||
| 75 | __u64 stransid; /* in */ | ||
| 76 | __u64 rtransid; /* out */ | ||
| 77 | struct btrfs_ioctl_timespec_32 stime; /* in */ | ||
| 78 | struct btrfs_ioctl_timespec_32 rtime; /* out */ | ||
| 79 | __u64 flags; /* in */ | ||
| 80 | __u64 reserved[16]; /* in */ | ||
| 81 | } __attribute__ ((__packed__)); | ||
| 82 | |||
| 83 | #define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \ | ||
| 84 | struct btrfs_ioctl_received_subvol_args_32) | ||
| 85 | #endif | ||
| 86 | |||
| 87 | |||
| 62 | static int btrfs_clone(struct inode *src, struct inode *inode, | 88 | static int btrfs_clone(struct inode *src, struct inode *inode, |
| 63 | u64 off, u64 olen, u64 olen_aligned, u64 destoff); | 89 | u64 off, u64 olen, u64 olen_aligned, u64 destoff); |
| 64 | 90 | ||
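The 4-byte discrepancy is trailing padding: with a __u64 member, a 64-bit ABI pads the struct out to 16 bytes, while 32-bit x86 (4-byte u64 alignment) lays it out in 12, which is what the packed compat variants pin down. Runnable userspace check:

    #include <stdio.h>
    #include <stdint.h>

    struct ts_native { uint64_t sec; uint32_t nsec; };
    struct ts_packed { uint64_t sec; uint32_t nsec; } __attribute__((packed));

    int main(void)
    {
            /* 64-bit build: "native 16, packed 12", the 4 bytes in question;
             * a 32-bit x86 build prints 12 for both */
            printf("native %zu, packed %zu\n",
                   sizeof(struct ts_native), sizeof(struct ts_packed));
            return 0;
    }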
| @@ -585,6 +611,23 @@ fail: | |||
| 585 | return ret; | 611 | return ret; |
| 586 | } | 612 | } |
| 587 | 613 | ||
| 614 | static void btrfs_wait_nocow_write(struct btrfs_root *root) | ||
| 615 | { | ||
| 616 | s64 writers; | ||
| 617 | DEFINE_WAIT(wait); | ||
| 618 | |||
| 619 | do { | ||
| 620 | prepare_to_wait(&root->subv_writers->wait, &wait, | ||
| 621 | TASK_UNINTERRUPTIBLE); | ||
| 622 | |||
| 623 | writers = percpu_counter_sum(&root->subv_writers->counter); | ||
| 624 | if (writers) | ||
| 625 | schedule(); | ||
| 626 | |||
| 627 | finish_wait(&root->subv_writers->wait, &wait); | ||
| 628 | } while (writers); | ||
| 629 | } | ||
| 630 | |||
| 588 | static int create_snapshot(struct btrfs_root *root, struct inode *dir, | 631 | static int create_snapshot(struct btrfs_root *root, struct inode *dir, |
| 589 | struct dentry *dentry, char *name, int namelen, | 632 | struct dentry *dentry, char *name, int namelen, |
| 590 | u64 *async_transid, bool readonly, | 633 | u64 *async_transid, bool readonly, |
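btrfs_wait_nocow_write() is the snapshot half of a handshake with the btrfs_start_nocow_write()/btrfs_end_nocow_write() calls added in file.c and inode.c above: will_be_snapshoted turns new NOCOW writers away, then the percpu counter drains the in-flight ones. The writer half is not in this diff; a sketch of what it presumably looks like (names and ordering are assumptions):

    int btrfs_start_nocow_write(struct btrfs_root *root)    /* sketch */
    {
            if (atomic_read(&root->will_be_snapshoted))
                    return 0;             /* snapshot pending: take the COW path */

            percpu_counter_inc(&root->subv_writers->counter);
            smp_mb();                     /* pairs with the inc in create_snapshot */
            if (atomic_read(&root->will_be_snapshoted)) {
                    btrfs_end_nocow_write(root);  /* dec counter, wake the waiter */
                    return 0;
            }
            return 1;                     /* caller may NOCOW; must call _end later */
    }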
| @@ -598,15 +641,21 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
| 598 | if (!root->ref_cows) | 641 | if (!root->ref_cows) |
| 599 | return -EINVAL; | 642 | return -EINVAL; |
| 600 | 643 | ||
| 644 | atomic_inc(&root->will_be_snapshoted); | ||
| 645 | smp_mb__after_atomic_inc(); | ||
| 646 | btrfs_wait_nocow_write(root); | ||
| 647 | |||
| 601 | ret = btrfs_start_delalloc_inodes(root, 0); | 648 | ret = btrfs_start_delalloc_inodes(root, 0); |
| 602 | if (ret) | 649 | if (ret) |
| 603 | return ret; | 650 | goto out; |
| 604 | 651 | ||
| 605 | btrfs_wait_ordered_extents(root, -1); | 652 | btrfs_wait_ordered_extents(root, -1); |
| 606 | 653 | ||
| 607 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); | 654 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); |
| 608 | if (!pending_snapshot) | 655 | if (!pending_snapshot) { |
| 609 | return -ENOMEM; | 656 | ret = -ENOMEM; |
| 657 | goto out; | ||
| 658 | } | ||
| 610 | 659 | ||
| 611 | btrfs_init_block_rsv(&pending_snapshot->block_rsv, | 660 | btrfs_init_block_rsv(&pending_snapshot->block_rsv, |
| 612 | BTRFS_BLOCK_RSV_TEMP); | 661 | BTRFS_BLOCK_RSV_TEMP); |
| @@ -623,7 +672,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
| 623 | &pending_snapshot->qgroup_reserved, | 672 | &pending_snapshot->qgroup_reserved, |
| 624 | false); | 673 | false); |
| 625 | if (ret) | 674 | if (ret) |
| 626 | goto out; | 675 | goto free; |
| 627 | 676 | ||
| 628 | pending_snapshot->dentry = dentry; | 677 | pending_snapshot->dentry = dentry; |
| 629 | pending_snapshot->root = root; | 678 | pending_snapshot->root = root; |
| @@ -674,8 +723,10 @@ fail: | |||
| 674 | btrfs_subvolume_release_metadata(BTRFS_I(dir)->root, | 723 | btrfs_subvolume_release_metadata(BTRFS_I(dir)->root, |
| 675 | &pending_snapshot->block_rsv, | 724 | &pending_snapshot->block_rsv, |
| 676 | pending_snapshot->qgroup_reserved); | 725 | pending_snapshot->qgroup_reserved); |
| 677 | out: | 726 | free: |
| 678 | kfree(pending_snapshot); | 727 | kfree(pending_snapshot); |
| 728 | out: | ||
| 729 | atomic_dec(&root->will_be_snapshoted); | ||
| 679 | return ret; | 730 | return ret; |
| 680 | } | 731 | } |
| 681 | 732 | ||
| @@ -884,12 +935,14 @@ static int find_new_extents(struct btrfs_root *root, | |||
| 884 | min_key.type = BTRFS_EXTENT_DATA_KEY; | 935 | min_key.type = BTRFS_EXTENT_DATA_KEY; |
| 885 | min_key.offset = *off; | 936 | min_key.offset = *off; |
| 886 | 937 | ||
| 887 | path->keep_locks = 1; | ||
| 888 | |||
| 889 | while (1) { | 938 | while (1) { |
| 939 | path->keep_locks = 1; | ||
| 890 | ret = btrfs_search_forward(root, &min_key, path, newer_than); | 940 | ret = btrfs_search_forward(root, &min_key, path, newer_than); |
| 891 | if (ret != 0) | 941 | if (ret != 0) |
| 892 | goto none; | 942 | goto none; |
| 943 | path->keep_locks = 0; | ||
| 944 | btrfs_unlock_up_safe(path, 1); | ||
| 945 | process_slot: | ||
| 893 | if (min_key.objectid != ino) | 946 | if (min_key.objectid != ino) |
| 894 | goto none; | 947 | goto none; |
| 895 | if (min_key.type != BTRFS_EXTENT_DATA_KEY) | 948 | if (min_key.type != BTRFS_EXTENT_DATA_KEY) |
| @@ -908,6 +961,12 @@ static int find_new_extents(struct btrfs_root *root, | |||
| 908 | return 0; | 961 | return 0; |
| 909 | } | 962 | } |
| 910 | 963 | ||
| 964 | path->slots[0]++; | ||
| 965 | if (path->slots[0] < btrfs_header_nritems(leaf)) { | ||
| 966 | btrfs_item_key_to_cpu(leaf, &min_key, path->slots[0]); | ||
| 967 | goto process_slot; | ||
| 968 | } | ||
| 969 | |||
| 911 | if (min_key.offset == (u64)-1) | 970 | if (min_key.offset == (u64)-1) |
| 912 | goto none; | 971 | goto none; |
| 913 | 972 | ||
| @@ -935,10 +994,13 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) | |||
| 935 | read_unlock(&em_tree->lock); | 994 | read_unlock(&em_tree->lock); |
| 936 | 995 | ||
| 937 | if (!em) { | 996 | if (!em) { |
| 997 | struct extent_state *cached = NULL; | ||
| 998 | u64 end = start + len - 1; | ||
| 999 | |||
| 938 | /* get the big lock and read metadata off disk */ | 1000 | /* get the big lock and read metadata off disk */ |
| 939 | lock_extent(io_tree, start, start + len - 1); | 1001 | lock_extent_bits(io_tree, start, end, 0, &cached); |
| 940 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); | 1002 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); |
| 941 | unlock_extent(io_tree, start, start + len - 1); | 1003 | unlock_extent_cached(io_tree, start, end, &cached, GFP_NOFS); |
| 942 | 1004 | ||
| 943 | if (IS_ERR(em)) | 1005 | if (IS_ERR(em)) |
| 944 | return NULL; | 1006 | return NULL; |
| @@ -957,7 +1019,8 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) | |||
| 957 | return false; | 1019 | return false; |
| 958 | 1020 | ||
| 959 | next = defrag_lookup_extent(inode, em->start + em->len); | 1021 | next = defrag_lookup_extent(inode, em->start + em->len); |
| 960 | if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) | 1022 | if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE || |
| 1023 | (em->block_start + em->block_len == next->block_start)) | ||
| 961 | ret = false; | 1024 | ret = false; |
| 962 | 1025 | ||
| 963 | free_extent_map(next); | 1026 | free_extent_map(next); |
| @@ -1076,10 +1139,12 @@ again: | |||
| 1076 | page_start = page_offset(page); | 1139 | page_start = page_offset(page); |
| 1077 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 1140 | page_end = page_start + PAGE_CACHE_SIZE - 1; |
| 1078 | while (1) { | 1141 | while (1) { |
| 1079 | lock_extent(tree, page_start, page_end); | 1142 | lock_extent_bits(tree, page_start, page_end, |
| 1143 | 0, &cached_state); | ||
| 1080 | ordered = btrfs_lookup_ordered_extent(inode, | 1144 | ordered = btrfs_lookup_ordered_extent(inode, |
| 1081 | page_start); | 1145 | page_start); |
| 1082 | unlock_extent(tree, page_start, page_end); | 1146 | unlock_extent_cached(tree, page_start, page_end, |
| 1147 | &cached_state, GFP_NOFS); | ||
| 1083 | if (!ordered) | 1148 | if (!ordered) |
| 1084 | break; | 1149 | break; |
| 1085 | 1150 | ||
| @@ -1356,8 +1421,12 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
| 1356 | } | 1421 | } |
| 1357 | } | 1422 | } |
| 1358 | 1423 | ||
| 1359 | if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) | 1424 | if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) { |
| 1360 | filemap_flush(inode->i_mapping); | 1425 | filemap_flush(inode->i_mapping); |
| 1426 | if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, | ||
| 1427 | &BTRFS_I(inode)->runtime_flags)) | ||
| 1428 | filemap_flush(inode->i_mapping); | ||
| 1429 | } | ||
| 1361 | 1430 | ||
| 1362 | if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { | 1431 | if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { |
| 1363 | /* the filemap_flush will queue IO into the worker threads, but | 1432 | /* the filemap_flush will queue IO into the worker threads, but |
| @@ -1403,6 +1472,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, | |||
| 1403 | struct btrfs_trans_handle *trans; | 1472 | struct btrfs_trans_handle *trans; |
| 1404 | struct btrfs_device *device = NULL; | 1473 | struct btrfs_device *device = NULL; |
| 1405 | char *sizestr; | 1474 | char *sizestr; |
| 1475 | char *retptr; | ||
| 1406 | char *devstr = NULL; | 1476 | char *devstr = NULL; |
| 1407 | int ret = 0; | 1477 | int ret = 0; |
| 1408 | int mod = 0; | 1478 | int mod = 0; |
| @@ -1470,8 +1540,8 @@ static noinline int btrfs_ioctl_resize(struct file *file, | |||
| 1470 | mod = 1; | 1540 | mod = 1; |
| 1471 | sizestr++; | 1541 | sizestr++; |
| 1472 | } | 1542 | } |
| 1473 | new_size = memparse(sizestr, NULL); | 1543 | new_size = memparse(sizestr, &retptr); |
| 1474 | if (new_size == 0) { | 1544 | if (*retptr != '\0' || new_size == 0) { |
| 1475 | ret = -EINVAL; | 1545 | ret = -EINVAL; |
| 1476 | goto out_free; | 1546 | goto out_free; |
| 1477 | } | 1547 | } |
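memparse() consumes digits plus an optional K/M/G/T/P/E suffix and sets *retptr to the first unconsumed character; checking it closes the hole where trailing garbage in a resize argument was silently ignored. Sketch:

    char *retptr;
    u64 new_size;

    new_size = memparse("10g", &retptr);     /* 10 << 30, *retptr == '\0': accept */
    new_size = memparse("10gjunk", &retptr); /* also 10 << 30, *retptr == 'j'     */
    if (*retptr != '\0' || new_size == 0)
            return -EINVAL;                  /* trailing garbage is now rejected  */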
| @@ -1573,7 +1643,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
| 1573 | if (src_inode->i_sb != file_inode(file)->i_sb) { | 1643 | if (src_inode->i_sb != file_inode(file)->i_sb) { |
| 1574 | btrfs_info(BTRFS_I(src_inode)->root->fs_info, | 1644 | btrfs_info(BTRFS_I(src_inode)->root->fs_info, |
| 1575 | "Snapshot src from another FS"); | 1645 | "Snapshot src from another FS"); |
| 1576 | ret = -EINVAL; | 1646 | ret = -EXDEV; |
| 1577 | } else if (!inode_owner_or_capable(src_inode)) { | 1647 | } else if (!inode_owner_or_capable(src_inode)) { |
| 1578 | /* | 1648 | /* |
| 1579 | * Subvolume creation is not restricted, but snapshots | 1649 | * Subvolume creation is not restricted, but snapshots |
| @@ -1797,7 +1867,9 @@ static noinline int may_destroy_subvol(struct btrfs_root *root) | |||
| 1797 | if (di && !IS_ERR(di)) { | 1867 | if (di && !IS_ERR(di)) { |
| 1798 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); | 1868 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); |
| 1799 | if (key.objectid == root->root_key.objectid) { | 1869 | if (key.objectid == root->root_key.objectid) { |
| 1800 | ret = -ENOTEMPTY; | 1870 | ret = -EPERM; |
| 1871 | btrfs_err(root->fs_info, "deleting default subvolume " | ||
| 1872 | "%llu is not allowed", key.objectid); | ||
| 1801 | goto out; | 1873 | goto out; |
| 1802 | } | 1874 | } |
| 1803 | btrfs_release_path(path); | 1875 | btrfs_release_path(path); |
| @@ -2994,8 +3066,9 @@ process_slot: | |||
| 2994 | new_key.offset + datal, | 3066 | new_key.offset + datal, |
| 2995 | 1); | 3067 | 1); |
| 2996 | if (ret) { | 3068 | if (ret) { |
| 2997 | btrfs_abort_transaction(trans, root, | 3069 | if (ret != -EINVAL) |
| 2998 | ret); | 3070 | btrfs_abort_transaction(trans, |
| 3071 | root, ret); | ||
| 2999 | btrfs_end_transaction(trans, root); | 3072 | btrfs_end_transaction(trans, root); |
| 3000 | goto out; | 3073 | goto out; |
| 3001 | } | 3074 | } |
| @@ -3068,8 +3141,9 @@ process_slot: | |||
| 3068 | new_key.offset + datal, | 3141 | new_key.offset + datal, |
| 3069 | 1); | 3142 | 1); |
| 3070 | if (ret) { | 3143 | if (ret) { |
| 3071 | btrfs_abort_transaction(trans, root, | 3144 | if (ret != -EINVAL) |
| 3072 | ret); | 3145 | btrfs_abort_transaction(trans, |
| 3146 | root, ret); | ||
| 3073 | btrfs_end_transaction(trans, root); | 3147 | btrfs_end_transaction(trans, root); |
| 3074 | goto out; | 3148 | goto out; |
| 3075 | } | 3149 | } |
| @@ -3153,8 +3227,9 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 3153 | * decompress into destination's address_space (the file offset | 3227 | * decompress into destination's address_space (the file offset |
| 3154 | * may change, so source mapping won't do), then recompress (or | 3228 | * may change, so source mapping won't do), then recompress (or |
| 3155 | * otherwise reinsert) a subrange. | 3229 | * otherwise reinsert) a subrange. |
| 3156 | * - allow ranges within the same file to be cloned (provided | 3230 | * |
| 3157 | * they don't overlap)? | 3231 | * - split destination inode's inline extents. The inline extents can |
| 3232 | * be either compressed or non-compressed. | ||
| 3158 | */ | 3233 | */ |
| 3159 | 3234 | ||
| 3160 | /* the destination must be opened for writing */ | 3235 | /* the destination must be opened for writing */ |
| @@ -3465,6 +3540,11 @@ static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) | |||
| 3465 | up_read(&info->groups_sem); | 3540 | up_read(&info->groups_sem); |
| 3466 | } | 3541 | } |
| 3467 | 3542 | ||
| 3543 | /* | ||
| 3544 | * Global block reserve, exported as a space_info | ||
| 3545 | */ | ||
| 3546 | slot_count++; | ||
| 3547 | |||
| 3468 | /* space_slots == 0 means they are asking for a count */ | 3548 | /* space_slots == 0 means they are asking for a count */ |
| 3469 | if (space_args.space_slots == 0) { | 3549 | if (space_args.space_slots == 0) { |
| 3470 | space_args.total_spaces = slot_count; | 3550 | space_args.total_spaces = slot_count; |
| @@ -3523,6 +3603,21 @@ static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) | |||
| 3523 | up_read(&info->groups_sem); | 3603 | up_read(&info->groups_sem); |
| 3524 | } | 3604 | } |
| 3525 | 3605 | ||
| 3606 | /* | ||
| 3607 | * Add global block reserve | ||
| 3608 | */ | ||
| 3609 | if (slot_count) { | ||
| 3610 | struct btrfs_block_rsv *block_rsv = &root->fs_info->global_block_rsv; | ||
| 3611 | |||
| 3612 | spin_lock(&block_rsv->lock); | ||
| 3613 | space.total_bytes = block_rsv->size; | ||
| 3614 | space.used_bytes = block_rsv->size - block_rsv->reserved; | ||
| 3615 | spin_unlock(&block_rsv->lock); | ||
| 3616 | space.flags = BTRFS_SPACE_INFO_GLOBAL_RSV; | ||
| 3617 | memcpy(dest, &space, sizeof(space)); | ||
| 3618 | space_args.total_spaces++; | ||
| 3619 | } | ||
| 3620 | |||
| 3526 | user_dest = (struct btrfs_ioctl_space_info __user *) | 3621 | user_dest = (struct btrfs_ioctl_space_info __user *) |
| 3527 | (arg + sizeof(struct btrfs_ioctl_space_args)); | 3622 | (arg + sizeof(struct btrfs_ioctl_space_args)); |
| 3528 | 3623 | ||
| @@ -4353,10 +4448,9 @@ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg) | |||
| 4353 | return btrfs_qgroup_wait_for_completion(root->fs_info); | 4448 | return btrfs_qgroup_wait_for_completion(root->fs_info); |
| 4354 | } | 4449 | } |
| 4355 | 4450 | ||
| 4356 | static long btrfs_ioctl_set_received_subvol(struct file *file, | 4451 | static long _btrfs_ioctl_set_received_subvol(struct file *file, |
| 4357 | void __user *arg) | 4452 | struct btrfs_ioctl_received_subvol_args *sa) |
| 4358 | { | 4453 | { |
| 4359 | struct btrfs_ioctl_received_subvol_args *sa = NULL; | ||
| 4360 | struct inode *inode = file_inode(file); | 4454 | struct inode *inode = file_inode(file); |
| 4361 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4455 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 4362 | struct btrfs_root_item *root_item = &root->root_item; | 4456 | struct btrfs_root_item *root_item = &root->root_item; |
| @@ -4384,13 +4478,6 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, | |||
| 4384 | goto out; | 4478 | goto out; |
| 4385 | } | 4479 | } |
| 4386 | 4480 | ||
| 4387 | sa = memdup_user(arg, sizeof(*sa)); | ||
| 4388 | if (IS_ERR(sa)) { | ||
| 4389 | ret = PTR_ERR(sa); | ||
| 4390 | sa = NULL; | ||
| 4391 | goto out; | ||
| 4392 | } | ||
| 4393 | |||
| 4394 | /* | 4481 | /* |
| 4395 | * 1 - root item | 4482 | * 1 - root item |
| 4396 | * 2 - uuid items (received uuid + subvol uuid) | 4483 | * 2 - uuid items (received uuid + subvol uuid) |
| @@ -4444,14 +4531,90 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, | |||
| 4444 | goto out; | 4531 | goto out; |
| 4445 | } | 4532 | } |
| 4446 | 4533 | ||
| 4534 | out: | ||
| 4535 | up_write(&root->fs_info->subvol_sem); | ||
| 4536 | mnt_drop_write_file(file); | ||
| 4537 | return ret; | ||
| 4538 | } | ||
| 4539 | |||
| 4540 | #ifdef CONFIG_64BIT | ||
| 4541 | static long btrfs_ioctl_set_received_subvol_32(struct file *file, | ||
| 4542 | void __user *arg) | ||
| 4543 | { | ||
| 4544 | struct btrfs_ioctl_received_subvol_args_32 *args32 = NULL; | ||
| 4545 | struct btrfs_ioctl_received_subvol_args *args64 = NULL; | ||
| 4546 | int ret = 0; | ||
| 4547 | |||
| 4548 | args32 = memdup_user(arg, sizeof(*args32)); | ||
| 4549 | if (IS_ERR(args32)) { | ||
| 4550 | ret = PTR_ERR(args32); | ||
| 4551 | args32 = NULL; | ||
| 4552 | goto out; | ||
| 4553 | } | ||
| 4554 | |||
| 4555 | args64 = kmalloc(sizeof(*args64), GFP_NOFS); | ||
| 4556 | if (!args64) { | ||
| 4557 | ret = -ENOMEM; | ||
| 4558 | goto out; | ||
| 4559 | } | ||
| 4560 | |||
| 4561 | memcpy(args64->uuid, args32->uuid, BTRFS_UUID_SIZE); | ||
| 4562 | args64->stransid = args32->stransid; | ||
| 4563 | args64->rtransid = args32->rtransid; | ||
| 4564 | args64->stime.sec = args32->stime.sec; | ||
| 4565 | args64->stime.nsec = args32->stime.nsec; | ||
| 4566 | args64->rtime.sec = args32->rtime.sec; | ||
| 4567 | args64->rtime.nsec = args32->rtime.nsec; | ||
| 4568 | args64->flags = args32->flags; | ||
| 4569 | |||
| 4570 | ret = _btrfs_ioctl_set_received_subvol(file, args64); | ||
| 4571 | if (ret) | ||
| 4572 | goto out; | ||
| 4573 | |||
| 4574 | memcpy(args32->uuid, args64->uuid, BTRFS_UUID_SIZE); | ||
| 4575 | args32->stransid = args64->stransid; | ||
| 4576 | args32->rtransid = args64->rtransid; | ||
| 4577 | args32->stime.sec = args64->stime.sec; | ||
| 4578 | args32->stime.nsec = args64->stime.nsec; | ||
| 4579 | args32->rtime.sec = args64->rtime.sec; | ||
| 4580 | args32->rtime.nsec = args64->rtime.nsec; | ||
| 4581 | args32->flags = args64->flags; | ||
| 4582 | |||
| 4583 | ret = copy_to_user(arg, args32, sizeof(*args32)); | ||
| 4584 | if (ret) | ||
| 4585 | ret = -EFAULT; | ||
| 4586 | |||
| 4587 | out: | ||
| 4588 | kfree(args32); | ||
| 4589 | kfree(args64); | ||
| 4590 | return ret; | ||
| 4591 | } | ||
| 4592 | #endif | ||
| 4593 | |||
| 4594 | static long btrfs_ioctl_set_received_subvol(struct file *file, | ||
| 4595 | void __user *arg) | ||
| 4596 | { | ||
| 4597 | struct btrfs_ioctl_received_subvol_args *sa = NULL; | ||
| 4598 | int ret = 0; | ||
| 4599 | |||
| 4600 | sa = memdup_user(arg, sizeof(*sa)); | ||
| 4601 | if (IS_ERR(sa)) { | ||
| 4602 | ret = PTR_ERR(sa); | ||
| 4603 | sa = NULL; | ||
| 4604 | goto out; | ||
| 4605 | } | ||
| 4606 | |||
| 4607 | ret = _btrfs_ioctl_set_received_subvol(file, sa); | ||
| 4608 | |||
| 4609 | if (ret) | ||
| 4610 | goto out; | ||
| 4611 | |||
| 4447 | ret = copy_to_user(arg, sa, sizeof(*sa)); | 4612 | ret = copy_to_user(arg, sa, sizeof(*sa)); |
| 4448 | if (ret) | 4613 | if (ret) |
| 4449 | ret = -EFAULT; | 4614 | ret = -EFAULT; |
| 4450 | 4615 | ||
| 4451 | out: | 4616 | out: |
| 4452 | kfree(sa); | 4617 | kfree(sa); |
| 4453 | up_write(&root->fs_info->subvol_sem); | ||
| 4454 | mnt_drop_write_file(file); | ||
| 4455 | return ret; | 4618 | return ret; |
| 4456 | } | 4619 | } |
| 4457 | 4620 | ||
| @@ -4746,7 +4909,7 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
| 4746 | case BTRFS_IOC_SYNC: { | 4909 | case BTRFS_IOC_SYNC: { |
| 4747 | int ret; | 4910 | int ret; |
| 4748 | 4911 | ||
| 4749 | ret = btrfs_start_delalloc_roots(root->fs_info, 0); | 4912 | ret = btrfs_start_delalloc_roots(root->fs_info, 0, -1); |
| 4750 | if (ret) | 4913 | if (ret) |
| 4751 | return ret; | 4914 | return ret; |
| 4752 | ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); | 4915 | ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); |
| @@ -4770,6 +4933,10 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
| 4770 | return btrfs_ioctl_balance_progress(root, argp); | 4933 | return btrfs_ioctl_balance_progress(root, argp); |
| 4771 | case BTRFS_IOC_SET_RECEIVED_SUBVOL: | 4934 | case BTRFS_IOC_SET_RECEIVED_SUBVOL: |
| 4772 | return btrfs_ioctl_set_received_subvol(file, argp); | 4935 | return btrfs_ioctl_set_received_subvol(file, argp); |
| 4936 | #ifdef CONFIG_64BIT | ||
| 4937 | case BTRFS_IOC_SET_RECEIVED_SUBVOL_32: | ||
| 4938 | return btrfs_ioctl_set_received_subvol_32(file, argp); | ||
| 4939 | #endif | ||
| 4773 | case BTRFS_IOC_SEND: | 4940 | case BTRFS_IOC_SEND: |
| 4774 | return btrfs_ioctl_send(file, argp); | 4941 | return btrfs_ioctl_send(file, argp); |
| 4775 | case BTRFS_IOC_GET_DEV_STATS: | 4942 | case BTRFS_IOC_GET_DEV_STATS: |
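The SET_RECEIVED_SUBVOL_32 addition above follows the standard compat-ioctl shape: copy the 32-bit layout in with memdup_user(), widen it field by field into the native struct, run the shared helper, then narrow the results back for userspace. A minimal sketch of that shape — demo_args32/demo_args64 and demo_set_received() are hypothetical stand-ins, not btrfs symbols:

/* Hypothetical ABI structs; the real ones live in the uapi headers. */
struct demo_args32 { __u64 transid; __u32 sec; __u32 nsec; };
struct demo_args64 { __u64 transid; __s64 sec; __u32 nsec; };

static long demo_ioctl_compat(struct file *file, void __user *uarg)
{
	struct demo_args32 *a32;
	struct demo_args64 a64;
	long ret;

	a32 = memdup_user(uarg, sizeof(*a32));
	if (IS_ERR(a32))
		return PTR_ERR(a32);

	/* Widen field by field; a raw memcpy would copy the wrong layout. */
	a64.transid = a32->transid;
	a64.sec = a32->sec;
	a64.nsec = a32->nsec;

	ret = demo_set_received(file, &a64);	/* shared 64-bit helper */
	if (!ret) {
		a32->transid = a64.transid;
		a32->sec = (__u32)a64.sec;
		a32->nsec = a64.nsec;
		if (copy_to_user(uarg, a32, sizeof(*a32)))
			ret = -EFAULT;
	}
	kfree(a32);
	return ret;
}

The real wrapper heap-allocates the 64-bit struct with GFP_NOFS instead, since it runs in filesystem context.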
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index b16450b840e7..a94b05f72869 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
| @@ -349,10 +349,13 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode, | |||
| 349 | if (!uptodate) | 349 | if (!uptodate) |
| 350 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); | 350 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); |
| 351 | 351 | ||
| 352 | if (entry->bytes_left == 0) | 352 | if (entry->bytes_left == 0) { |
| 353 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 353 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
| 354 | else | 354 | if (waitqueue_active(&entry->wait)) |
| 355 | wake_up(&entry->wait); | ||
| 356 | } else { | ||
| 355 | ret = 1; | 357 | ret = 1; |
| 358 | } | ||
| 356 | out: | 359 | out: |
| 357 | if (!ret && cached && entry) { | 360 | if (!ret && cached && entry) { |
| 358 | *cached = entry; | 361 | *cached = entry; |
| @@ -410,10 +413,13 @@ have_entry: | |||
| 410 | if (!uptodate) | 413 | if (!uptodate) |
| 411 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); | 414 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); |
| 412 | 415 | ||
| 413 | if (entry->bytes_left == 0) | 416 | if (entry->bytes_left == 0) { |
| 414 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 417 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
| 415 | else | 418 | if (waitqueue_active(&entry->wait)) |
| 419 | wake_up(&entry->wait); | ||
| 420 | } else { | ||
| 416 | ret = 1; | 421 | ret = 1; |
| 422 | } | ||
| 417 | out: | 423 | out: |
| 418 | if (!ret && cached && entry) { | 424 | if (!ret && cached && entry) { |
| 419 | *cached = entry; | 425 | *cached = entry; |
| @@ -424,27 +430,48 @@ out: | |||
| 424 | } | 430 | } |
| 425 | 431 | ||
| 426 | /* Needs to either be called under a log transaction or the log_mutex */ | 432 | /* Needs to either be called under a log transaction or the log_mutex */ |
| 427 | void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode) | 433 | void btrfs_get_logged_extents(struct inode *inode, |
| 434 | struct list_head *logged_list) | ||
| 428 | { | 435 | { |
| 429 | struct btrfs_ordered_inode_tree *tree; | 436 | struct btrfs_ordered_inode_tree *tree; |
| 430 | struct btrfs_ordered_extent *ordered; | 437 | struct btrfs_ordered_extent *ordered; |
| 431 | struct rb_node *n; | 438 | struct rb_node *n; |
| 432 | int index = log->log_transid % 2; | ||
| 433 | 439 | ||
| 434 | tree = &BTRFS_I(inode)->ordered_tree; | 440 | tree = &BTRFS_I(inode)->ordered_tree; |
| 435 | spin_lock_irq(&tree->lock); | 441 | spin_lock_irq(&tree->lock); |
| 436 | for (n = rb_first(&tree->tree); n; n = rb_next(n)) { | 442 | for (n = rb_first(&tree->tree); n; n = rb_next(n)) { |
| 437 | ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node); | 443 | ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node); |
| 438 | spin_lock(&log->log_extents_lock[index]); | 444 | if (!list_empty(&ordered->log_list)) |
| 439 | if (list_empty(&ordered->log_list)) { | 445 | continue; |
| 440 | list_add_tail(&ordered->log_list, &log->logged_list[index]); | 446 | list_add_tail(&ordered->log_list, logged_list); |
| 441 | atomic_inc(&ordered->refs); | 447 | atomic_inc(&ordered->refs); |
| 442 | } | ||
| 443 | spin_unlock(&log->log_extents_lock[index]); | ||
| 444 | } | 448 | } |
| 445 | spin_unlock_irq(&tree->lock); | 449 | spin_unlock_irq(&tree->lock); |
| 446 | } | 450 | } |
| 447 | 451 | ||
| 452 | void btrfs_put_logged_extents(struct list_head *logged_list) | ||
| 453 | { | ||
| 454 | struct btrfs_ordered_extent *ordered; | ||
| 455 | |||
| 456 | while (!list_empty(logged_list)) { | ||
| 457 | ordered = list_first_entry(logged_list, | ||
| 458 | struct btrfs_ordered_extent, | ||
| 459 | log_list); | ||
| 460 | list_del_init(&ordered->log_list); | ||
| 461 | btrfs_put_ordered_extent(ordered); | ||
| 462 | } | ||
| 463 | } | ||
| 464 | |||
| 465 | void btrfs_submit_logged_extents(struct list_head *logged_list, | ||
| 466 | struct btrfs_root *log) | ||
| 467 | { | ||
| 468 | int index = log->log_transid % 2; | ||
| 469 | |||
| 470 | spin_lock_irq(&log->log_extents_lock[index]); | ||
| 471 | list_splice_tail(logged_list, &log->logged_list[index]); | ||
| 472 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
| 473 | } | ||
| 474 | |||
| 448 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) | 475 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) |
| 449 | { | 476 | { |
| 450 | struct btrfs_ordered_extent *ordered; | 477 | struct btrfs_ordered_extent *ordered; |
| @@ -577,7 +604,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) | |||
| 577 | INIT_LIST_HEAD(&splice); | 604 | INIT_LIST_HEAD(&splice); |
| 578 | INIT_LIST_HEAD(&works); | 605 | INIT_LIST_HEAD(&works); |
| 579 | 606 | ||
| 580 | mutex_lock(&root->fs_info->ordered_operations_mutex); | 607 | mutex_lock(&root->ordered_extent_mutex); |
| 581 | spin_lock(&root->ordered_extent_lock); | 608 | spin_lock(&root->ordered_extent_lock); |
| 582 | list_splice_init(&root->ordered_extents, &splice); | 609 | list_splice_init(&root->ordered_extents, &splice); |
| 583 | while (!list_empty(&splice) && nr) { | 610 | while (!list_empty(&splice) && nr) { |
| @@ -588,10 +615,11 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) | |||
| 588 | atomic_inc(&ordered->refs); | 615 | atomic_inc(&ordered->refs); |
| 589 | spin_unlock(&root->ordered_extent_lock); | 616 | spin_unlock(&root->ordered_extent_lock); |
| 590 | 617 | ||
| 591 | ordered->flush_work.func = btrfs_run_ordered_extent_work; | 618 | btrfs_init_work(&ordered->flush_work, |
| 619 | btrfs_run_ordered_extent_work, NULL, NULL); | ||
| 592 | list_add_tail(&ordered->work_list, &works); | 620 | list_add_tail(&ordered->work_list, &works); |
| 593 | btrfs_queue_worker(&root->fs_info->flush_workers, | 621 | btrfs_queue_work(root->fs_info->flush_workers, |
| 594 | &ordered->flush_work); | 622 | &ordered->flush_work); |
| 595 | 623 | ||
| 596 | cond_resched(); | 624 | cond_resched(); |
| 597 | spin_lock(&root->ordered_extent_lock); | 625 | spin_lock(&root->ordered_extent_lock); |
| @@ -608,7 +636,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) | |||
| 608 | btrfs_put_ordered_extent(ordered); | 636 | btrfs_put_ordered_extent(ordered); |
| 609 | cond_resched(); | 637 | cond_resched(); |
| 610 | } | 638 | } |
| 611 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | 639 | mutex_unlock(&root->ordered_extent_mutex); |
| 612 | 640 | ||
| 613 | return count; | 641 | return count; |
| 614 | } | 642 | } |
| @@ -621,6 +649,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) | |||
| 621 | 649 | ||
| 622 | INIT_LIST_HEAD(&splice); | 650 | INIT_LIST_HEAD(&splice); |
| 623 | 651 | ||
| 652 | mutex_lock(&fs_info->ordered_operations_mutex); | ||
| 624 | spin_lock(&fs_info->ordered_root_lock); | 653 | spin_lock(&fs_info->ordered_root_lock); |
| 625 | list_splice_init(&fs_info->ordered_roots, &splice); | 654 | list_splice_init(&fs_info->ordered_roots, &splice); |
| 626 | while (!list_empty(&splice) && nr) { | 655 | while (!list_empty(&splice) && nr) { |
| @@ -643,6 +672,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) | |||
| 643 | } | 672 | } |
| 644 | list_splice_tail(&splice, &fs_info->ordered_roots); | 673 | list_splice_tail(&splice, &fs_info->ordered_roots); |
| 645 | spin_unlock(&fs_info->ordered_root_lock); | 674 | spin_unlock(&fs_info->ordered_root_lock); |
| 675 | mutex_unlock(&fs_info->ordered_operations_mutex); | ||
| 646 | } | 676 | } |
| 647 | 677 | ||
| 648 | /* | 678 | /* |
| @@ -704,8 +734,8 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, | |||
| 704 | goto out; | 734 | goto out; |
| 705 | } | 735 | } |
| 706 | list_add_tail(&work->list, &works); | 736 | list_add_tail(&work->list, &works); |
| 707 | btrfs_queue_worker(&root->fs_info->flush_workers, | 737 | btrfs_queue_work(root->fs_info->flush_workers, |
| 708 | &work->work); | 738 | &work->work); |
| 709 | 739 | ||
| 710 | cond_resched(); | 740 | cond_resched(); |
| 711 | spin_lock(&root->fs_info->ordered_root_lock); | 741 | spin_lock(&root->fs_info->ordered_root_lock); |
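The logged-extents rework above changes the locking shape: instead of taking the per-log spinlock once per ordered extent while walking the tree, the caller now collects references into a private list and splices the whole batch into the log's list in one short critical section. Reduced to its two halves — the demo_* names are illustrative, not the btrfs ones:

struct demo_item {
	struct list_head log_list;
	atomic_t refs;
};

/* Phase 1: under the source tree's lock only, capture a reference. */
static void demo_capture(struct demo_item *it, struct list_head *private)
{
	if (!list_empty(&it->log_list))
		return;			/* already on somebody's list */
	list_add_tail(&it->log_list, private);
	atomic_inc(&it->refs);
}

/* Phase 2: publish the whole batch with one lock round trip. */
static void demo_submit(struct list_head *private, struct list_head *shared,
			spinlock_t *shared_lock)
{
	spin_lock_irq(shared_lock);
	list_splice_tail(private, shared);
	spin_unlock_irq(shared_lock);
}

Besides fewer lock acquisitions, this removes the nested tree-lock/log-lock ordering the old loop had.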
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 9b0450f7ac20..246897058efb 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
| @@ -197,7 +197,11 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | |||
| 197 | struct inode *inode); | 197 | struct inode *inode); |
| 198 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); | 198 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); |
| 199 | void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); | 199 | void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); |
| 200 | void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode); | 200 | void btrfs_get_logged_extents(struct inode *inode, |
| 201 | struct list_head *logged_list); | ||
| 202 | void btrfs_put_logged_extents(struct list_head *logged_list); | ||
| 203 | void btrfs_submit_logged_extents(struct list_head *logged_list, | ||
| 204 | struct btrfs_root *log); | ||
| 201 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); | 205 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); |
| 202 | void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); | 206 | void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); |
| 203 | int __init ordered_data_init(void); | 207 | int __init ordered_data_init(void); |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 472302a2d745..2cf905877aaf 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
| @@ -1509,8 +1509,8 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans, | |||
| 1509 | ret = qgroup_rescan_init(fs_info, 0, 1); | 1509 | ret = qgroup_rescan_init(fs_info, 0, 1); |
| 1510 | if (!ret) { | 1510 | if (!ret) { |
| 1511 | qgroup_rescan_zero_tracking(fs_info); | 1511 | qgroup_rescan_zero_tracking(fs_info); |
| 1512 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, | 1512 | btrfs_queue_work(fs_info->qgroup_rescan_workers, |
| 1513 | &fs_info->qgroup_rescan_work); | 1513 | &fs_info->qgroup_rescan_work); |
| 1514 | } | 1514 | } |
| 1515 | ret = 0; | 1515 | ret = 0; |
| 1516 | } | 1516 | } |
| @@ -2095,7 +2095,8 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, | |||
| 2095 | 2095 | ||
| 2096 | memset(&fs_info->qgroup_rescan_work, 0, | 2096 | memset(&fs_info->qgroup_rescan_work, 0, |
| 2097 | sizeof(fs_info->qgroup_rescan_work)); | 2097 | sizeof(fs_info->qgroup_rescan_work)); |
| 2098 | fs_info->qgroup_rescan_work.func = btrfs_qgroup_rescan_worker; | 2098 | btrfs_init_work(&fs_info->qgroup_rescan_work, |
| 2099 | btrfs_qgroup_rescan_worker, NULL, NULL); | ||
| 2099 | 2100 | ||
| 2100 | if (ret) { | 2101 | if (ret) { |
| 2101 | err: | 2102 | err: |
| @@ -2158,8 +2159,8 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) | |||
| 2158 | 2159 | ||
| 2159 | qgroup_rescan_zero_tracking(fs_info); | 2160 | qgroup_rescan_zero_tracking(fs_info); |
| 2160 | 2161 | ||
| 2161 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, | 2162 | btrfs_queue_work(fs_info->qgroup_rescan_workers, |
| 2162 | &fs_info->qgroup_rescan_work); | 2163 | &fs_info->qgroup_rescan_work); |
| 2163 | 2164 | ||
| 2164 | return 0; | 2165 | return 0; |
| 2165 | } | 2166 | } |
| @@ -2190,6 +2191,6 @@ void | |||
| 2190 | btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) | 2191 | btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) |
| 2191 | { | 2192 | { |
| 2192 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) | 2193 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) |
| 2193 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, | 2194 | btrfs_queue_work(fs_info->qgroup_rescan_workers, |
| 2194 | &fs_info->qgroup_rescan_work); | 2195 | &fs_info->qgroup_rescan_work); |
| 2195 | } | 2196 | } |
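Every `work.func = fn` assignment this patch touches becomes the same two-call pattern against the new btrfs_workqueue. Side by side, with my_worker and some_workers as placeholder names:

/* Old: poke the function pointer directly, queue on an embedded pool. */
work->flags = 0;
work->func = my_worker;
btrfs_queue_worker(&fs_info->some_workers, work);

/* New: btrfs_init_work() takes the main function plus two ordered
 * callbacks (NULL when ordering is not needed), and the pool is now
 * a struct btrfs_workqueue pointer rather than an embedded struct.
 */
btrfs_init_work(work, my_worker, NULL, NULL);
btrfs_queue_work(fs_info->some_workers, work);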
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 9af0b25d991a..4055291a523e 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c | |||
| @@ -1416,20 +1416,18 @@ cleanup: | |||
| 1416 | 1416 | ||
| 1417 | static void async_rmw_stripe(struct btrfs_raid_bio *rbio) | 1417 | static void async_rmw_stripe(struct btrfs_raid_bio *rbio) |
| 1418 | { | 1418 | { |
| 1419 | rbio->work.flags = 0; | 1419 | btrfs_init_work(&rbio->work, rmw_work, NULL, NULL); |
| 1420 | rbio->work.func = rmw_work; | ||
| 1421 | 1420 | ||
| 1422 | btrfs_queue_worker(&rbio->fs_info->rmw_workers, | 1421 | btrfs_queue_work(rbio->fs_info->rmw_workers, |
| 1423 | &rbio->work); | 1422 | &rbio->work); |
| 1424 | } | 1423 | } |
| 1425 | 1424 | ||
| 1426 | static void async_read_rebuild(struct btrfs_raid_bio *rbio) | 1425 | static void async_read_rebuild(struct btrfs_raid_bio *rbio) |
| 1427 | { | 1426 | { |
| 1428 | rbio->work.flags = 0; | 1427 | btrfs_init_work(&rbio->work, read_rebuild_work, NULL, NULL); |
| 1429 | rbio->work.func = read_rebuild_work; | ||
| 1430 | 1428 | ||
| 1431 | btrfs_queue_worker(&rbio->fs_info->rmw_workers, | 1429 | btrfs_queue_work(rbio->fs_info->rmw_workers, |
| 1432 | &rbio->work); | 1430 | &rbio->work); |
| 1433 | } | 1431 | } |
| 1434 | 1432 | ||
| 1435 | /* | 1433 | /* |
| @@ -1667,10 +1665,9 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule) | |||
| 1667 | plug = container_of(cb, struct btrfs_plug_cb, cb); | 1665 | plug = container_of(cb, struct btrfs_plug_cb, cb); |
| 1668 | 1666 | ||
| 1669 | if (from_schedule) { | 1667 | if (from_schedule) { |
| 1670 | plug->work.flags = 0; | 1668 | btrfs_init_work(&plug->work, unplug_work, NULL, NULL); |
| 1671 | plug->work.func = unplug_work; | 1669 | btrfs_queue_work(plug->info->rmw_workers, |
| 1672 | btrfs_queue_worker(&plug->info->rmw_workers, | 1670 | &plug->work); |
| 1673 | &plug->work); | ||
| 1674 | return; | 1671 | return; |
| 1675 | } | 1672 | } |
| 1676 | run_plug(plug); | 1673 | run_plug(plug); |
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 31c797c48c3e..30947f923620 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
| @@ -793,10 +793,10 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info) | |||
| 793 | /* FIXME we cannot handle this properly right now */ | 793 | /* FIXME we cannot handle this properly right now */ |
| 794 | BUG(); | 794 | BUG(); |
| 795 | } | 795 | } |
| 796 | rmw->work.func = reada_start_machine_worker; | 796 | btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL); |
| 797 | rmw->fs_info = fs_info; | 797 | rmw->fs_info = fs_info; |
| 798 | 798 | ||
| 799 | btrfs_queue_worker(&fs_info->readahead_workers, &rmw->work); | 799 | btrfs_queue_work(fs_info->readahead_workers, &rmw->work); |
| 800 | } | 800 | } |
| 801 | 801 | ||
| 802 | #ifdef DEBUG | 802 | #ifdef DEBUG |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 07b3b36f40ee..7f92ab1daa87 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
| @@ -2317,7 +2317,6 @@ void free_reloc_roots(struct list_head *list) | |||
| 2317 | static noinline_for_stack | 2317 | static noinline_for_stack |
| 2318 | int merge_reloc_roots(struct reloc_control *rc) | 2318 | int merge_reloc_roots(struct reloc_control *rc) |
| 2319 | { | 2319 | { |
| 2320 | struct btrfs_trans_handle *trans; | ||
| 2321 | struct btrfs_root *root; | 2320 | struct btrfs_root *root; |
| 2322 | struct btrfs_root *reloc_root; | 2321 | struct btrfs_root *reloc_root; |
| 2323 | u64 last_snap; | 2322 | u64 last_snap; |
| @@ -2375,26 +2374,6 @@ again: | |||
| 2375 | list_add_tail(&reloc_root->root_list, | 2374 | list_add_tail(&reloc_root->root_list, |
| 2376 | &reloc_roots); | 2375 | &reloc_roots); |
| 2377 | goto out; | 2376 | goto out; |
| 2378 | } else if (!ret) { | ||
| 2379 | /* | ||
| 2380 | * recover the last snapshot tranid to avoid | ||
| 2381 | * the space balance break NOCOW. | ||
| 2382 | */ | ||
| 2383 | root = read_fs_root(rc->extent_root->fs_info, | ||
| 2384 | objectid); | ||
| 2385 | if (IS_ERR(root)) | ||
| 2386 | continue; | ||
| 2387 | |||
| 2388 | trans = btrfs_join_transaction(root); | ||
| 2389 | BUG_ON(IS_ERR(trans)); | ||
| 2390 | |||
| 2391 | /* Check if the fs/file tree was snapshoted or not. */ | ||
| 2392 | if (btrfs_root_last_snapshot(&root->root_item) == | ||
| 2393 | otransid - 1) | ||
| 2394 | btrfs_set_root_last_snapshot(&root->root_item, | ||
| 2395 | last_snap); | ||
| 2396 | |||
| 2397 | btrfs_end_transaction(trans, root); | ||
| 2398 | } | 2377 | } |
| 2399 | } | 2378 | } |
| 2400 | 2379 | ||
| @@ -4248,7 +4227,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 4248 | btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu", | 4227 | btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu", |
| 4249 | rc->block_group->key.objectid, rc->block_group->flags); | 4228 | rc->block_group->key.objectid, rc->block_group->flags); |
| 4250 | 4229 | ||
| 4251 | ret = btrfs_start_delalloc_roots(fs_info, 0); | 4230 | ret = btrfs_start_delalloc_roots(fs_info, 0, -1); |
| 4252 | if (ret < 0) { | 4231 | if (ret < 0) { |
| 4253 | err = ret; | 4232 | err = ret; |
| 4254 | goto out; | 4233 | goto out; |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 1389b69059de..38bb47e7d6b1 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | * Boston, MA 021110-1307, USA. | 16 | * Boston, MA 021110-1307, USA. |
| 17 | */ | 17 | */ |
| 18 | 18 | ||
| 19 | #include <linux/err.h> | ||
| 19 | #include <linux/uuid.h> | 20 | #include <linux/uuid.h> |
| 20 | #include "ctree.h" | 21 | #include "ctree.h" |
| 21 | #include "transaction.h" | 22 | #include "transaction.h" |
| @@ -271,7 +272,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
| 271 | key.offset++; | 272 | key.offset++; |
| 272 | 273 | ||
| 273 | root = btrfs_read_fs_root(tree_root, &root_key); | 274 | root = btrfs_read_fs_root(tree_root, &root_key); |
| 274 | err = PTR_RET(root); | 275 | err = PTR_ERR_OR_ZERO(root); |
| 275 | if (err && err != -ENOENT) { | 276 | if (err && err != -ENOENT) { |
| 276 | break; | 277 | break; |
| 277 | } else if (err == -ENOENT) { | 278 | } else if (err == -ENOENT) { |
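PTR_RET() is the old spelling of what linux/err.h now calls PTR_ERR_OR_ZERO(); both collapse an ERR_PTR-encoded pointer into an errno-or-zero int, roughly:

static inline int PTR_ERR_OR_ZERO(__force const void *ptr)
{
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);	/* decode the embedded errno */
	return 0;			/* valid pointer, no error */
}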
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index efba5d1282ee..0be77993378e 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
| @@ -315,6 +315,16 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx) | |||
| 315 | atomic_inc(&fs_info->scrubs_running); | 315 | atomic_inc(&fs_info->scrubs_running); |
| 316 | atomic_inc(&fs_info->scrubs_paused); | 316 | atomic_inc(&fs_info->scrubs_paused); |
| 317 | mutex_unlock(&fs_info->scrub_lock); | 317 | mutex_unlock(&fs_info->scrub_lock); |
| 318 | |||
| 319 | /* | ||
| 320 | * Checking the @scrubs_running == @scrubs_paused condition | ||
| 321 | * inside wait_event() is not an atomic operation, | ||
| 322 | * which means we may inc/dec @scrubs_running/@scrubs_paused | ||
| 323 | * at any time. Wake up @scrub_pause_wait as often as | ||
| 324 | * we can so a blocked transaction commit waits less. | ||
| 325 | */ | ||
| 326 | wake_up(&fs_info->scrub_pause_wait); | ||
| 327 | |||
| 318 | atomic_inc(&sctx->workers_pending); | 328 | atomic_inc(&sctx->workers_pending); |
| 319 | } | 329 | } |
| 320 | 330 | ||
| @@ -418,7 +428,8 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) | |||
| 418 | sbio->index = i; | 428 | sbio->index = i; |
| 419 | sbio->sctx = sctx; | 429 | sbio->sctx = sctx; |
| 420 | sbio->page_count = 0; | 430 | sbio->page_count = 0; |
| 421 | sbio->work.func = scrub_bio_end_io_worker; | 431 | btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, |
| 432 | NULL, NULL); | ||
| 422 | 433 | ||
| 423 | if (i != SCRUB_BIOS_PER_SCTX - 1) | 434 | if (i != SCRUB_BIOS_PER_SCTX - 1) |
| 424 | sctx->bios[i]->next_free = i + 1; | 435 | sctx->bios[i]->next_free = i + 1; |
| @@ -987,9 +998,10 @@ nodatasum_case: | |||
| 987 | fixup_nodatasum->root = fs_info->extent_root; | 998 | fixup_nodatasum->root = fs_info->extent_root; |
| 988 | fixup_nodatasum->mirror_num = failed_mirror_index + 1; | 999 | fixup_nodatasum->mirror_num = failed_mirror_index + 1; |
| 989 | scrub_pending_trans_workers_inc(sctx); | 1000 | scrub_pending_trans_workers_inc(sctx); |
| 990 | fixup_nodatasum->work.func = scrub_fixup_nodatasum; | 1001 | btrfs_init_work(&fixup_nodatasum->work, scrub_fixup_nodatasum, |
| 991 | btrfs_queue_worker(&fs_info->scrub_workers, | 1002 | NULL, NULL); |
| 992 | &fixup_nodatasum->work); | 1003 | btrfs_queue_work(fs_info->scrub_workers, |
| 1004 | &fixup_nodatasum->work); | ||
| 993 | goto out; | 1005 | goto out; |
| 994 | } | 1006 | } |
| 995 | 1007 | ||
| @@ -1603,8 +1615,8 @@ static void scrub_wr_bio_end_io(struct bio *bio, int err) | |||
| 1603 | sbio->err = err; | 1615 | sbio->err = err; |
| 1604 | sbio->bio = bio; | 1616 | sbio->bio = bio; |
| 1605 | 1617 | ||
| 1606 | sbio->work.func = scrub_wr_bio_end_io_worker; | 1618 | btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL); |
| 1607 | btrfs_queue_worker(&fs_info->scrub_wr_completion_workers, &sbio->work); | 1619 | btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work); |
| 1608 | } | 1620 | } |
| 1609 | 1621 | ||
| 1610 | static void scrub_wr_bio_end_io_worker(struct btrfs_work *work) | 1622 | static void scrub_wr_bio_end_io_worker(struct btrfs_work *work) |
| @@ -2072,7 +2084,7 @@ static void scrub_bio_end_io(struct bio *bio, int err) | |||
| 2072 | sbio->err = err; | 2084 | sbio->err = err; |
| 2073 | sbio->bio = bio; | 2085 | sbio->bio = bio; |
| 2074 | 2086 | ||
| 2075 | btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); | 2087 | btrfs_queue_work(fs_info->scrub_workers, &sbio->work); |
| 2076 | } | 2088 | } |
| 2077 | 2089 | ||
| 2078 | static void scrub_bio_end_io_worker(struct btrfs_work *work) | 2090 | static void scrub_bio_end_io_worker(struct btrfs_work *work) |
| @@ -2223,6 +2235,47 @@ behind_scrub_pages: | |||
| 2223 | return 0; | 2235 | return 0; |
| 2224 | } | 2236 | } |
| 2225 | 2237 | ||
| 2238 | /* | ||
| 2239 | * Given a physical address, this will calculate its | ||
| 2240 | * logical offset. If this is a parity stripe, it will return | ||
| 2241 | * the leftmost data stripe's logical offset. | ||
| 2242 | * | ||
| 2243 | * Returns 0 if it is a data stripe, 1 if it is a parity stripe. | ||
| 2244 | */ | ||
| 2245 | static int get_raid56_logic_offset(u64 physical, int num, | ||
| 2246 | struct map_lookup *map, u64 *offset) | ||
| 2247 | { | ||
| 2248 | int i; | ||
| 2249 | int j = 0; | ||
| 2250 | u64 stripe_nr; | ||
| 2251 | u64 last_offset; | ||
| 2252 | int stripe_index; | ||
| 2253 | int rot; | ||
| 2254 | |||
| 2255 | last_offset = (physical - map->stripes[num].physical) * | ||
| 2256 | nr_data_stripes(map); | ||
| 2257 | *offset = last_offset; | ||
| 2258 | for (i = 0; i < nr_data_stripes(map); i++) { | ||
| 2259 | *offset = last_offset + i * map->stripe_len; | ||
| 2260 | |||
| 2261 | stripe_nr = *offset; | ||
| 2262 | do_div(stripe_nr, map->stripe_len); | ||
| 2263 | do_div(stripe_nr, nr_data_stripes(map)); | ||
| 2264 | |||
| 2265 | /* Work out the disk rotation on this stripe-set */ | ||
| 2266 | rot = do_div(stripe_nr, map->num_stripes); | ||
| 2267 | /* calculate which stripe this data is located on */ | ||
| 2268 | rot += i; | ||
| 2269 | stripe_index = rot % map->num_stripes; | ||
| 2270 | if (stripe_index == num) | ||
| 2271 | return 0; | ||
| 2272 | if (stripe_index < num) | ||
| 2273 | j++; | ||
| 2274 | } | ||
| 2275 | *offset = last_offset + j * map->stripe_len; | ||
| 2276 | return 1; | ||
| 2277 | } | ||
| 2278 | |||
| 2226 | static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | 2279 | static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, |
| 2227 | struct map_lookup *map, | 2280 | struct map_lookup *map, |
| 2228 | struct btrfs_device *scrub_dev, | 2281 | struct btrfs_device *scrub_dev, |
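To make the stripe arithmetic above concrete, here is the same math as a small standalone program. Plain / and % stand in for do_div(), which divides a u64 in place and returns the remainder, and the layout constants are made up for illustration: a 3-device RAID5 map with a 64 KiB stripe_len, so two data stripes per full stripe:

#include <stdint.h>
#include <stdio.h>

#define STRIPE_LEN	(64 * 1024ULL)
#define NUM_STRIPES	3		/* RAID5 over three devices */
#define NR_DATA		2		/* num_stripes minus one parity */

/* Same arithmetic as get_raid56_logic_offset(); device start taken as 0. */
static int logic_offset(uint64_t physical, int num, uint64_t *offset)
{
	uint64_t last = physical * NR_DATA;
	int i, j = 0;

	for (i = 0; i < NR_DATA; i++) {
		uint64_t off = last + i * STRIPE_LEN;
		uint64_t stripe_nr = off / STRIPE_LEN / NR_DATA;
		int stripe_index = ((int)(stripe_nr % NUM_STRIPES) + i) %
				   NUM_STRIPES;

		if (stripe_index == num) {
			*offset = off;
			return 0;	/* data stripe */
		}
		if (stripe_index < num)
			j++;
	}
	*offset = last + j * STRIPE_LEN;
	return 1;			/* parity stripe */
}

int main(void)
{
	uint64_t off;

	/* Device 2, physical 0: parity here; offset becomes 131072. */
	printf("%d %llu\n", logic_offset(0, 2, &off),
	       (unsigned long long)off);
	/* Device 2, physical 64K: data stripe at logical 196608. */
	printf("%d %llu\n", logic_offset(STRIPE_LEN, 2, &off),
	       (unsigned long long)off);
	return 0;
}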
| @@ -2244,6 +2297,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
| 2244 | u64 physical; | 2297 | u64 physical; |
| 2245 | u64 logical; | 2298 | u64 logical; |
| 2246 | u64 logic_end; | 2299 | u64 logic_end; |
| 2300 | u64 physical_end; | ||
| 2247 | u64 generation; | 2301 | u64 generation; |
| 2248 | int mirror_num; | 2302 | int mirror_num; |
| 2249 | struct reada_control *reada1; | 2303 | struct reada_control *reada1; |
| @@ -2257,16 +2311,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
| 2257 | u64 extent_len; | 2311 | u64 extent_len; |
| 2258 | struct btrfs_device *extent_dev; | 2312 | struct btrfs_device *extent_dev; |
| 2259 | int extent_mirror_num; | 2313 | int extent_mirror_num; |
| 2260 | int stop_loop; | 2314 | int stop_loop = 0; |
| 2261 | |||
| 2262 | if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | | ||
| 2263 | BTRFS_BLOCK_GROUP_RAID6)) { | ||
| 2264 | if (num >= nr_data_stripes(map)) { | ||
| 2265 | return 0; | ||
| 2266 | } | ||
| 2267 | } | ||
| 2268 | 2315 | ||
| 2269 | nstripes = length; | 2316 | nstripes = length; |
| 2317 | physical = map->stripes[num].physical; | ||
| 2270 | offset = 0; | 2318 | offset = 0; |
| 2271 | do_div(nstripes, map->stripe_len); | 2319 | do_div(nstripes, map->stripe_len); |
| 2272 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | 2320 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { |
| @@ -2284,6 +2332,11 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
| 2284 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { | 2332 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { |
| 2285 | increment = map->stripe_len; | 2333 | increment = map->stripe_len; |
| 2286 | mirror_num = num % map->num_stripes + 1; | 2334 | mirror_num = num % map->num_stripes + 1; |
| 2335 | } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | | ||
| 2336 | BTRFS_BLOCK_GROUP_RAID6)) { | ||
| 2337 | get_raid56_logic_offset(physical, num, map, &offset); | ||
| 2338 | increment = map->stripe_len * nr_data_stripes(map); | ||
| 2339 | mirror_num = 1; | ||
| 2287 | } else { | 2340 | } else { |
| 2288 | increment = map->stripe_len; | 2341 | increment = map->stripe_len; |
| 2289 | mirror_num = 1; | 2342 | mirror_num = 1; |
| @@ -2307,7 +2360,15 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
| 2307 | * to not hold off transaction commits | 2360 | * to not hold off transaction commits |
| 2308 | */ | 2361 | */ |
| 2309 | logical = base + offset; | 2362 | logical = base + offset; |
| 2310 | 2363 | physical_end = physical + nstripes * map->stripe_len; | |
| 2364 | if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | | ||
| 2365 | BTRFS_BLOCK_GROUP_RAID6)) { | ||
| 2366 | get_raid56_logic_offset(physical_end, num, | ||
| 2367 | map, &logic_end); | ||
| 2368 | logic_end += base; | ||
| 2369 | } else { | ||
| 2370 | logic_end = logical + increment * nstripes; | ||
| 2371 | } | ||
| 2311 | wait_event(sctx->list_wait, | 2372 | wait_event(sctx->list_wait, |
| 2312 | atomic_read(&sctx->bios_in_flight) == 0); | 2373 | atomic_read(&sctx->bios_in_flight) == 0); |
| 2313 | scrub_blocked_if_needed(fs_info); | 2374 | scrub_blocked_if_needed(fs_info); |
| @@ -2316,7 +2377,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
| 2316 | key_start.objectid = logical; | 2377 | key_start.objectid = logical; |
| 2317 | key_start.type = BTRFS_EXTENT_ITEM_KEY; | 2378 | key_start.type = BTRFS_EXTENT_ITEM_KEY; |
| 2318 | key_start.offset = (u64)0; | 2379 | key_start.offset = (u64)0; |
| 2319 | key_end.objectid = base + offset + nstripes * increment; | 2380 | key_end.objectid = logic_end; |
| 2320 | key_end.type = BTRFS_METADATA_ITEM_KEY; | 2381 | key_end.type = BTRFS_METADATA_ITEM_KEY; |
| 2321 | key_end.offset = (u64)-1; | 2382 | key_end.offset = (u64)-1; |
| 2322 | reada1 = btrfs_reada_add(root, &key_start, &key_end); | 2383 | reada1 = btrfs_reada_add(root, &key_start, &key_end); |
| @@ -2326,7 +2387,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
| 2326 | key_start.offset = logical; | 2387 | key_start.offset = logical; |
| 2327 | key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID; | 2388 | key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID; |
| 2328 | key_end.type = BTRFS_EXTENT_CSUM_KEY; | 2389 | key_end.type = BTRFS_EXTENT_CSUM_KEY; |
| 2329 | key_end.offset = base + offset + nstripes * increment; | 2390 | key_end.offset = logic_end; |
| 2330 | reada2 = btrfs_reada_add(csum_root, &key_start, &key_end); | 2391 | reada2 = btrfs_reada_add(csum_root, &key_start, &key_end); |
| 2331 | 2392 | ||
| 2332 | if (!IS_ERR(reada1)) | 2393 | if (!IS_ERR(reada1)) |
| @@ -2344,11 +2405,17 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
| 2344 | /* | 2405 | /* |
| 2345 | * now find all extents for each stripe and scrub them | 2406 | * now find all extents for each stripe and scrub them |
| 2346 | */ | 2407 | */ |
| 2347 | logical = base + offset; | ||
| 2348 | physical = map->stripes[num].physical; | ||
| 2349 | logic_end = logical + increment * nstripes; | ||
| 2350 | ret = 0; | 2408 | ret = 0; |
| 2351 | while (logical < logic_end) { | 2409 | while (physical < physical_end) { |
| 2410 | /* for raid56, we skip parity stripe */ | ||
| 2411 | if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | | ||
| 2412 | BTRFS_BLOCK_GROUP_RAID6)) { | ||
| 2413 | ret = get_raid56_logic_offset(physical, num, | ||
| 2414 | map, &logical); | ||
| 2415 | logical += base; | ||
| 2416 | if (ret) | ||
| 2417 | goto skip; | ||
| 2418 | } | ||
| 2352 | /* | 2419 | /* |
| 2353 | * canceled? | 2420 | * canceled? |
| 2354 | */ | 2421 | */ |
| @@ -2492,15 +2559,29 @@ again: | |||
| 2492 | scrub_free_csums(sctx); | 2559 | scrub_free_csums(sctx); |
| 2493 | if (extent_logical + extent_len < | 2560 | if (extent_logical + extent_len < |
| 2494 | key.objectid + bytes) { | 2561 | key.objectid + bytes) { |
| 2495 | logical += increment; | 2562 | if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | |
| 2496 | physical += map->stripe_len; | 2563 | BTRFS_BLOCK_GROUP_RAID6)) { |
| 2497 | 2564 | /* | |
| 2565 | * loop until we find next data stripe | ||
| 2566 | * or we have finished all stripes. | ||
| 2567 | */ | ||
| 2568 | do { | ||
| 2569 | physical += map->stripe_len; | ||
| 2570 | ret = get_raid56_logic_offset( | ||
| 2571 | physical, num, | ||
| 2572 | map, &logical); | ||
| 2573 | logical += base; | ||
| 2574 | } while (physical < physical_end && ret); | ||
| 2575 | } else { | ||
| 2576 | physical += map->stripe_len; | ||
| 2577 | logical += increment; | ||
| 2578 | } | ||
| 2498 | if (logical < key.objectid + bytes) { | 2579 | if (logical < key.objectid + bytes) { |
| 2499 | cond_resched(); | 2580 | cond_resched(); |
| 2500 | goto again; | 2581 | goto again; |
| 2501 | } | 2582 | } |
| 2502 | 2583 | ||
| 2503 | if (logical >= logic_end) { | 2584 | if (physical >= physical_end) { |
| 2504 | stop_loop = 1; | 2585 | stop_loop = 1; |
| 2505 | break; | 2586 | break; |
| 2506 | } | 2587 | } |
| @@ -2509,6 +2590,7 @@ next: | |||
| 2509 | path->slots[0]++; | 2590 | path->slots[0]++; |
| 2510 | } | 2591 | } |
| 2511 | btrfs_release_path(path); | 2592 | btrfs_release_path(path); |
| 2593 | skip: | ||
| 2512 | logical += increment; | 2594 | logical += increment; |
| 2513 | physical += map->stripe_len; | 2595 | physical += map->stripe_len; |
| 2514 | spin_lock(&sctx->stat_lock); | 2596 | spin_lock(&sctx->stat_lock); |
| @@ -2686,10 +2768,23 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, | |||
| 2686 | 2768 | ||
| 2687 | wait_event(sctx->list_wait, | 2769 | wait_event(sctx->list_wait, |
| 2688 | atomic_read(&sctx->bios_in_flight) == 0); | 2770 | atomic_read(&sctx->bios_in_flight) == 0); |
| 2689 | atomic_set(&sctx->wr_ctx.flush_all_writes, 0); | 2771 | atomic_inc(&fs_info->scrubs_paused); |
| 2772 | wake_up(&fs_info->scrub_pause_wait); | ||
| 2773 | |||
| 2774 | /* | ||
| 2775 | * This must be done before we decrease @scrubs_paused, to | ||
| 2776 | * make sure we don't block transaction commit while | ||
| 2777 | * we are waiting for pending workers to finish. | ||
| 2778 | */ | ||
| 2690 | wait_event(sctx->list_wait, | 2779 | wait_event(sctx->list_wait, |
| 2691 | atomic_read(&sctx->workers_pending) == 0); | 2780 | atomic_read(&sctx->workers_pending) == 0); |
| 2692 | scrub_blocked_if_needed(fs_info); | 2781 | atomic_set(&sctx->wr_ctx.flush_all_writes, 0); |
| 2782 | |||
| 2783 | mutex_lock(&fs_info->scrub_lock); | ||
| 2784 | __scrub_blocked_if_needed(fs_info); | ||
| 2785 | atomic_dec(&fs_info->scrubs_paused); | ||
| 2786 | mutex_unlock(&fs_info->scrub_lock); | ||
| 2787 | wake_up(&fs_info->scrub_pause_wait); | ||
| 2693 | 2788 | ||
| 2694 | btrfs_put_block_group(cache); | 2789 | btrfs_put_block_group(cache); |
| 2695 | if (ret) | 2790 | if (ret) |
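The hunk above replaces the scrub_blocked_if_needed() call with an explicit pause announcement, so waiting for @workers_pending cannot stall a transaction commit. The handshake it participates in boils down to two halves — a sketch inferred from the counters visible here; the real code also takes scrub_lock around the state changes:

/* Scrub side: declare ourselves paused before any long wait, and
 * poke the waitqueue so a blocked commit re-evaluates its condition.
 */
atomic_inc(&fs_info->scrubs_paused);
wake_up(&fs_info->scrub_pause_wait);

/* Commit side: proceed once every running scrub has paused. */
wait_event(fs_info->scrub_pause_wait,
	   atomic_read(&fs_info->scrubs_running) ==
	   atomic_read(&fs_info->scrubs_paused));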
| @@ -2757,33 +2852,35 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, | |||
| 2757 | int is_dev_replace) | 2852 | int is_dev_replace) |
| 2758 | { | 2853 | { |
| 2759 | int ret = 0; | 2854 | int ret = 0; |
| 2855 | int flags = WQ_FREEZABLE | WQ_UNBOUND; | ||
| 2856 | int max_active = fs_info->thread_pool_size; | ||
| 2760 | 2857 | ||
| 2761 | if (fs_info->scrub_workers_refcnt == 0) { | 2858 | if (fs_info->scrub_workers_refcnt == 0) { |
| 2762 | if (is_dev_replace) | 2859 | if (is_dev_replace) |
| 2763 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1, | 2860 | fs_info->scrub_workers = |
| 2764 | &fs_info->generic_worker); | 2861 | btrfs_alloc_workqueue("btrfs-scrub", flags, |
| 2862 | 1, 4); | ||
| 2765 | else | 2863 | else |
| 2766 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", | 2864 | fs_info->scrub_workers = |
| 2767 | fs_info->thread_pool_size, | 2865 | btrfs_alloc_workqueue("btrfs-scrub", flags, |
| 2768 | &fs_info->generic_worker); | 2866 | max_active, 4); |
| 2769 | fs_info->scrub_workers.idle_thresh = 4; | 2867 | if (!fs_info->scrub_workers) { |
| 2770 | ret = btrfs_start_workers(&fs_info->scrub_workers); | 2868 | ret = -ENOMEM; |
| 2771 | if (ret) | ||
| 2772 | goto out; | 2869 | goto out; |
| 2773 | btrfs_init_workers(&fs_info->scrub_wr_completion_workers, | 2870 | } |
| 2774 | "scrubwrc", | 2871 | fs_info->scrub_wr_completion_workers = |
| 2775 | fs_info->thread_pool_size, | 2872 | btrfs_alloc_workqueue("btrfs-scrubwrc", flags, |
| 2776 | &fs_info->generic_worker); | 2873 | max_active, 2); |
| 2777 | fs_info->scrub_wr_completion_workers.idle_thresh = 2; | 2874 | if (!fs_info->scrub_wr_completion_workers) { |
| 2778 | ret = btrfs_start_workers( | 2875 | ret = -ENOMEM; |
| 2779 | &fs_info->scrub_wr_completion_workers); | ||
| 2780 | if (ret) | ||
| 2781 | goto out; | 2876 | goto out; |
| 2782 | btrfs_init_workers(&fs_info->scrub_nocow_workers, "scrubnc", 1, | 2877 | } |
| 2783 | &fs_info->generic_worker); | 2878 | fs_info->scrub_nocow_workers = |
| 2784 | ret = btrfs_start_workers(&fs_info->scrub_nocow_workers); | 2879 | btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0); |
| 2785 | if (ret) | 2880 | if (!fs_info->scrub_nocow_workers) { |
| 2881 | ret = -ENOMEM; | ||
| 2786 | goto out; | 2882 | goto out; |
| 2883 | } | ||
| 2787 | } | 2884 | } |
| 2788 | ++fs_info->scrub_workers_refcnt; | 2885 | ++fs_info->scrub_workers_refcnt; |
| 2789 | out: | 2886 | out: |
| @@ -2793,9 +2890,9 @@ out: | |||
| 2793 | static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) | 2890 | static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) |
| 2794 | { | 2891 | { |
| 2795 | if (--fs_info->scrub_workers_refcnt == 0) { | 2892 | if (--fs_info->scrub_workers_refcnt == 0) { |
| 2796 | btrfs_stop_workers(&fs_info->scrub_workers); | 2893 | btrfs_destroy_workqueue(fs_info->scrub_workers); |
| 2797 | btrfs_stop_workers(&fs_info->scrub_wr_completion_workers); | 2894 | btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); |
| 2798 | btrfs_stop_workers(&fs_info->scrub_nocow_workers); | 2895 | btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); |
| 2799 | } | 2896 | } |
| 2800 | WARN_ON(fs_info->scrub_workers_refcnt < 0); | 2897 | WARN_ON(fs_info->scrub_workers_refcnt < 0); |
| 2801 | } | 2898 | } |
| @@ -3106,10 +3203,10 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
| 3106 | nocow_ctx->len = len; | 3203 | nocow_ctx->len = len; |
| 3107 | nocow_ctx->mirror_num = mirror_num; | 3204 | nocow_ctx->mirror_num = mirror_num; |
| 3108 | nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; | 3205 | nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; |
| 3109 | nocow_ctx->work.func = copy_nocow_pages_worker; | 3206 | btrfs_init_work(&nocow_ctx->work, copy_nocow_pages_worker, NULL, NULL); |
| 3110 | INIT_LIST_HEAD(&nocow_ctx->inodes); | 3207 | INIT_LIST_HEAD(&nocow_ctx->inodes); |
| 3111 | btrfs_queue_worker(&fs_info->scrub_nocow_workers, | 3208 | btrfs_queue_work(fs_info->scrub_nocow_workers, |
| 3112 | &nocow_ctx->work); | 3209 | &nocow_ctx->work); |
| 3113 | 3210 | ||
| 3114 | return 0; | 3211 | return 0; |
| 3115 | } | 3212 | } |
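In scrub_workers_get() above, the old three-step init (btrfs_init_workers(), set idle_thresh, btrfs_start_workers()) collapses into one allocator call. The argument mapping, as far as this hunk shows:

/* Old: embedded pool, threshold set after init, explicit start. */
btrfs_init_workers(&fs_info->scrub_workers, "scrub",
		   fs_info->thread_pool_size, &fs_info->generic_worker);
fs_info->scrub_workers.idle_thresh = 4;
ret = btrfs_start_workers(&fs_info->scrub_workers);

/* New: one call; the old idle_thresh value travels as the last
 * argument, and failure is signalled by a NULL return, not an errno.
 */
fs_info->scrub_workers =
	btrfs_alloc_workqueue("btrfs-scrub", WQ_FREEZABLE | WQ_UNBOUND,
			      fs_info->thread_pool_size, 4);
if (!fs_info->scrub_workers)
	ret = -ENOMEM;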
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 9dde9717c1b9..1ac3ca98c429 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
| @@ -51,15 +51,18 @@ struct fs_path { | |||
| 51 | struct { | 51 | struct { |
| 52 | char *start; | 52 | char *start; |
| 53 | char *end; | 53 | char *end; |
| 54 | char *prepared; | ||
| 55 | 54 | ||
| 56 | char *buf; | 55 | char *buf; |
| 57 | int buf_len; | 56 | unsigned short buf_len:15; |
| 58 | unsigned int reversed:1; | 57 | unsigned short reversed:1; |
| 59 | unsigned int virtual_mem:1; | ||
| 60 | char inline_buf[]; | 58 | char inline_buf[]; |
| 61 | }; | 59 | }; |
| 62 | char pad[PAGE_SIZE]; | 60 | /* |
| 61 | * Average path length does not exceed 200 bytes, so we'll have | ||
| 62 | * better packing in the slab and a higher chance of satisfying | ||
| 63 | * an allocation later during send. | ||
| 64 | */ | ||
| 65 | char pad[256]; | ||
| 63 | }; | 66 | }; |
| 64 | }; | 67 | }; |
| 65 | #define FS_PATH_INLINE_SIZE \ | 68 | #define FS_PATH_INLINE_SIZE \ |
| @@ -109,6 +112,7 @@ struct send_ctx { | |||
| 109 | int cur_inode_deleted; | 112 | int cur_inode_deleted; |
| 110 | u64 cur_inode_size; | 113 | u64 cur_inode_size; |
| 111 | u64 cur_inode_mode; | 114 | u64 cur_inode_mode; |
| 115 | u64 cur_inode_rdev; | ||
| 112 | u64 cur_inode_last_extent; | 116 | u64 cur_inode_last_extent; |
| 113 | 117 | ||
| 114 | u64 send_progress; | 118 | u64 send_progress; |
| @@ -120,6 +124,8 @@ struct send_ctx { | |||
| 120 | struct list_head name_cache_list; | 124 | struct list_head name_cache_list; |
| 121 | int name_cache_size; | 125 | int name_cache_size; |
| 122 | 126 | ||
| 127 | struct file_ra_state ra; | ||
| 128 | |||
| 123 | char *read_buf; | 129 | char *read_buf; |
| 124 | 130 | ||
| 125 | /* | 131 | /* |
| @@ -175,6 +181,47 @@ struct send_ctx { | |||
| 175 | * own move/rename can be performed. | 181 | * own move/rename can be performed. |
| 176 | */ | 182 | */ |
| 177 | struct rb_root waiting_dir_moves; | 183 | struct rb_root waiting_dir_moves; |
| 184 | |||
| 185 | /* | ||
| 186 | * A directory that is going to be rm'ed might have a child directory | ||
| 187 | * which is in the pending directory moves index above. In this case, | ||
| 188 | * the directory can only be removed after the move/rename of its child | ||
| 189 | * is performed. Example: | ||
| 190 | * | ||
| 191 | * Parent snapshot: | ||
| 192 | * | ||
| 193 | * . (ino 256) | ||
| 194 | * |-- a/ (ino 257) | ||
| 195 | * |-- b/ (ino 258) | ||
| 196 | * |-- c/ (ino 259) | ||
| 197 | * | |-- x/ (ino 260) | ||
| 198 | * | | ||
| 199 | * |-- y/ (ino 261) | ||
| 200 | * | ||
| 201 | * Send snapshot: | ||
| 202 | * | ||
| 203 | * . (ino 256) | ||
| 204 | * |-- a/ (ino 257) | ||
| 205 | * |-- b/ (ino 258) | ||
| 206 | * |-- YY/ (ino 261) | ||
| 207 | * |-- x/ (ino 260) | ||
| 208 | * | ||
| 209 | * Sequence of steps that lead to the send snapshot: | ||
| 210 | * rm -f /a/b/c/foo.txt | ||
| 211 | * mv /a/b/y /a/b/YY | ||
| 212 | * mv /a/b/c/x /a/b/YY | ||
| 213 | * rmdir /a/b/c | ||
| 214 | * | ||
| 215 | * When the child is processed, its move/rename is delayed until its | ||
| 216 | * parent is processed (as explained above), but all other operations | ||
| 217 | * like update utimes, chown, chgrp, etc, are performed and the paths | ||
| 218 | * that it uses for those operations must use the orphanized name of | ||
| 219 | * its parent (the directory we're going to rm later), so we need to | ||
| 220 | * memorize that name. | ||
| 221 | * | ||
| 222 | * Indexed by the inode number of the directory to be deleted. | ||
| 223 | */ | ||
| 224 | struct rb_root orphan_dirs; | ||
| 178 | }; | 225 | }; |
| 179 | 226 | ||
| 180 | struct pending_dir_move { | 227 | struct pending_dir_move { |
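orphan_dirs above is a plain rbtree keyed by inode number. The send helpers that maintain it are not part of this hunk, but an index like that normally uses the standard rbtree walk-and-link idiom, sketched here against the struct fields shown above (demo_odi_insert is a hypothetical name, not the send.c function):

static struct orphan_dir_info *demo_odi_insert(struct rb_root *root,
					       u64 ino, u64 gen)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct orphan_dir_info *entry, *odi;

	odi = kmalloc(sizeof(*odi), GFP_NOFS);
	if (!odi)
		return ERR_PTR(-ENOMEM);
	odi->ino = ino;
	odi->gen = gen;

	/* Walk down to the insertion point, keyed by inode number. */
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct orphan_dir_info, node);
		if (ino < entry->ino) {
			p = &(*p)->rb_left;
		} else if (ino > entry->ino) {
			p = &(*p)->rb_right;
		} else {
			kfree(odi);
			return entry;	/* already indexed */
		}
	}
	rb_link_node(&odi->node, parent, p);
	rb_insert_color(&odi->node, root);
	return odi;
}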
| @@ -189,6 +236,18 @@ struct pending_dir_move { | |||
| 189 | struct waiting_dir_move { | 236 | struct waiting_dir_move { |
| 190 | struct rb_node node; | 237 | struct rb_node node; |
| 191 | u64 ino; | 238 | u64 ino; |
| 239 | /* | ||
| 240 | * There might be some directory that could not be removed because it | ||
| 241 | * was waiting for this directory inode to be moved first. Therefore, | ||
| 242 | * after this directory is moved, we can try to rmdir the inode rmdir_ino. | ||
| 243 | */ | ||
| 244 | u64 rmdir_ino; | ||
| 245 | }; | ||
| 246 | |||
| 247 | struct orphan_dir_info { | ||
| 248 | struct rb_node node; | ||
| 249 | u64 ino; | ||
| 250 | u64 gen; | ||
| 192 | }; | 251 | }; |
| 193 | 252 | ||
| 194 | struct name_cache_entry { | 253 | struct name_cache_entry { |
| @@ -214,6 +273,11 @@ struct name_cache_entry { | |||
| 214 | 273 | ||
| 215 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); | 274 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); |
| 216 | 275 | ||
| 276 | static struct waiting_dir_move * | ||
| 277 | get_waiting_dir_move(struct send_ctx *sctx, u64 ino); | ||
| 278 | |||
| 279 | static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino); | ||
| 280 | |||
| 217 | static int need_send_hole(struct send_ctx *sctx) | 281 | static int need_send_hole(struct send_ctx *sctx) |
| 218 | { | 282 | { |
| 219 | return (sctx->parent_root && !sctx->cur_inode_new && | 283 | return (sctx->parent_root && !sctx->cur_inode_new && |
| @@ -242,7 +306,6 @@ static struct fs_path *fs_path_alloc(void) | |||
| 242 | if (!p) | 306 | if (!p) |
| 243 | return NULL; | 307 | return NULL; |
| 244 | p->reversed = 0; | 308 | p->reversed = 0; |
| 245 | p->virtual_mem = 0; | ||
| 246 | p->buf = p->inline_buf; | 309 | p->buf = p->inline_buf; |
| 247 | p->buf_len = FS_PATH_INLINE_SIZE; | 310 | p->buf_len = FS_PATH_INLINE_SIZE; |
| 248 | fs_path_reset(p); | 311 | fs_path_reset(p); |
| @@ -265,12 +328,8 @@ static void fs_path_free(struct fs_path *p) | |||
| 265 | { | 328 | { |
| 266 | if (!p) | 329 | if (!p) |
| 267 | return; | 330 | return; |
| 268 | if (p->buf != p->inline_buf) { | 331 | if (p->buf != p->inline_buf) |
| 269 | if (p->virtual_mem) | 332 | kfree(p->buf); |
| 270 | vfree(p->buf); | ||
| 271 | else | ||
| 272 | kfree(p->buf); | ||
| 273 | } | ||
| 274 | kfree(p); | 333 | kfree(p); |
| 275 | } | 334 | } |
| 276 | 335 | ||
| @@ -292,40 +351,23 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) | |||
| 292 | 351 | ||
| 293 | path_len = p->end - p->start; | 352 | path_len = p->end - p->start; |
| 294 | old_buf_len = p->buf_len; | 353 | old_buf_len = p->buf_len; |
| 295 | len = PAGE_ALIGN(len); | 354 | |
| 296 | 355 | /* | |
| 297 | if (p->buf == p->inline_buf) { | 356 | * First time the inline_buf does not suffice |
| 298 | tmp_buf = kmalloc(len, GFP_NOFS | __GFP_NOWARN); | 357 | */ |
| 299 | if (!tmp_buf) { | 358 | if (p->buf == p->inline_buf) |
| 300 | tmp_buf = vmalloc(len); | 359 | tmp_buf = kmalloc(len, GFP_NOFS); |
| 301 | if (!tmp_buf) | 360 | else |
| 302 | return -ENOMEM; | 361 | tmp_buf = krealloc(p->buf, len, GFP_NOFS); |
| 303 | p->virtual_mem = 1; | 362 | if (!tmp_buf) |
| 304 | } | 363 | return -ENOMEM; |
| 305 | memcpy(tmp_buf, p->buf, p->buf_len); | 364 | p->buf = tmp_buf; |
| 306 | p->buf = tmp_buf; | 365 | /* |
| 307 | p->buf_len = len; | 366 | * The real size of the buffer is bigger, this will let the fast path |
| 308 | } else { | 367 | * happen most of the time |
| 309 | if (p->virtual_mem) { | 368 | */ |
| 310 | tmp_buf = vmalloc(len); | 369 | p->buf_len = ksize(p->buf); |
| 311 | if (!tmp_buf) | 370 | |
| 312 | return -ENOMEM; | ||
| 313 | memcpy(tmp_buf, p->buf, p->buf_len); | ||
| 314 | vfree(p->buf); | ||
| 315 | } else { | ||
| 316 | tmp_buf = krealloc(p->buf, len, GFP_NOFS); | ||
| 317 | if (!tmp_buf) { | ||
| 318 | tmp_buf = vmalloc(len); | ||
| 319 | if (!tmp_buf) | ||
| 320 | return -ENOMEM; | ||
| 321 | memcpy(tmp_buf, p->buf, p->buf_len); | ||
| 322 | kfree(p->buf); | ||
| 323 | p->virtual_mem = 1; | ||
| 324 | } | ||
| 325 | } | ||
| 326 | p->buf = tmp_buf; | ||
| 327 | p->buf_len = len; | ||
| 328 | } | ||
| 329 | if (p->reversed) { | 371 | if (p->reversed) { |
| 330 | tmp_buf = p->buf + old_buf_len - path_len - 1; | 372 | tmp_buf = p->buf + old_buf_len - path_len - 1; |
| 331 | p->end = p->buf + p->buf_len - 1; | 373 | p->end = p->buf + p->buf_len - 1; |
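The replacement fs_path_ensure_buf() above leans on a slab detail: kmalloc()/krealloc() round requests up to a bucket size, and ksize() reports that real size. Recording ksize(p->buf) instead of the requested len means later appends that still fit in the bucket take the fast path with no reallocation. The core of the trick in isolation, ignoring the inline_buf first-growth case handled above:

/* Grow *buf to at least len bytes, remembering the slab's real size. */
static int demo_ensure_buf(char **buf, size_t *buf_len, size_t len)
{
	char *tmp;

	if (len <= *buf_len)	/* fast path: the bucket already fits */
		return 0;

	tmp = krealloc(*buf, len, GFP_NOFS);
	if (!tmp)
		return -ENOMEM;

	*buf = tmp;
	*buf_len = ksize(*buf);	/* claim everything the slab gave us */
	return 0;
}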
| @@ -338,7 +380,8 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) | |||
| 338 | return 0; | 380 | return 0; |
| 339 | } | 381 | } |
| 340 | 382 | ||
| 341 | static int fs_path_prepare_for_add(struct fs_path *p, int name_len) | 383 | static int fs_path_prepare_for_add(struct fs_path *p, int name_len, |
| 384 | char **prepared) | ||
| 342 | { | 385 | { |
| 343 | int ret; | 386 | int ret; |
| 344 | int new_len; | 387 | int new_len; |
| @@ -354,11 +397,11 @@ static int fs_path_prepare_for_add(struct fs_path *p, int name_len) | |||
| 354 | if (p->start != p->end) | 397 | if (p->start != p->end) |
| 355 | *--p->start = '/'; | 398 | *--p->start = '/'; |
| 356 | p->start -= name_len; | 399 | p->start -= name_len; |
| 357 | p->prepared = p->start; | 400 | *prepared = p->start; |
| 358 | } else { | 401 | } else { |
| 359 | if (p->start != p->end) | 402 | if (p->start != p->end) |
| 360 | *p->end++ = '/'; | 403 | *p->end++ = '/'; |
| 361 | p->prepared = p->end; | 404 | *prepared = p->end; |
| 362 | p->end += name_len; | 405 | p->end += name_len; |
| 363 | *p->end = 0; | 406 | *p->end = 0; |
| 364 | } | 407 | } |
| @@ -370,12 +413,12 @@ out: | |||
| 370 | static int fs_path_add(struct fs_path *p, const char *name, int name_len) | 413 | static int fs_path_add(struct fs_path *p, const char *name, int name_len) |
| 371 | { | 414 | { |
| 372 | int ret; | 415 | int ret; |
| 416 | char *prepared; | ||
| 373 | 417 | ||
| 374 | ret = fs_path_prepare_for_add(p, name_len); | 418 | ret = fs_path_prepare_for_add(p, name_len, &prepared); |
| 375 | if (ret < 0) | 419 | if (ret < 0) |
| 376 | goto out; | 420 | goto out; |
| 377 | memcpy(p->prepared, name, name_len); | 421 | memcpy(prepared, name, name_len); |
| 378 | p->prepared = NULL; | ||
| 379 | 422 | ||
| 380 | out: | 423 | out: |
| 381 | return ret; | 424 | return ret; |
| @@ -384,12 +427,12 @@ out: | |||
| 384 | static int fs_path_add_path(struct fs_path *p, struct fs_path *p2) | 427 | static int fs_path_add_path(struct fs_path *p, struct fs_path *p2) |
| 385 | { | 428 | { |
| 386 | int ret; | 429 | int ret; |
| 430 | char *prepared; | ||
| 387 | 431 | ||
| 388 | ret = fs_path_prepare_for_add(p, p2->end - p2->start); | 432 | ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared); |
| 389 | if (ret < 0) | 433 | if (ret < 0) |
| 390 | goto out; | 434 | goto out; |
| 391 | memcpy(p->prepared, p2->start, p2->end - p2->start); | 435 | memcpy(prepared, p2->start, p2->end - p2->start); |
| 392 | p->prepared = NULL; | ||
| 393 | 436 | ||
| 394 | out: | 437 | out: |
| 395 | return ret; | 438 | return ret; |
| @@ -400,13 +443,13 @@ static int fs_path_add_from_extent_buffer(struct fs_path *p, | |||
| 400 | unsigned long off, int len) | 443 | unsigned long off, int len) |
| 401 | { | 444 | { |
| 402 | int ret; | 445 | int ret; |
| 446 | char *prepared; | ||
| 403 | 447 | ||
| 404 | ret = fs_path_prepare_for_add(p, len); | 448 | ret = fs_path_prepare_for_add(p, len, &prepared); |
| 405 | if (ret < 0) | 449 | if (ret < 0) |
| 406 | goto out; | 450 | goto out; |
| 407 | 451 | ||
| 408 | read_extent_buffer(eb, p->prepared, off, len); | 452 | read_extent_buffer(eb, prepared, off, len); |
| 409 | p->prepared = NULL; | ||
| 410 | 453 | ||
| 411 | out: | 454 | out: |
| 412 | return ret; | 455 | return ret; |
| @@ -450,6 +493,7 @@ static struct btrfs_path *alloc_path_for_send(void) | |||
| 450 | return NULL; | 493 | return NULL; |
| 451 | path->search_commit_root = 1; | 494 | path->search_commit_root = 1; |
| 452 | path->skip_locking = 1; | 495 | path->skip_locking = 1; |
| 496 | path->need_commit_sem = 1; | ||
| 453 | return path; | 497 | return path; |
| 454 | } | 498 | } |
| 455 | 499 | ||
| @@ -728,29 +772,22 @@ out: | |||
| 728 | /* | 772 | /* |
| 729 | * Helper function to retrieve some fields from an inode item. | 773 | * Helper function to retrieve some fields from an inode item. |
| 730 | */ | 774 | */ |
| 731 | static int get_inode_info(struct btrfs_root *root, | 775 | static int __get_inode_info(struct btrfs_root *root, struct btrfs_path *path, |
| 732 | u64 ino, u64 *size, u64 *gen, | 776 | u64 ino, u64 *size, u64 *gen, u64 *mode, u64 *uid, |
| 733 | u64 *mode, u64 *uid, u64 *gid, | 777 | u64 *gid, u64 *rdev) |
| 734 | u64 *rdev) | ||
| 735 | { | 778 | { |
| 736 | int ret; | 779 | int ret; |
| 737 | struct btrfs_inode_item *ii; | 780 | struct btrfs_inode_item *ii; |
| 738 | struct btrfs_key key; | 781 | struct btrfs_key key; |
| 739 | struct btrfs_path *path; | ||
| 740 | |||
| 741 | path = alloc_path_for_send(); | ||
| 742 | if (!path) | ||
| 743 | return -ENOMEM; | ||
| 744 | 782 | ||
| 745 | key.objectid = ino; | 783 | key.objectid = ino; |
| 746 | key.type = BTRFS_INODE_ITEM_KEY; | 784 | key.type = BTRFS_INODE_ITEM_KEY; |
| 747 | key.offset = 0; | 785 | key.offset = 0; |
| 748 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 786 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
| 749 | if (ret < 0) | ||
| 750 | goto out; | ||
| 751 | if (ret) { | 787 | if (ret) { |
| 752 | ret = -ENOENT; | 788 | if (ret > 0) |
| 753 | goto out; | 789 | ret = -ENOENT; |
| 790 | return ret; | ||
| 754 | } | 791 | } |
| 755 | 792 | ||
| 756 | ii = btrfs_item_ptr(path->nodes[0], path->slots[0], | 793 | ii = btrfs_item_ptr(path->nodes[0], path->slots[0], |
| @@ -768,7 +805,22 @@ static int get_inode_info(struct btrfs_root *root, | |||
| 768 | if (rdev) | 805 | if (rdev) |
| 769 | *rdev = btrfs_inode_rdev(path->nodes[0], ii); | 806 | *rdev = btrfs_inode_rdev(path->nodes[0], ii); |
| 770 | 807 | ||
| 771 | out: | 808 | return ret; |
| 809 | } | ||
| 810 | |||
| 811 | static int get_inode_info(struct btrfs_root *root, | ||
| 812 | u64 ino, u64 *size, u64 *gen, | ||
| 813 | u64 *mode, u64 *uid, u64 *gid, | ||
| 814 | u64 *rdev) | ||
| 815 | { | ||
| 816 | struct btrfs_path *path; | ||
| 817 | int ret; | ||
| 818 | |||
| 819 | path = alloc_path_for_send(); | ||
| 820 | if (!path) | ||
| 821 | return -ENOMEM; | ||
| 822 | ret = __get_inode_info(root, path, ino, size, gen, mode, uid, gid, | ||
| 823 | rdev); | ||
| 772 | btrfs_free_path(path); | 824 | btrfs_free_path(path); |
| 773 | return ret; | 825 | return ret; |
| 774 | } | 826 | } |
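Editor's note: the split above is the usual wrapper/core shape: the core takes a caller-supplied path so hot paths (the backref walk further down reuses one) avoid an allocation per call, while the old entry point keeps its one-shot convenience. A hedged sketch with invented names:

#include <errno.h>
#include <stdlib.h>

struct handle { int unused; };

/* Core: operates on a caller-owned handle, never allocates one. */
static int do_lookup(struct handle *h, unsigned long ino)
{
	(void)h; (void)ino;
	/* ... search using h, fill out parameters ... */
	return 0;
}

/* Convenience wrapper for one-shot callers. */
static int lookup(unsigned long ino)
{
	struct handle *h = malloc(sizeof(*h));
	int ret;

	if (!h)
		return -ENOMEM;
	ret = do_lookup(h, ino);
	free(h);
	return ret;
}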
| @@ -915,9 +967,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
| 915 | struct btrfs_dir_item *di; | 967 | struct btrfs_dir_item *di; |
| 916 | struct btrfs_key di_key; | 968 | struct btrfs_key di_key; |
| 917 | char *buf = NULL; | 969 | char *buf = NULL; |
| 918 | char *buf2 = NULL; | 970 | const int buf_len = PATH_MAX; |
| 919 | int buf_len; | ||
| 920 | int buf_virtual = 0; | ||
| 921 | u32 name_len; | 971 | u32 name_len; |
| 922 | u32 data_len; | 972 | u32 data_len; |
| 923 | u32 cur; | 973 | u32 cur; |
| @@ -927,7 +977,6 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
| 927 | int num; | 977 | int num; |
| 928 | u8 type; | 978 | u8 type; |
| 929 | 979 | ||
| 930 | buf_len = PAGE_SIZE; | ||
| 931 | buf = kmalloc(buf_len, GFP_NOFS); | 980 | buf = kmalloc(buf_len, GFP_NOFS); |
| 932 | if (!buf) { | 981 | if (!buf) { |
| 933 | ret = -ENOMEM; | 982 | ret = -ENOMEM; |
| @@ -949,30 +998,12 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
| 949 | type = btrfs_dir_type(eb, di); | 998 | type = btrfs_dir_type(eb, di); |
| 950 | btrfs_dir_item_key_to_cpu(eb, di, &di_key); | 999 | btrfs_dir_item_key_to_cpu(eb, di, &di_key); |
| 951 | 1000 | ||
| 1001 | /* | ||
| 1002 | * Path too long | ||
| 1003 | */ | ||
| 952 | if (name_len + data_len > buf_len) { | 1004 | if (name_len + data_len > buf_len) { |
| 953 | buf_len = PAGE_ALIGN(name_len + data_len); | 1005 | ret = -ENAMETOOLONG; |
| 954 | if (buf_virtual) { | 1006 | goto out; |
| 955 | buf2 = vmalloc(buf_len); | ||
| 956 | if (!buf2) { | ||
| 957 | ret = -ENOMEM; | ||
| 958 | goto out; | ||
| 959 | } | ||
| 960 | vfree(buf); | ||
| 961 | } else { | ||
| 962 | buf2 = krealloc(buf, buf_len, GFP_NOFS); | ||
| 963 | if (!buf2) { | ||
| 964 | buf2 = vmalloc(buf_len); | ||
| 965 | if (!buf2) { | ||
| 966 | ret = -ENOMEM; | ||
| 967 | goto out; | ||
| 968 | } | ||
| 969 | kfree(buf); | ||
| 970 | buf_virtual = 1; | ||
| 971 | } | ||
| 972 | } | ||
| 973 | |||
| 974 | buf = buf2; | ||
| 975 | buf2 = NULL; | ||
| 976 | } | 1007 | } |
| 977 | 1008 | ||
| 978 | read_extent_buffer(eb, buf, (unsigned long)(di + 1), | 1009 | read_extent_buffer(eb, buf, (unsigned long)(di + 1), |
| @@ -995,10 +1026,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
| 995 | } | 1026 | } |
| 996 | 1027 | ||
| 997 | out: | 1028 | out: |
| 998 | if (buf_virtual) | 1029 | kfree(buf); |
| 999 | vfree(buf); | ||
| 1000 | else | ||
| 1001 | kfree(buf); | ||
| 1002 | return ret; | 1030 | return ret; |
| 1003 | } | 1031 | } |
| 1004 | 1032 | ||
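Editor's note: the rewrite above drops the krealloc/vmalloc growth ladder for a single PATH_MAX allocation and a hard -ENAMETOOLONG on oversized entries, since any name+data pair past PATH_MAX is unusable anyway. Reduced to its core (illustrative helper, not the kernel's):

#include <errno.h>
#include <string.h>

/* Copy name+data into a fixed buffer; reject instead of growing. */
static int copy_entry(char *buf, size_t buf_len,
		      const char *name, size_t name_len,
		      const char *data, size_t data_len)
{
	if (name_len + data_len > buf_len)
		return -ENAMETOOLONG;	/* path too long */
	memcpy(buf, name, name_len);
	memcpy(buf + name_len, data, data_len);
	return 0;
}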
| @@ -1066,6 +1094,7 @@ out: | |||
| 1066 | struct backref_ctx { | 1094 | struct backref_ctx { |
| 1067 | struct send_ctx *sctx; | 1095 | struct send_ctx *sctx; |
| 1068 | 1096 | ||
| 1097 | struct btrfs_path *path; | ||
| 1069 | /* number of total found references */ | 1098 | /* number of total found references */ |
| 1070 | u64 found; | 1099 | u64 found; |
| 1071 | 1100 | ||
| @@ -1136,8 +1165,9 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) | |||
| 1136 | * There are inodes that have extents that lie behind its i_size. Don't | 1165 | * There are inodes that have extents that lie behind its i_size. Don't |
| 1137 | * accept clones from these extents. | 1166 | * accept clones from these extents. |
| 1138 | */ | 1167 | */ |
| 1139 | ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL, | 1168 | ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL, |
| 1140 | NULL); | 1169 | NULL, NULL, NULL); |
| 1170 | btrfs_release_path(bctx->path); | ||
| 1141 | if (ret < 0) | 1171 | if (ret < 0) |
| 1142 | return ret; | 1172 | return ret; |
| 1143 | 1173 | ||
| @@ -1216,12 +1246,17 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
| 1216 | if (!tmp_path) | 1246 | if (!tmp_path) |
| 1217 | return -ENOMEM; | 1247 | return -ENOMEM; |
| 1218 | 1248 | ||
| 1249 | /* We only use this path under the commit sem */ | ||
| 1250 | tmp_path->need_commit_sem = 0; | ||
| 1251 | |||
| 1219 | backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS); | 1252 | backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS); |
| 1220 | if (!backref_ctx) { | 1253 | if (!backref_ctx) { |
| 1221 | ret = -ENOMEM; | 1254 | ret = -ENOMEM; |
| 1222 | goto out; | 1255 | goto out; |
| 1223 | } | 1256 | } |
| 1224 | 1257 | ||
| 1258 | backref_ctx->path = tmp_path; | ||
| 1259 | |||
| 1225 | if (data_offset >= ino_size) { | 1260 | if (data_offset >= ino_size) { |
| 1226 | /* | 1261 | /* |
| 1227 | * There may be extents that lie behind the file's size. | 1262 | * There may be extents that lie behind the file's size. |
| @@ -1249,8 +1284,10 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
| 1249 | } | 1284 | } |
| 1250 | logical = disk_byte + btrfs_file_extent_offset(eb, fi); | 1285 | logical = disk_byte + btrfs_file_extent_offset(eb, fi); |
| 1251 | 1286 | ||
| 1287 | down_read(&sctx->send_root->fs_info->commit_root_sem); | ||
| 1252 | ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path, | 1288 | ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path, |
| 1253 | &found_key, &flags); | 1289 | &found_key, &flags); |
| 1290 | up_read(&sctx->send_root->fs_info->commit_root_sem); | ||
| 1254 | btrfs_release_path(tmp_path); | 1291 | btrfs_release_path(tmp_path); |
| 1255 | 1292 | ||
| 1256 | if (ret < 0) | 1293 | if (ret < 0) |
| @@ -1292,8 +1329,6 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
| 1292 | extent_item_pos = logical - found_key.objectid; | 1329 | extent_item_pos = logical - found_key.objectid; |
| 1293 | else | 1330 | else |
| 1294 | extent_item_pos = 0; | 1331 | extent_item_pos = 0; |
| 1295 | |||
| 1296 | extent_item_pos = logical - found_key.objectid; | ||
| 1297 | ret = iterate_extent_inodes(sctx->send_root->fs_info, | 1332 | ret = iterate_extent_inodes(sctx->send_root->fs_info, |
| 1298 | found_key.objectid, extent_item_pos, 1, | 1333 | found_key.objectid, extent_item_pos, 1, |
| 1299 | __iterate_backrefs, backref_ctx); | 1334 | __iterate_backrefs, backref_ctx); |
| @@ -1418,11 +1453,7 @@ static int gen_unique_name(struct send_ctx *sctx, | |||
| 1418 | while (1) { | 1453 | while (1) { |
| 1419 | len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu", | 1454 | len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu", |
| 1420 | ino, gen, idx); | 1455 | ino, gen, idx); |
| 1421 | if (len >= sizeof(tmp)) { | 1456 | ASSERT(len < sizeof(tmp)); |
| 1422 | /* should really not happen */ | ||
| 1423 | ret = -EOVERFLOW; | ||
| 1424 | goto out; | ||
| 1425 | } | ||
| 1426 | 1457 | ||
| 1427 | di = btrfs_lookup_dir_item(NULL, sctx->send_root, | 1458 | di = btrfs_lookup_dir_item(NULL, sctx->send_root, |
| 1428 | path, BTRFS_FIRST_FREE_OBJECTID, | 1459 | path, BTRFS_FIRST_FREE_OBJECTID, |
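Editor's note: the EOVERFLOW branch can become an ASSERT because the output length is statically bounded: snprintf() returns the length the string *would* have had, and "o%llu-%llu-%llu" with three u64 values is at most 1 + 20 + 1 + 20 + 1 + 20 = 63 characters, which (assuming the 64-byte buffer used here) always fits with its terminator. A standalone check of that arithmetic:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	char tmp[64];
	unsigned long long ino = ~0ULL, gen = ~0ULL, idx = ~0ULL;
	/* worst case: 'o' + 20 digits + '-' + 20 + '-' + 20 = 63 chars */
	int len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
			   ino, gen, idx);

	assert(len >= 0 && (size_t)len < sizeof(tmp));
	return 0;
}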
| @@ -1898,13 +1929,20 @@ static void name_cache_delete(struct send_ctx *sctx, | |||
| 1898 | 1929 | ||
| 1899 | nce_head = radix_tree_lookup(&sctx->name_cache, | 1930 | nce_head = radix_tree_lookup(&sctx->name_cache, |
| 1900 | (unsigned long)nce->ino); | 1931 | (unsigned long)nce->ino); |
| 1901 | BUG_ON(!nce_head); | 1932 | if (!nce_head) { |
| 1933 | btrfs_err(sctx->send_root->fs_info, | ||
| 1934 | "name_cache_delete lookup failed ino %llu cache size %d, leaking memory", | ||
| 1935 | nce->ino, sctx->name_cache_size); | ||
| 1936 | } | ||
| 1902 | 1937 | ||
| 1903 | list_del(&nce->radix_list); | 1938 | list_del(&nce->radix_list); |
| 1904 | list_del(&nce->list); | 1939 | list_del(&nce->list); |
| 1905 | sctx->name_cache_size--; | 1940 | sctx->name_cache_size--; |
| 1906 | 1941 | ||
| 1907 | if (list_empty(nce_head)) { | 1942 | /* |
| 1943 | * We may not get to the final release of nce_head if the lookup fails | ||
| 1944 | */ | ||
| 1945 | if (nce_head && list_empty(nce_head)) { | ||
| 1908 | radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino); | 1946 | radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino); |
| 1909 | kfree(nce_head); | 1947 | kfree(nce_head); |
| 1910 | } | 1948 | } |
| @@ -1977,7 +2015,6 @@ static void name_cache_free(struct send_ctx *sctx) | |||
| 1977 | */ | 2015 | */ |
| 1978 | static int __get_cur_name_and_parent(struct send_ctx *sctx, | 2016 | static int __get_cur_name_and_parent(struct send_ctx *sctx, |
| 1979 | u64 ino, u64 gen, | 2017 | u64 ino, u64 gen, |
| 1980 | int skip_name_cache, | ||
| 1981 | u64 *parent_ino, | 2018 | u64 *parent_ino, |
| 1982 | u64 *parent_gen, | 2019 | u64 *parent_gen, |
| 1983 | struct fs_path *dest) | 2020 | struct fs_path *dest) |
| @@ -1987,8 +2024,6 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
| 1987 | struct btrfs_path *path = NULL; | 2024 | struct btrfs_path *path = NULL; |
| 1988 | struct name_cache_entry *nce = NULL; | 2025 | struct name_cache_entry *nce = NULL; |
| 1989 | 2026 | ||
| 1990 | if (skip_name_cache) | ||
| 1991 | goto get_ref; | ||
| 1992 | /* | 2027 | /* |
| 1993 | * First check if we already did a call to this function with the same | 2028 | * First check if we already did a call to this function with the same |
| 1994 | * ino/gen. If yes, check if the cache entry is still up-to-date. If yes | 2029 | * ino/gen. If yes, check if the cache entry is still up-to-date. If yes |
| @@ -2033,12 +2068,11 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
| 2033 | goto out_cache; | 2068 | goto out_cache; |
| 2034 | } | 2069 | } |
| 2035 | 2070 | ||
| 2036 | get_ref: | ||
| 2037 | /* | 2071 | /* |
| 2038 | * Depending on whether the inode was already processed or not, use | 2072 | * Depending on whether the inode was already processed or not, use |
| 2039 | * send_root or parent_root for ref lookup. | 2073 | * send_root or parent_root for ref lookup. |
| 2040 | */ | 2074 | */ |
| 2041 | if (ino < sctx->send_progress && !skip_name_cache) | 2075 | if (ino < sctx->send_progress) |
| 2042 | ret = get_first_ref(sctx->send_root, ino, | 2076 | ret = get_first_ref(sctx->send_root, ino, |
| 2043 | parent_ino, parent_gen, dest); | 2077 | parent_ino, parent_gen, dest); |
| 2044 | else | 2078 | else |
| @@ -2062,8 +2096,6 @@ get_ref: | |||
| 2062 | goto out; | 2096 | goto out; |
| 2063 | ret = 1; | 2097 | ret = 1; |
| 2064 | } | 2098 | } |
| 2065 | if (skip_name_cache) | ||
| 2066 | goto out; | ||
| 2067 | 2099 | ||
| 2068 | out_cache: | 2100 | out_cache: |
| 2069 | /* | 2101 | /* |
| @@ -2131,9 +2163,6 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, | |||
| 2131 | u64 parent_inode = 0; | 2163 | u64 parent_inode = 0; |
| 2132 | u64 parent_gen = 0; | 2164 | u64 parent_gen = 0; |
| 2133 | int stop = 0; | 2165 | int stop = 0; |
| 2134 | u64 start_ino = ino; | ||
| 2135 | u64 start_gen = gen; | ||
| 2136 | int skip_name_cache = 0; | ||
| 2137 | 2166 | ||
| 2138 | name = fs_path_alloc(); | 2167 | name = fs_path_alloc(); |
| 2139 | if (!name) { | 2168 | if (!name) { |
| @@ -2141,31 +2170,33 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, | |||
| 2141 | goto out; | 2170 | goto out; |
| 2142 | } | 2171 | } |
| 2143 | 2172 | ||
| 2144 | if (is_waiting_for_move(sctx, ino)) | ||
| 2145 | skip_name_cache = 1; | ||
| 2146 | |||
| 2147 | again: | ||
| 2148 | dest->reversed = 1; | 2173 | dest->reversed = 1; |
| 2149 | fs_path_reset(dest); | 2174 | fs_path_reset(dest); |
| 2150 | 2175 | ||
| 2151 | while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { | 2176 | while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { |
| 2152 | fs_path_reset(name); | 2177 | fs_path_reset(name); |
| 2153 | 2178 | ||
| 2154 | ret = __get_cur_name_and_parent(sctx, ino, gen, skip_name_cache, | 2179 | if (is_waiting_for_rm(sctx, ino)) { |
| 2155 | &parent_inode, &parent_gen, name); | 2180 | ret = gen_unique_name(sctx, ino, gen, name); |
| 2181 | if (ret < 0) | ||
| 2182 | goto out; | ||
| 2183 | ret = fs_path_add_path(dest, name); | ||
| 2184 | break; | ||
| 2185 | } | ||
| 2186 | |||
| 2187 | if (is_waiting_for_move(sctx, ino)) { | ||
| 2188 | ret = get_first_ref(sctx->parent_root, ino, | ||
| 2189 | &parent_inode, &parent_gen, name); | ||
| 2190 | } else { | ||
| 2191 | ret = __get_cur_name_and_parent(sctx, ino, gen, | ||
| 2192 | &parent_inode, | ||
| 2193 | &parent_gen, name); | ||
| 2194 | if (ret) | ||
| 2195 | stop = 1; | ||
| 2196 | } | ||
| 2197 | |||
| 2156 | if (ret < 0) | 2198 | if (ret < 0) |
| 2157 | goto out; | 2199 | goto out; |
| 2158 | if (ret) | ||
| 2159 | stop = 1; | ||
| 2160 | |||
| 2161 | if (!skip_name_cache && | ||
| 2162 | is_waiting_for_move(sctx, parent_inode)) { | ||
| 2163 | ino = start_ino; | ||
| 2164 | gen = start_gen; | ||
| 2165 | stop = 0; | ||
| 2166 | skip_name_cache = 1; | ||
| 2167 | goto again; | ||
| 2168 | } | ||
| 2169 | 2200 | ||
| 2170 | ret = fs_path_add_path(dest, name); | 2201 | ret = fs_path_add_path(dest, name); |
| 2171 | if (ret < 0) | 2202 | if (ret < 0) |
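Editor's note: the restart machinery (start_ino/start_gen/skip_name_cache/again) is gone; the loop now decides per component how to resolve each name, so a single upward walk suffices. A toy model of the three cases (all data invented, real code builds the path reversed in place):

#include <stdio.h>
#include <string.h>

enum state { NORMAL, WAITING_RM, WAITING_MOVE };
struct node { int parent; enum state st; const char *cur, *old, *orphan; };

static struct node t[] = {
	[1] = {1, NORMAL, "/", "/", ""},
	[2] = {1, NORMAL, "a", "a", ""},
	[3] = {2, WAITING_MOVE, "b-new", "b-old", ""},
};

int main(void)
{
	char path[256] = "";

	for (int ino = 3; ino != 1; ino = t[ino].parent) {
		const char *name;
		char tmp[256];

		if (t[ino].st == WAITING_RM)
			name = t[ino].orphan;	/* unique name, stop here */
		else if (t[ino].st == WAITING_MOVE)
			name = t[ino].old;	/* old parent's view */
		else
			name = t[ino].cur;	/* name cache / send root */
		snprintf(tmp, sizeof(tmp), "/%s%s", name, path);
		strcpy(path, tmp);
		if (t[ino].st == WAITING_RM)
			break;
	}
	printf("%s\n", path);	/* prints /a/b-old */
	return 0;
}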
| @@ -2429,10 +2460,16 @@ verbose_printk("btrfs: send_create_inode %llu\n", ino); | |||
| 2429 | if (!p) | 2460 | if (!p) |
| 2430 | return -ENOMEM; | 2461 | return -ENOMEM; |
| 2431 | 2462 | ||
| 2432 | ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, NULL, | 2463 | if (ino != sctx->cur_ino) { |
| 2433 | NULL, &rdev); | 2464 | ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, |
| 2434 | if (ret < 0) | 2465 | NULL, NULL, &rdev); |
| 2435 | goto out; | 2466 | if (ret < 0) |
| 2467 | goto out; | ||
| 2468 | } else { | ||
| 2469 | gen = sctx->cur_inode_gen; | ||
| 2470 | mode = sctx->cur_inode_mode; | ||
| 2471 | rdev = sctx->cur_inode_rdev; | ||
| 2472 | } | ||
| 2436 | 2473 | ||
| 2437 | if (S_ISREG(mode)) { | 2474 | if (S_ISREG(mode)) { |
| 2438 | cmd = BTRFS_SEND_C_MKFILE; | 2475 | cmd = BTRFS_SEND_C_MKFILE; |
| @@ -2512,17 +2549,26 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir) | |||
| 2512 | key.objectid = dir; | 2549 | key.objectid = dir; |
| 2513 | key.type = BTRFS_DIR_INDEX_KEY; | 2550 | key.type = BTRFS_DIR_INDEX_KEY; |
| 2514 | key.offset = 0; | 2551 | key.offset = 0; |
| 2552 | ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); | ||
| 2553 | if (ret < 0) | ||
| 2554 | goto out; | ||
| 2555 | |||
| 2515 | while (1) { | 2556 | while (1) { |
| 2516 | ret = btrfs_search_slot_for_read(sctx->send_root, &key, path, | 2557 | eb = path->nodes[0]; |
| 2517 | 1, 0); | 2558 | slot = path->slots[0]; |
| 2518 | if (ret < 0) | 2559 | if (slot >= btrfs_header_nritems(eb)) { |
| 2519 | goto out; | 2560 | ret = btrfs_next_leaf(sctx->send_root, path); |
| 2520 | if (!ret) { | 2561 | if (ret < 0) { |
| 2521 | eb = path->nodes[0]; | 2562 | goto out; |
| 2522 | slot = path->slots[0]; | 2563 | } else if (ret > 0) { |
| 2523 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 2564 | ret = 0; |
| 2565 | break; | ||
| 2566 | } | ||
| 2567 | continue; | ||
| 2524 | } | 2568 | } |
| 2525 | if (ret || found_key.objectid != key.objectid || | 2569 | |
| 2570 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
| 2571 | if (found_key.objectid != key.objectid || | ||
| 2526 | found_key.type != key.type) { | 2572 | found_key.type != key.type) { |
| 2527 | ret = 0; | 2573 | ret = 0; |
| 2528 | goto out; | 2574 | goto out; |
| @@ -2537,8 +2583,7 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir) | |||
| 2537 | goto out; | 2583 | goto out; |
| 2538 | } | 2584 | } |
| 2539 | 2585 | ||
| 2540 | key.offset = found_key.offset + 1; | 2586 | path->slots[0]++; |
| 2541 | btrfs_release_path(path); | ||
| 2542 | } | 2587 | } |
| 2543 | 2588 | ||
| 2544 | out: | 2589 | out: |
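Editor's note: did_create_dir() above (and can_rmdir(), process_all_refs() and process_all_new_xattrs() below) all switch to the same iteration idiom: one btrfs_search_slot(), then bump path->slots[0] and hop leaves with btrfs_next_leaf(), instead of re-searching from the root for every item. The distilled skeleton (kernel context assumed, error paths trimmed):

ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
	goto out;
while (1) {
	struct extent_buffer *eb = path->nodes[0];
	int slot = path->slots[0];

	if (slot >= btrfs_header_nritems(eb)) {
		ret = btrfs_next_leaf(root, path);
		if (ret < 0)
			goto out;	/* I/O or lookup error */
		if (ret > 0)
			break;		/* ran out of leaves */
		continue;		/* reload eb/slot */
	}
	btrfs_item_key_to_cpu(eb, &found_key, slot);
	if (found_key.objectid != key.objectid ||
	    found_key.type != key.type)
		break;			/* walked past our key range */
	/* ... process the item at (eb, slot) ... */
	path->slots[0]++;
}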
| @@ -2590,7 +2635,7 @@ struct recorded_ref { | |||
| 2590 | * everything mixed. So we first record all refs and later process them. | 2635 | * everything mixed. So we first record all refs and later process them. |
| 2591 | * This function is a helper to record one ref. | 2636 | * This function is a helper to record one ref. |
| 2592 | */ | 2637 | */ |
| 2593 | static int record_ref(struct list_head *head, u64 dir, | 2638 | static int __record_ref(struct list_head *head, u64 dir, |
| 2594 | u64 dir_gen, struct fs_path *path) | 2639 | u64 dir_gen, struct fs_path *path) |
| 2595 | { | 2640 | { |
| 2596 | struct recorded_ref *ref; | 2641 | struct recorded_ref *ref; |
| @@ -2676,12 +2721,78 @@ out: | |||
| 2676 | return ret; | 2721 | return ret; |
| 2677 | } | 2722 | } |
| 2678 | 2723 | ||
| 2724 | static struct orphan_dir_info * | ||
| 2725 | add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) | ||
| 2726 | { | ||
| 2727 | struct rb_node **p = &sctx->orphan_dirs.rb_node; | ||
| 2728 | struct rb_node *parent = NULL; | ||
| 2729 | struct orphan_dir_info *entry, *odi; | ||
| 2730 | |||
| 2731 | odi = kmalloc(sizeof(*odi), GFP_NOFS); | ||
| 2732 | if (!odi) | ||
| 2733 | return ERR_PTR(-ENOMEM); | ||
| 2734 | odi->ino = dir_ino; | ||
| 2735 | odi->gen = 0; | ||
| 2736 | |||
| 2737 | while (*p) { | ||
| 2738 | parent = *p; | ||
| 2739 | entry = rb_entry(parent, struct orphan_dir_info, node); | ||
| 2740 | if (dir_ino < entry->ino) { | ||
| 2741 | p = &(*p)->rb_left; | ||
| 2742 | } else if (dir_ino > entry->ino) { | ||
| 2743 | p = &(*p)->rb_right; | ||
| 2744 | } else { | ||
| 2745 | kfree(odi); | ||
| 2746 | return entry; | ||
| 2747 | } | ||
| 2748 | } | ||
| 2749 | |||
| 2750 | rb_link_node(&odi->node, parent, p); | ||
| 2751 | rb_insert_color(&odi->node, &sctx->orphan_dirs); | ||
| 2752 | return odi; | ||
| 2753 | } | ||
| 2754 | |||
| 2755 | static struct orphan_dir_info * | ||
| 2756 | get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) | ||
| 2757 | { | ||
| 2758 | struct rb_node *n = sctx->orphan_dirs.rb_node; | ||
| 2759 | struct orphan_dir_info *entry; | ||
| 2760 | |||
| 2761 | while (n) { | ||
| 2762 | entry = rb_entry(n, struct orphan_dir_info, node); | ||
| 2763 | if (dir_ino < entry->ino) | ||
| 2764 | n = n->rb_left; | ||
| 2765 | else if (dir_ino > entry->ino) | ||
| 2766 | n = n->rb_right; | ||
| 2767 | else | ||
| 2768 | return entry; | ||
| 2769 | } | ||
| 2770 | return NULL; | ||
| 2771 | } | ||
| 2772 | |||
| 2773 | static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino) | ||
| 2774 | { | ||
| 2775 | struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino); | ||
| 2776 | |||
| 2777 | return odi != NULL; | ||
| 2778 | } | ||
| 2779 | |||
| 2780 | static void free_orphan_dir_info(struct send_ctx *sctx, | ||
| 2781 | struct orphan_dir_info *odi) | ||
| 2782 | { | ||
| 2783 | if (!odi) | ||
| 2784 | return; | ||
| 2785 | rb_erase(&odi->node, &sctx->orphan_dirs); | ||
| 2786 | kfree(odi); | ||
| 2787 | } | ||
| 2788 | |||
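Editor's note: add_orphan_dir_info()/get_orphan_dir_info() are the stock <linux/rbtree.h> pattern, the same one the waiting_dir_moves tree already uses: descend the rb_node links to find the slot, then rb_link_node() plus rb_insert_color(). Distilled, with an invented item type:

#include <linux/rbtree.h>
#include <linux/types.h>

struct item {
	struct rb_node node;
	u64 key;
};

/* Insert 'new'; return the existing entry instead if the key is taken. */
static struct item *item_insert(struct rb_root *root, struct item *new)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct item *cur = rb_entry(*p, struct item, node);

		parent = *p;
		if (new->key < cur->key)
			p = &(*p)->rb_left;
		else if (new->key > cur->key)
			p = &(*p)->rb_right;
		else
			return cur;
	}
	rb_link_node(&new->node, parent, p);
	rb_insert_color(&new->node, root);
	return new;
}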
| 2679 | /* | 2789 | /* |
| 2680 | * Returns 1 if a directory can be removed at this point in time. | 2790 | * Returns 1 if a directory can be removed at this point in time. |
| 2681 | * We check this by iterating all dir items and checking if the inode behind | 2791 | * We check this by iterating all dir items and checking if the inode behind |
| 2682 | * the dir item was already processed. | 2792 | * the dir item was already processed. |
| 2683 | */ | 2793 | */ |
| 2684 | static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress) | 2794 | static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, |
| 2795 | u64 send_progress) | ||
| 2685 | { | 2796 | { |
| 2686 | int ret = 0; | 2797 | int ret = 0; |
| 2687 | struct btrfs_root *root = sctx->parent_root; | 2798 | struct btrfs_root *root = sctx->parent_root; |
| @@ -2704,31 +2815,52 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress) | |||
| 2704 | key.objectid = dir; | 2815 | key.objectid = dir; |
| 2705 | key.type = BTRFS_DIR_INDEX_KEY; | 2816 | key.type = BTRFS_DIR_INDEX_KEY; |
| 2706 | key.offset = 0; | 2817 | key.offset = 0; |
| 2818 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 2819 | if (ret < 0) | ||
| 2820 | goto out; | ||
| 2707 | 2821 | ||
| 2708 | while (1) { | 2822 | while (1) { |
| 2709 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | 2823 | struct waiting_dir_move *dm; |
| 2710 | if (ret < 0) | 2824 | |
| 2711 | goto out; | 2825 | if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { |
| 2712 | if (!ret) { | 2826 | ret = btrfs_next_leaf(root, path); |
| 2713 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | 2827 | if (ret < 0) |
| 2714 | path->slots[0]); | 2828 | goto out; |
| 2829 | else if (ret > 0) | ||
| 2830 | break; | ||
| 2831 | continue; | ||
| 2715 | } | 2832 | } |
| 2716 | if (ret || found_key.objectid != key.objectid || | 2833 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, |
| 2717 | found_key.type != key.type) { | 2834 | path->slots[0]); |
| 2835 | if (found_key.objectid != key.objectid || | ||
| 2836 | found_key.type != key.type) | ||
| 2718 | break; | 2837 | break; |
| 2719 | } | ||
| 2720 | 2838 | ||
| 2721 | di = btrfs_item_ptr(path->nodes[0], path->slots[0], | 2839 | di = btrfs_item_ptr(path->nodes[0], path->slots[0], |
| 2722 | struct btrfs_dir_item); | 2840 | struct btrfs_dir_item); |
| 2723 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc); | 2841 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc); |
| 2724 | 2842 | ||
| 2843 | dm = get_waiting_dir_move(sctx, loc.objectid); | ||
| 2844 | if (dm) { | ||
| 2845 | struct orphan_dir_info *odi; | ||
| 2846 | |||
| 2847 | odi = add_orphan_dir_info(sctx, dir); | ||
| 2848 | if (IS_ERR(odi)) { | ||
| 2849 | ret = PTR_ERR(odi); | ||
| 2850 | goto out; | ||
| 2851 | } | ||
| 2852 | odi->gen = dir_gen; | ||
| 2853 | dm->rmdir_ino = dir; | ||
| 2854 | ret = 0; | ||
| 2855 | goto out; | ||
| 2856 | } | ||
| 2857 | |||
| 2725 | if (loc.objectid > send_progress) { | 2858 | if (loc.objectid > send_progress) { |
| 2726 | ret = 0; | 2859 | ret = 0; |
| 2727 | goto out; | 2860 | goto out; |
| 2728 | } | 2861 | } |
| 2729 | 2862 | ||
| 2730 | btrfs_release_path(path); | 2863 | path->slots[0]++; |
| 2731 | key.offset = found_key.offset + 1; | ||
| 2732 | } | 2864 | } |
| 2733 | 2865 | ||
| 2734 | ret = 1; | 2866 | ret = 1; |
| @@ -2740,19 +2872,9 @@ out: | |||
| 2740 | 2872 | ||
| 2741 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino) | 2873 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino) |
| 2742 | { | 2874 | { |
| 2743 | struct rb_node *n = sctx->waiting_dir_moves.rb_node; | 2875 | struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino); |
| 2744 | struct waiting_dir_move *entry; | ||
| 2745 | 2876 | ||
| 2746 | while (n) { | 2877 | return entry != NULL; |
| 2747 | entry = rb_entry(n, struct waiting_dir_move, node); | ||
| 2748 | if (ino < entry->ino) | ||
| 2749 | n = n->rb_left; | ||
| 2750 | else if (ino > entry->ino) | ||
| 2751 | n = n->rb_right; | ||
| 2752 | else | ||
| 2753 | return 1; | ||
| 2754 | } | ||
| 2755 | return 0; | ||
| 2756 | } | 2878 | } |
| 2757 | 2879 | ||
| 2758 | static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) | 2880 | static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) |
| @@ -2765,6 +2887,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) | |||
| 2765 | if (!dm) | 2887 | if (!dm) |
| 2766 | return -ENOMEM; | 2888 | return -ENOMEM; |
| 2767 | dm->ino = ino; | 2889 | dm->ino = ino; |
| 2890 | dm->rmdir_ino = 0; | ||
| 2768 | 2891 | ||
| 2769 | while (*p) { | 2892 | while (*p) { |
| 2770 | parent = *p; | 2893 | parent = *p; |
| @@ -2784,31 +2907,41 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) | |||
| 2784 | return 0; | 2907 | return 0; |
| 2785 | } | 2908 | } |
| 2786 | 2909 | ||
| 2787 | static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino) | 2910 | static struct waiting_dir_move * |
| 2911 | get_waiting_dir_move(struct send_ctx *sctx, u64 ino) | ||
| 2788 | { | 2912 | { |
| 2789 | struct rb_node *n = sctx->waiting_dir_moves.rb_node; | 2913 | struct rb_node *n = sctx->waiting_dir_moves.rb_node; |
| 2790 | struct waiting_dir_move *entry; | 2914 | struct waiting_dir_move *entry; |
| 2791 | 2915 | ||
| 2792 | while (n) { | 2916 | while (n) { |
| 2793 | entry = rb_entry(n, struct waiting_dir_move, node); | 2917 | entry = rb_entry(n, struct waiting_dir_move, node); |
| 2794 | if (ino < entry->ino) { | 2918 | if (ino < entry->ino) |
| 2795 | n = n->rb_left; | 2919 | n = n->rb_left; |
| 2796 | } else if (ino > entry->ino) { | 2920 | else if (ino > entry->ino) |
| 2797 | n = n->rb_right; | 2921 | n = n->rb_right; |
| 2798 | } else { | 2922 | else |
| 2799 | rb_erase(&entry->node, &sctx->waiting_dir_moves); | 2923 | return entry; |
| 2800 | kfree(entry); | ||
| 2801 | return 0; | ||
| 2802 | } | ||
| 2803 | } | 2924 | } |
| 2804 | return -ENOENT; | 2925 | return NULL; |
| 2805 | } | 2926 | } |
| 2806 | 2927 | ||
| 2807 | static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino) | 2928 | static void free_waiting_dir_move(struct send_ctx *sctx, |
| 2929 | struct waiting_dir_move *dm) | ||
| 2930 | { | ||
| 2931 | if (!dm) | ||
| 2932 | return; | ||
| 2933 | rb_erase(&dm->node, &sctx->waiting_dir_moves); | ||
| 2934 | kfree(dm); | ||
| 2935 | } | ||
| 2936 | |||
| 2937 | static int add_pending_dir_move(struct send_ctx *sctx, | ||
| 2938 | u64 ino, | ||
| 2939 | u64 ino_gen, | ||
| 2940 | u64 parent_ino) | ||
| 2808 | { | 2941 | { |
| 2809 | struct rb_node **p = &sctx->pending_dir_moves.rb_node; | 2942 | struct rb_node **p = &sctx->pending_dir_moves.rb_node; |
| 2810 | struct rb_node *parent = NULL; | 2943 | struct rb_node *parent = NULL; |
| 2811 | struct pending_dir_move *entry, *pm; | 2944 | struct pending_dir_move *entry = NULL, *pm; |
| 2812 | struct recorded_ref *cur; | 2945 | struct recorded_ref *cur; |
| 2813 | int exists = 0; | 2946 | int exists = 0; |
| 2814 | int ret; | 2947 | int ret; |
| @@ -2817,8 +2950,8 @@ static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino) | |||
| 2817 | if (!pm) | 2950 | if (!pm) |
| 2818 | return -ENOMEM; | 2951 | return -ENOMEM; |
| 2819 | pm->parent_ino = parent_ino; | 2952 | pm->parent_ino = parent_ino; |
| 2820 | pm->ino = sctx->cur_ino; | 2953 | pm->ino = ino; |
| 2821 | pm->gen = sctx->cur_inode_gen; | 2954 | pm->gen = ino_gen; |
| 2822 | INIT_LIST_HEAD(&pm->list); | 2955 | INIT_LIST_HEAD(&pm->list); |
| 2823 | INIT_LIST_HEAD(&pm->update_refs); | 2956 | INIT_LIST_HEAD(&pm->update_refs); |
| 2824 | RB_CLEAR_NODE(&pm->node); | 2957 | RB_CLEAR_NODE(&pm->node); |
| @@ -2888,19 +3021,52 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
| 2888 | { | 3021 | { |
| 2889 | struct fs_path *from_path = NULL; | 3022 | struct fs_path *from_path = NULL; |
| 2890 | struct fs_path *to_path = NULL; | 3023 | struct fs_path *to_path = NULL; |
| 3024 | struct fs_path *name = NULL; | ||
| 2891 | u64 orig_progress = sctx->send_progress; | 3025 | u64 orig_progress = sctx->send_progress; |
| 2892 | struct recorded_ref *cur; | 3026 | struct recorded_ref *cur; |
| 3027 | u64 parent_ino, parent_gen; | ||
| 3028 | struct waiting_dir_move *dm = NULL; | ||
| 3029 | u64 rmdir_ino = 0; | ||
| 2893 | int ret; | 3030 | int ret; |
| 2894 | 3031 | ||
| 3032 | name = fs_path_alloc(); | ||
| 2895 | from_path = fs_path_alloc(); | 3033 | from_path = fs_path_alloc(); |
| 2896 | if (!from_path) | 3034 | if (!name || !from_path) { |
| 2897 | return -ENOMEM; | 3035 | ret = -ENOMEM; |
| 3036 | goto out; | ||
| 3037 | } | ||
| 3038 | |||
| 3039 | dm = get_waiting_dir_move(sctx, pm->ino); | ||
| 3040 | ASSERT(dm); | ||
| 3041 | rmdir_ino = dm->rmdir_ino; | ||
| 3042 | free_waiting_dir_move(sctx, dm); | ||
| 2898 | 3043 | ||
| 2899 | sctx->send_progress = pm->ino; | 3044 | ret = get_first_ref(sctx->parent_root, pm->ino, |
| 2900 | ret = get_cur_path(sctx, pm->ino, pm->gen, from_path); | 3045 | &parent_ino, &parent_gen, name); |
| 2901 | if (ret < 0) | 3046 | if (ret < 0) |
| 2902 | goto out; | 3047 | goto out; |
| 2903 | 3048 | ||
| 3049 | if (parent_ino == sctx->cur_ino) { | ||
| 3050 | /* child only renamed, not moved */ | ||
| 3051 | ASSERT(parent_gen == sctx->cur_inode_gen); | ||
| 3052 | ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, | ||
| 3053 | from_path); | ||
| 3054 | if (ret < 0) | ||
| 3055 | goto out; | ||
| 3056 | ret = fs_path_add_path(from_path, name); | ||
| 3057 | if (ret < 0) | ||
| 3058 | goto out; | ||
| 3059 | } else { | ||
| 3060 | /* child moved and maybe renamed too */ | ||
| 3061 | sctx->send_progress = pm->ino; | ||
| 3062 | ret = get_cur_path(sctx, pm->ino, pm->gen, from_path); | ||
| 3063 | if (ret < 0) | ||
| 3064 | goto out; | ||
| 3065 | } | ||
| 3066 | |||
| 3067 | fs_path_free(name); | ||
| 3068 | name = NULL; | ||
| 3069 | |||
| 2904 | to_path = fs_path_alloc(); | 3070 | to_path = fs_path_alloc(); |
| 2905 | if (!to_path) { | 3071 | if (!to_path) { |
| 2906 | ret = -ENOMEM; | 3072 | ret = -ENOMEM; |
| @@ -2908,9 +3074,6 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
| 2908 | } | 3074 | } |
| 2909 | 3075 | ||
| 2910 | sctx->send_progress = sctx->cur_ino + 1; | 3076 | sctx->send_progress = sctx->cur_ino + 1; |
| 2911 | ret = del_waiting_dir_move(sctx, pm->ino); | ||
| 2912 | ASSERT(ret == 0); | ||
| 2913 | |||
| 2914 | ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); | 3077 | ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); |
| 2915 | if (ret < 0) | 3078 | if (ret < 0) |
| 2916 | goto out; | 3079 | goto out; |
| @@ -2919,6 +3082,35 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
| 2919 | if (ret < 0) | 3082 | if (ret < 0) |
| 2920 | goto out; | 3083 | goto out; |
| 2921 | 3084 | ||
| 3085 | if (rmdir_ino) { | ||
| 3086 | struct orphan_dir_info *odi; | ||
| 3087 | |||
| 3088 | odi = get_orphan_dir_info(sctx, rmdir_ino); | ||
| 3089 | if (!odi) { | ||
| 3090 | /* already deleted */ | ||
| 3091 | goto finish; | ||
| 3092 | } | ||
| 3093 | ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1); | ||
| 3094 | if (ret < 0) | ||
| 3095 | goto out; | ||
| 3096 | if (!ret) | ||
| 3097 | goto finish; | ||
| 3098 | |||
| 3099 | name = fs_path_alloc(); | ||
| 3100 | if (!name) { | ||
| 3101 | ret = -ENOMEM; | ||
| 3102 | goto out; | ||
| 3103 | } | ||
| 3104 | ret = get_cur_path(sctx, rmdir_ino, odi->gen, name); | ||
| 3105 | if (ret < 0) | ||
| 3106 | goto out; | ||
| 3107 | ret = send_rmdir(sctx, name); | ||
| 3108 | if (ret < 0) | ||
| 3109 | goto out; | ||
| 3110 | free_orphan_dir_info(sctx, odi); | ||
| 3111 | } | ||
| 3112 | |||
| 3113 | finish: | ||
| 2922 | ret = send_utimes(sctx, pm->ino, pm->gen); | 3114 | ret = send_utimes(sctx, pm->ino, pm->gen); |
| 2923 | if (ret < 0) | 3115 | if (ret < 0) |
| 2924 | goto out; | 3116 | goto out; |
| @@ -2928,12 +3120,15 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
| 2928 | * and old parent(s). | 3120 | * and old parent(s). |
| 2929 | */ | 3121 | */ |
| 2930 | list_for_each_entry(cur, &pm->update_refs, list) { | 3122 | list_for_each_entry(cur, &pm->update_refs, list) { |
| 3123 | if (cur->dir == rmdir_ino) | ||
| 3124 | continue; | ||
| 2931 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); | 3125 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); |
| 2932 | if (ret < 0) | 3126 | if (ret < 0) |
| 2933 | goto out; | 3127 | goto out; |
| 2934 | } | 3128 | } |
| 2935 | 3129 | ||
| 2936 | out: | 3130 | out: |
| 3131 | fs_path_free(name); | ||
| 2937 | fs_path_free(from_path); | 3132 | fs_path_free(from_path); |
| 2938 | fs_path_free(to_path); | 3133 | fs_path_free(to_path); |
| 2939 | sctx->send_progress = orig_progress; | 3134 | sctx->send_progress = orig_progress; |
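Editor's note: apply_dir_move() now finishes what can_rmdir() had to postpone: when a directory cannot be deleted because a child is still waiting to move, the rmdir is parked on that child's wait record (rmdir_ino) and retried here once the move is done. A toy of that handshake (names and data invented):

#include <stdio.h>

struct waiting_move { unsigned long long ino, rmdir_ino; };

static int can_rmdir_now(unsigned long long dir,
			 struct waiting_move *blocker)
{
	if (blocker && blocker->ino) {		/* a child still must move */
		blocker->rmdir_ino = dir;	/* park the rmdir on it */
		return 0;
	}
	return 1;
}

int main(void)
{
	struct waiting_move child = { .ino = 42, .rmdir_ino = 0 };
	unsigned long long dir = 7;

	if (!can_rmdir_now(dir, &child))
		printf("deferred rmdir of %llu until %llu moves\n",
		       child.rmdir_ino, child.ino);

	/* ... the move for inode 42 is applied ... */
	if (can_rmdir_now(dir, NULL))
		printf("rmdir %llu now\n", dir);
	return 0;
}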
| @@ -3005,17 +3200,19 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
| 3005 | int ret; | 3200 | int ret; |
| 3006 | u64 ino = parent_ref->dir; | 3201 | u64 ino = parent_ref->dir; |
| 3007 | u64 parent_ino_before, parent_ino_after; | 3202 | u64 parent_ino_before, parent_ino_after; |
| 3008 | u64 new_gen, old_gen; | 3203 | u64 old_gen; |
| 3009 | struct fs_path *path_before = NULL; | 3204 | struct fs_path *path_before = NULL; |
| 3010 | struct fs_path *path_after = NULL; | 3205 | struct fs_path *path_after = NULL; |
| 3011 | int len1, len2; | 3206 | int len1, len2; |
| 3012 | 3207 | int register_upper_dirs; | |
| 3013 | if (parent_ref->dir <= sctx->cur_ino) | 3208 | u64 gen; |
| 3014 | return 0; | ||
| 3015 | 3209 | ||
| 3016 | if (is_waiting_for_move(sctx, ino)) | 3210 | if (is_waiting_for_move(sctx, ino)) |
| 3017 | return 1; | 3211 | return 1; |
| 3018 | 3212 | ||
| 3213 | if (parent_ref->dir <= sctx->cur_ino) | ||
| 3214 | return 0; | ||
| 3215 | |||
| 3019 | ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen, | 3216 | ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen, |
| 3020 | NULL, NULL, NULL, NULL); | 3217 | NULL, NULL, NULL, NULL); |
| 3021 | if (ret == -ENOENT) | 3218 | if (ret == -ENOENT) |
| @@ -3023,12 +3220,7 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
| 3023 | else if (ret < 0) | 3220 | else if (ret < 0) |
| 3024 | return ret; | 3221 | return ret; |
| 3025 | 3222 | ||
| 3026 | ret = get_inode_info(sctx->send_root, ino, NULL, &new_gen, | 3223 | if (parent_ref->dir_gen != old_gen) |
| 3027 | NULL, NULL, NULL, NULL); | ||
| 3028 | if (ret < 0) | ||
| 3029 | return ret; | ||
| 3030 | |||
| 3031 | if (new_gen != old_gen) | ||
| 3032 | return 0; | 3224 | return 0; |
| 3033 | 3225 | ||
| 3034 | path_before = fs_path_alloc(); | 3226 | path_before = fs_path_alloc(); |
| @@ -3051,7 +3243,7 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
| 3051 | } | 3243 | } |
| 3052 | 3244 | ||
| 3053 | ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, | 3245 | ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, |
| 3054 | NULL, path_after); | 3246 | &gen, path_after); |
| 3055 | if (ret == -ENOENT) { | 3247 | if (ret == -ENOENT) { |
| 3056 | ret = 0; | 3248 | ret = 0; |
| 3057 | goto out; | 3249 | goto out; |
| @@ -3061,13 +3253,67 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
| 3061 | 3253 | ||
| 3062 | len1 = fs_path_len(path_before); | 3254 | len1 = fs_path_len(path_before); |
| 3063 | len2 = fs_path_len(path_after); | 3255 | len2 = fs_path_len(path_after); |
| 3064 | if ((parent_ino_before != parent_ino_after) && (len1 != len2 || | 3256 | if (parent_ino_before != parent_ino_after || len1 != len2 || |
| 3065 | memcmp(path_before->start, path_after->start, len1))) { | 3257 | memcmp(path_before->start, path_after->start, len1)) { |
| 3066 | ret = 1; | 3258 | ret = 1; |
| 3067 | goto out; | 3259 | goto out; |
| 3068 | } | 3260 | } |
| 3069 | ret = 0; | 3261 | ret = 0; |
| 3070 | 3262 | ||
| 3263 | /* | ||
| 3264 | * Ok, our new most direct ancestor has a higher inode number but | ||
| 3265 | * wasn't moved/renamed. So maybe some of the new ancestors higher in | ||
| 3266 | * the hierarchy have a higher inode number too *and* were renamed | ||

| 3267 | * or moved - in this case we need to wait for the ancestor's rename | ||
| 3268 | * or move operation before we can do the move/rename for the current | ||
| 3269 | * inode. | ||
| 3270 | */ | ||
| 3271 | register_upper_dirs = 0; | ||
| 3272 | ino = parent_ino_after; | ||
| 3273 | again: | ||
| 3274 | while ((ret == 0 || register_upper_dirs) && ino > sctx->cur_ino) { | ||
| 3275 | u64 parent_gen; | ||
| 3276 | |||
| 3277 | fs_path_reset(path_before); | ||
| 3278 | fs_path_reset(path_after); | ||
| 3279 | |||
| 3280 | ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, | ||
| 3281 | &parent_gen, path_after); | ||
| 3282 | if (ret < 0) | ||
| 3283 | goto out; | ||
| 3284 | ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, | ||
| 3285 | NULL, path_before); | ||
| 3286 | if (ret == -ENOENT) { | ||
| 3287 | ret = 0; | ||
| 3288 | break; | ||
| 3289 | } else if (ret < 0) { | ||
| 3290 | goto out; | ||
| 3291 | } | ||
| 3292 | |||
| 3293 | len1 = fs_path_len(path_before); | ||
| 3294 | len2 = fs_path_len(path_after); | ||
| 3295 | if (parent_ino_before != parent_ino_after || len1 != len2 || | ||
| 3296 | memcmp(path_before->start, path_after->start, len1)) { | ||
| 3297 | ret = 1; | ||
| 3298 | if (register_upper_dirs) { | ||
| 3299 | break; | ||
| 3300 | } else { | ||
| 3301 | register_upper_dirs = 1; | ||
| 3302 | ino = parent_ref->dir; | ||
| 3303 | gen = parent_ref->dir_gen; | ||
| 3304 | goto again; | ||
| 3305 | } | ||
| 3306 | } else if (register_upper_dirs) { | ||
| 3307 | ret = add_pending_dir_move(sctx, ino, gen, | ||
| 3308 | parent_ino_after); | ||
| 3309 | if (ret < 0 && ret != -EEXIST) | ||
| 3310 | goto out; | ||
| 3311 | } | ||
| 3312 | |||
| 3313 | ino = parent_ino_after; | ||
| 3314 | gen = parent_gen; | ||
| 3315 | } | ||
| 3316 | |||
| 3071 | out: | 3317 | out: |
| 3072 | fs_path_free(path_before); | 3318 | fs_path_free(path_before); |
| 3073 | fs_path_free(path_after); | 3319 | fs_path_free(path_after); |
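Editor's note: the new loop generalizes the check: it is not enough that the immediate ancestor kept its place; any higher ancestor with a bigger inode number that was renamed or reparented also forces this inode to wait (and, on the second pass, gets registered as a pending move itself). A toy of the before/after comparison, deliberately dropping the inode-ordering and registration details:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct ent { int parent; const char *name; };

/* indices are inode numbers; 1 is the root */
static struct ent before[] = {
	[1] = {1, "/"}, [2] = {1, "a"}, [3] = {2, "b"},
};
static struct ent after[] = {
	[1] = {1, "/"}, [2] = {1, "a2"}, [3] = {2, "b"},	/* 2 renamed */
};

static bool must_wait(int ino)
{
	while (ino != 1) {
		if (after[ino].parent != before[ino].parent ||
		    strcmp(after[ino].name, before[ino].name) != 0)
			return true;	/* an ancestor moved/renamed */
		ino = after[ino].parent;
	}
	return false;
}

int main(void)
{
	printf("wait for 3? %s\n", must_wait(3) ? "yes" : "no");
	return 0;
}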
| @@ -3089,6 +3335,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) | |||
| 3089 | u64 ow_gen; | 3335 | u64 ow_gen; |
| 3090 | int did_overwrite = 0; | 3336 | int did_overwrite = 0; |
| 3091 | int is_orphan = 0; | 3337 | int is_orphan = 0; |
| 3338 | u64 last_dir_ino_rm = 0; | ||
| 3092 | 3339 | ||
| 3093 | verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | 3340 | verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); |
| 3094 | 3341 | ||
| @@ -3227,9 +3474,14 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 3227 | * dirs, we always have one new and one deleted | 3474 | * dirs, we always have one new and one deleted |
| 3228 | * ref. The deleted ref is ignored later. | 3475 | * ref. The deleted ref is ignored later. |
| 3229 | */ | 3476 | */ |
| 3230 | if (wait_for_parent_move(sctx, cur)) { | 3477 | ret = wait_for_parent_move(sctx, cur); |
| 3478 | if (ret < 0) | ||
| 3479 | goto out; | ||
| 3480 | if (ret) { | ||
| 3231 | ret = add_pending_dir_move(sctx, | 3481 | ret = add_pending_dir_move(sctx, |
| 3232 | cur->dir); | 3482 | sctx->cur_ino, |
| 3483 | sctx->cur_inode_gen, | ||
| 3484 | cur->dir); | ||
| 3233 | *pending_move = 1; | 3485 | *pending_move = 1; |
| 3234 | } else { | 3486 | } else { |
| 3235 | ret = send_rename(sctx, valid_path, | 3487 | ret = send_rename(sctx, valid_path, |
| @@ -3259,7 +3511,8 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 3259 | * later, we do this check again and rmdir it then if possible. | 3511 | * later, we do this check again and rmdir it then if possible. |
| 3260 | * See the use of check_dirs for more details. | 3512 | * See the use of check_dirs for more details. |
| 3261 | */ | 3513 | */ |
| 3262 | ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_ino); | 3514 | ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen, |
| 3515 | sctx->cur_ino); | ||
| 3263 | if (ret < 0) | 3516 | if (ret < 0) |
| 3264 | goto out; | 3517 | goto out; |
| 3265 | if (ret) { | 3518 | if (ret) { |
| @@ -3350,8 +3603,10 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 3350 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); | 3603 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); |
| 3351 | if (ret < 0) | 3604 | if (ret < 0) |
| 3352 | goto out; | 3605 | goto out; |
| 3353 | } else if (ret == inode_state_did_delete) { | 3606 | } else if (ret == inode_state_did_delete && |
| 3354 | ret = can_rmdir(sctx, cur->dir, sctx->cur_ino); | 3607 | cur->dir != last_dir_ino_rm) { |
| 3608 | ret = can_rmdir(sctx, cur->dir, cur->dir_gen, | ||
| 3609 | sctx->cur_ino); | ||
| 3355 | if (ret < 0) | 3610 | if (ret < 0) |
| 3356 | goto out; | 3611 | goto out; |
| 3357 | if (ret) { | 3612 | if (ret) { |
| @@ -3362,6 +3617,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 3362 | ret = send_rmdir(sctx, valid_path); | 3617 | ret = send_rmdir(sctx, valid_path); |
| 3363 | if (ret < 0) | 3618 | if (ret < 0) |
| 3364 | goto out; | 3619 | goto out; |
| 3620 | last_dir_ino_rm = cur->dir; | ||
| 3365 | } | 3621 | } |
| 3366 | } | 3622 | } |
| 3367 | } | 3623 | } |
| @@ -3375,9 +3631,8 @@ out: | |||
| 3375 | return ret; | 3631 | return ret; |
| 3376 | } | 3632 | } |
| 3377 | 3633 | ||
| 3378 | static int __record_new_ref(int num, u64 dir, int index, | 3634 | static int record_ref(struct btrfs_root *root, int num, u64 dir, int index, |
| 3379 | struct fs_path *name, | 3635 | struct fs_path *name, void *ctx, struct list_head *refs) |
| 3380 | void *ctx) | ||
| 3381 | { | 3636 | { |
| 3382 | int ret = 0; | 3637 | int ret = 0; |
| 3383 | struct send_ctx *sctx = ctx; | 3638 | struct send_ctx *sctx = ctx; |
| @@ -3388,7 +3643,7 @@ static int __record_new_ref(int num, u64 dir, int index, | |||
| 3388 | if (!p) | 3643 | if (!p) |
| 3389 | return -ENOMEM; | 3644 | return -ENOMEM; |
| 3390 | 3645 | ||
| 3391 | ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL, | 3646 | ret = get_inode_info(root, dir, NULL, &gen, NULL, NULL, |
| 3392 | NULL, NULL); | 3647 | NULL, NULL); |
| 3393 | if (ret < 0) | 3648 | if (ret < 0) |
| 3394 | goto out; | 3649 | goto out; |
| @@ -3400,7 +3655,7 @@ static int __record_new_ref(int num, u64 dir, int index, | |||
| 3400 | if (ret < 0) | 3655 | if (ret < 0) |
| 3401 | goto out; | 3656 | goto out; |
| 3402 | 3657 | ||
| 3403 | ret = record_ref(&sctx->new_refs, dir, gen, p); | 3658 | ret = __record_ref(refs, dir, gen, p); |
| 3404 | 3659 | ||
| 3405 | out: | 3660 | out: |
| 3406 | if (ret) | 3661 | if (ret) |
| @@ -3408,37 +3663,23 @@ out: | |||
| 3408 | return ret; | 3663 | return ret; |
| 3409 | } | 3664 | } |
| 3410 | 3665 | ||
| 3666 | static int __record_new_ref(int num, u64 dir, int index, | ||
| 3667 | struct fs_path *name, | ||
| 3668 | void *ctx) | ||
| 3669 | { | ||
| 3670 | struct send_ctx *sctx = ctx; | ||
| 3671 | return record_ref(sctx->send_root, num, dir, index, name, | ||
| 3672 | ctx, &sctx->new_refs); | ||
| 3673 | } | ||
| 3674 | |||
| 3675 | |||
| 3411 | static int __record_deleted_ref(int num, u64 dir, int index, | 3676 | static int __record_deleted_ref(int num, u64 dir, int index, |
| 3412 | struct fs_path *name, | 3677 | struct fs_path *name, |
| 3413 | void *ctx) | 3678 | void *ctx) |
| 3414 | { | 3679 | { |
| 3415 | int ret = 0; | ||
| 3416 | struct send_ctx *sctx = ctx; | 3680 | struct send_ctx *sctx = ctx; |
| 3417 | struct fs_path *p; | 3681 | return record_ref(sctx->parent_root, num, dir, index, name, |
| 3418 | u64 gen; | 3682 | ctx, &sctx->deleted_refs); |
| 3419 | |||
| 3420 | p = fs_path_alloc(); | ||
| 3421 | if (!p) | ||
| 3422 | return -ENOMEM; | ||
| 3423 | |||
| 3424 | ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL, | ||
| 3425 | NULL, NULL); | ||
| 3426 | if (ret < 0) | ||
| 3427 | goto out; | ||
| 3428 | |||
| 3429 | ret = get_cur_path(sctx, dir, gen, p); | ||
| 3430 | if (ret < 0) | ||
| 3431 | goto out; | ||
| 3432 | ret = fs_path_add_path(p, name); | ||
| 3433 | if (ret < 0) | ||
| 3434 | goto out; | ||
| 3435 | |||
| 3436 | ret = record_ref(&sctx->deleted_refs, dir, gen, p); | ||
| 3437 | |||
| 3438 | out: | ||
| 3439 | if (ret) | ||
| 3440 | fs_path_free(p); | ||
| 3441 | return ret; | ||
| 3442 | } | 3683 | } |
| 3443 | 3684 | ||
| 3444 | static int record_new_ref(struct send_ctx *sctx) | 3685 | static int record_new_ref(struct send_ctx *sctx) |
| @@ -3619,21 +3860,31 @@ static int process_all_refs(struct send_ctx *sctx, | |||
| 3619 | root = sctx->parent_root; | 3860 | root = sctx->parent_root; |
| 3620 | cb = __record_deleted_ref; | 3861 | cb = __record_deleted_ref; |
| 3621 | } else { | 3862 | } else { |
| 3622 | BUG(); | 3863 | btrfs_err(sctx->send_root->fs_info, |
| 3864 | "Wrong command %d in process_all_refs", cmd); | ||
| 3865 | ret = -EINVAL; | ||
| 3866 | goto out; | ||
| 3623 | } | 3867 | } |
| 3624 | 3868 | ||
| 3625 | key.objectid = sctx->cmp_key->objectid; | 3869 | key.objectid = sctx->cmp_key->objectid; |
| 3626 | key.type = BTRFS_INODE_REF_KEY; | 3870 | key.type = BTRFS_INODE_REF_KEY; |
| 3627 | key.offset = 0; | 3871 | key.offset = 0; |
| 3628 | while (1) { | 3872 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
| 3629 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | 3873 | if (ret < 0) |
| 3630 | if (ret < 0) | 3874 | goto out; |
| 3631 | goto out; | ||
| 3632 | if (ret) | ||
| 3633 | break; | ||
| 3634 | 3875 | ||
| 3876 | while (1) { | ||
| 3635 | eb = path->nodes[0]; | 3877 | eb = path->nodes[0]; |
| 3636 | slot = path->slots[0]; | 3878 | slot = path->slots[0]; |
| 3879 | if (slot >= btrfs_header_nritems(eb)) { | ||
| 3880 | ret = btrfs_next_leaf(root, path); | ||
| 3881 | if (ret < 0) | ||
| 3882 | goto out; | ||
| 3883 | else if (ret > 0) | ||
| 3884 | break; | ||
| 3885 | continue; | ||
| 3886 | } | ||
| 3887 | |||
| 3637 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 3888 | btrfs_item_key_to_cpu(eb, &found_key, slot); |
| 3638 | 3889 | ||
| 3639 | if (found_key.objectid != key.objectid || | 3890 | if (found_key.objectid != key.objectid || |
| @@ -3642,11 +3893,10 @@ static int process_all_refs(struct send_ctx *sctx, | |||
| 3642 | break; | 3893 | break; |
| 3643 | 3894 | ||
| 3644 | ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx); | 3895 | ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx); |
| 3645 | btrfs_release_path(path); | ||
| 3646 | if (ret < 0) | 3896 | if (ret < 0) |
| 3647 | goto out; | 3897 | goto out; |
| 3648 | 3898 | ||
| 3649 | key.offset = found_key.offset + 1; | 3899 | path->slots[0]++; |
| 3650 | } | 3900 | } |
| 3651 | btrfs_release_path(path); | 3901 | btrfs_release_path(path); |
| 3652 | 3902 | ||
| @@ -3927,19 +4177,25 @@ static int process_all_new_xattrs(struct send_ctx *sctx) | |||
| 3927 | key.objectid = sctx->cmp_key->objectid; | 4177 | key.objectid = sctx->cmp_key->objectid; |
| 3928 | key.type = BTRFS_XATTR_ITEM_KEY; | 4178 | key.type = BTRFS_XATTR_ITEM_KEY; |
| 3929 | key.offset = 0; | 4179 | key.offset = 0; |
| 3930 | while (1) { | 4180 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
| 3931 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | 4181 | if (ret < 0) |
| 3932 | if (ret < 0) | 4182 | goto out; |
| 3933 | goto out; | ||
| 3934 | if (ret) { | ||
| 3935 | ret = 0; | ||
| 3936 | goto out; | ||
| 3937 | } | ||
| 3938 | 4183 | ||
| 4184 | while (1) { | ||
| 3939 | eb = path->nodes[0]; | 4185 | eb = path->nodes[0]; |
| 3940 | slot = path->slots[0]; | 4186 | slot = path->slots[0]; |
| 3941 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 4187 | if (slot >= btrfs_header_nritems(eb)) { |
| 4188 | ret = btrfs_next_leaf(root, path); | ||
| 4189 | if (ret < 0) { | ||
| 4190 | goto out; | ||
| 4191 | } else if (ret > 0) { | ||
| 4192 | ret = 0; | ||
| 4193 | break; | ||
| 4194 | } | ||
| 4195 | continue; | ||
| 4196 | } | ||
| 3942 | 4197 | ||
| 4198 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
| 3943 | if (found_key.objectid != key.objectid || | 4199 | if (found_key.objectid != key.objectid || |
| 3944 | found_key.type != key.type) { | 4200 | found_key.type != key.type) { |
| 3945 | ret = 0; | 4201 | ret = 0; |
| @@ -3951,8 +4207,7 @@ static int process_all_new_xattrs(struct send_ctx *sctx) | |||
| 3951 | if (ret < 0) | 4207 | if (ret < 0) |
| 3952 | goto out; | 4208 | goto out; |
| 3953 | 4209 | ||
| 3954 | btrfs_release_path(path); | 4210 | path->slots[0]++; |
| 3955 | key.offset = found_key.offset + 1; | ||
| 3956 | } | 4211 | } |
| 3957 | 4212 | ||
| 3958 | out: | 4213 | out: |
| @@ -3991,6 +4246,13 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) | |||
| 3991 | goto out; | 4246 | goto out; |
| 3992 | 4247 | ||
| 3993 | last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT; | 4248 | last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT; |
| 4249 | |||
| 4250 | /* initial readahead */ | ||
| 4251 | memset(&sctx->ra, 0, sizeof(struct file_ra_state)); | ||
| 4252 | file_ra_state_init(&sctx->ra, inode->i_mapping); | ||
| 4253 | btrfs_force_ra(inode->i_mapping, &sctx->ra, NULL, index, | ||
| 4254 | last_index - index + 1); | ||
| 4255 | |||
| 3994 | while (index <= last_index) { | 4256 | while (index <= last_index) { |
| 3995 | unsigned cur_len = min_t(unsigned, len, | 4257 | unsigned cur_len = min_t(unsigned, len, |
| 3996 | PAGE_CACHE_SIZE - pg_offset); | 4258 | PAGE_CACHE_SIZE - pg_offset); |
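Editor's note: the hunk above primes readahead for the whole range once, before the page-by-page copy loop, instead of faulting each page in on demand. A userspace analogue of the same idea (file name and range are invented):

#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
	int fd = open("data.bin", O_RDONLY);
	char buf[4096];
	off_t off = 0, len = 1 << 20;	/* the range about to be sent */
	ssize_t n;

	if (fd < 0)
		return 1;
	/* announce the sequential range up front, like the force_ra call */
	posix_fadvise(fd, off, len, POSIX_FADV_WILLNEED);
	while (off < len && (n = pread(fd, buf, sizeof(buf), off)) > 0)
		off += n;		/* ... copy n bytes out ... */
	close(fd);
	return 0;
}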
| @@ -4174,6 +4436,9 @@ static int send_hole(struct send_ctx *sctx, u64 end) | |||
| 4174 | p = fs_path_alloc(); | 4436 | p = fs_path_alloc(); |
| 4175 | if (!p) | 4437 | if (!p) |
| 4176 | return -ENOMEM; | 4438 | return -ENOMEM; |
| 4439 | ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); | ||
| 4440 | if (ret < 0) | ||
| 4441 | goto tlv_put_failure; | ||
| 4177 | memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE); | 4442 | memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE); |
| 4178 | while (offset < end) { | 4443 | while (offset < end) { |
| 4179 | len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE); | 4444 | len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE); |
| @@ -4181,9 +4446,6 @@ static int send_hole(struct send_ctx *sctx, u64 end) | |||
| 4181 | ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); | 4446 | ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); |
| 4182 | if (ret < 0) | 4447 | if (ret < 0) |
| 4183 | break; | 4448 | break; |
| 4184 | ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); | ||
| 4185 | if (ret < 0) | ||
| 4186 | break; | ||
| 4187 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); | 4449 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); |
| 4188 | TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); | 4450 | TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); |
| 4189 | TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len); | 4451 | TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len); |
| @@ -4724,7 +4986,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) | |||
| 4724 | 4986 | ||
| 4725 | if (S_ISREG(sctx->cur_inode_mode)) { | 4987 | if (S_ISREG(sctx->cur_inode_mode)) { |
| 4726 | if (need_send_hole(sctx)) { | 4988 | if (need_send_hole(sctx)) { |
| 4727 | if (sctx->cur_inode_last_extent == (u64)-1) { | 4989 | if (sctx->cur_inode_last_extent == (u64)-1 || |
| 4990 | sctx->cur_inode_last_extent < | ||
| 4991 | sctx->cur_inode_size) { | ||
| 4728 | ret = get_last_extent(sctx, (u64)-1); | 4992 | ret = get_last_extent(sctx, (u64)-1); |
| 4729 | if (ret) | 4993 | if (ret) |
| 4730 | goto out; | 4994 | goto out; |
| @@ -4763,18 +5027,19 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) | |||
| 4763 | ret = apply_children_dir_moves(sctx); | 5027 | ret = apply_children_dir_moves(sctx); |
| 4764 | if (ret) | 5028 | if (ret) |
| 4765 | goto out; | 5029 | goto out; |
| 5030 | /* | ||
| 5031 | * Need to send that every time, no matter if it actually | ||
| 5032 | * changed between the two trees as we have done changes to | ||
| 5033 | * the inode before. If our inode is a directory and it's | ||
| 5034 | * waiting to be moved/renamed, we will send its utimes when | ||
| 5035 | * it's moved/renamed, therefore we don't need to do it here. | ||
| 5036 | */ | ||
| 5037 | sctx->send_progress = sctx->cur_ino + 1; | ||
| 5038 | ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); | ||
| 5039 | if (ret < 0) | ||
| 5040 | goto out; | ||
| 4766 | } | 5041 | } |
| 4767 | 5042 | ||
| 4768 | /* | ||
| 4769 | * Need to send that every time, no matter if it actually | ||
| 4770 | * changed between the two trees as we have done changes to | ||
| 4771 | * the inode before. | ||
| 4772 | */ | ||
| 4773 | sctx->send_progress = sctx->cur_ino + 1; | ||
| 4774 | ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); | ||
| 4775 | if (ret < 0) | ||
| 4776 | goto out; | ||
| 4777 | |||
| 4778 | out: | 5043 | out: |
| 4779 | return ret; | 5044 | return ret; |
| 4780 | } | 5045 | } |
| @@ -4840,6 +5105,8 @@ static int changed_inode(struct send_ctx *sctx, | |||
| 4840 | sctx->left_path->nodes[0], left_ii); | 5105 | sctx->left_path->nodes[0], left_ii); |
| 4841 | sctx->cur_inode_mode = btrfs_inode_mode( | 5106 | sctx->cur_inode_mode = btrfs_inode_mode( |
| 4842 | sctx->left_path->nodes[0], left_ii); | 5107 | sctx->left_path->nodes[0], left_ii); |
| 5108 | sctx->cur_inode_rdev = btrfs_inode_rdev( | ||
| 5109 | sctx->left_path->nodes[0], left_ii); | ||
| 4843 | if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) | 5110 | if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) |
| 4844 | ret = send_create_inode_if_needed(sctx); | 5111 | ret = send_create_inode_if_needed(sctx); |
| 4845 | } else if (result == BTRFS_COMPARE_TREE_DELETED) { | 5112 | } else if (result == BTRFS_COMPARE_TREE_DELETED) { |
| @@ -4884,6 +5151,8 @@ static int changed_inode(struct send_ctx *sctx, | |||
| 4884 | sctx->left_path->nodes[0], left_ii); | 5151 | sctx->left_path->nodes[0], left_ii); |
| 4885 | sctx->cur_inode_mode = btrfs_inode_mode( | 5152 | sctx->cur_inode_mode = btrfs_inode_mode( |
| 4886 | sctx->left_path->nodes[0], left_ii); | 5153 | sctx->left_path->nodes[0], left_ii); |
| 5154 | sctx->cur_inode_rdev = btrfs_inode_rdev( | ||
| 5155 | sctx->left_path->nodes[0], left_ii); | ||
| 4887 | ret = send_create_inode_if_needed(sctx); | 5156 | ret = send_create_inode_if_needed(sctx); |
| 4888 | if (ret < 0) | 5157 | if (ret < 0) |
| 4889 | goto out; | 5158 | goto out; |
| @@ -5124,37 +5393,15 @@ static int full_send_tree(struct send_ctx *sctx) | |||
| 5124 | struct btrfs_path *path; | 5393 | struct btrfs_path *path; |
| 5125 | struct extent_buffer *eb; | 5394 | struct extent_buffer *eb; |
| 5126 | int slot; | 5395 | int slot; |
| 5127 | u64 start_ctransid; | ||
| 5128 | u64 ctransid; | ||
| 5129 | 5396 | ||
| 5130 | path = alloc_path_for_send(); | 5397 | path = alloc_path_for_send(); |
| 5131 | if (!path) | 5398 | if (!path) |
| 5132 | return -ENOMEM; | 5399 | return -ENOMEM; |
| 5133 | 5400 | ||
| 5134 | spin_lock(&send_root->root_item_lock); | ||
| 5135 | start_ctransid = btrfs_root_ctransid(&send_root->root_item); | ||
| 5136 | spin_unlock(&send_root->root_item_lock); | ||
| 5137 | |||
| 5138 | key.objectid = BTRFS_FIRST_FREE_OBJECTID; | 5401 | key.objectid = BTRFS_FIRST_FREE_OBJECTID; |
| 5139 | key.type = BTRFS_INODE_ITEM_KEY; | 5402 | key.type = BTRFS_INODE_ITEM_KEY; |
| 5140 | key.offset = 0; | 5403 | key.offset = 0; |
| 5141 | 5404 | ||
| 5142 | /* | ||
| 5143 | * Make sure the tree has not changed after re-joining. We detect this | ||
| 5144 | * by comparing start_ctransid and ctransid. They should always match. | ||
| 5145 | */ | ||
| 5146 | spin_lock(&send_root->root_item_lock); | ||
| 5147 | ctransid = btrfs_root_ctransid(&send_root->root_item); | ||
| 5148 | spin_unlock(&send_root->root_item_lock); | ||
| 5149 | |||
| 5150 | if (ctransid != start_ctransid) { | ||
| 5151 | WARN(1, KERN_WARNING "BTRFS: the root that you're trying to " | ||
| 5152 | "send was modified in between. This is " | ||
| 5153 | "probably a bug.\n"); | ||
| 5154 | ret = -EIO; | ||
| 5155 | goto out; | ||
| 5156 | } | ||
| 5157 | |||
| 5158 | ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0); | 5405 | ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0); |
| 5159 | if (ret < 0) | 5406 | if (ret < 0) |
| 5160 | goto out; | 5407 | goto out; |
| @@ -5340,6 +5587,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
| 5340 | 5587 | ||
| 5341 | sctx->pending_dir_moves = RB_ROOT; | 5588 | sctx->pending_dir_moves = RB_ROOT; |
| 5342 | sctx->waiting_dir_moves = RB_ROOT; | 5589 | sctx->waiting_dir_moves = RB_ROOT; |
| 5590 | sctx->orphan_dirs = RB_ROOT; | ||
| 5343 | 5591 | ||
| 5344 | sctx->clone_roots = vzalloc(sizeof(struct clone_root) * | 5592 | sctx->clone_roots = vzalloc(sizeof(struct clone_root) * |
| 5345 | (arg->clone_sources_count + 1)); | 5593 | (arg->clone_sources_count + 1)); |
| @@ -5435,7 +5683,9 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
| 5435 | NULL); | 5683 | NULL); |
| 5436 | sort_clone_roots = 1; | 5684 | sort_clone_roots = 1; |
| 5437 | 5685 | ||
| 5686 | current->journal_info = (void *)BTRFS_SEND_TRANS_STUB; | ||
| 5438 | ret = send_subvol(sctx); | 5687 | ret = send_subvol(sctx); |
| 5688 | current->journal_info = NULL; | ||
| 5439 | if (ret < 0) | 5689 | if (ret < 0) |
| 5440 | goto out; | 5690 | goto out; |
| 5441 | 5691 | ||
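Editor's note: setting current->journal_info to BTRFS_SEND_TRANS_STUB around send_subvol() reuses the per-task journal pointer as an in-band flag, so transaction-start paths deeper in btrfs can recognize a sending task without extra plumbing; it also makes the old ctransid sanity check in full_send_tree() redundant. A toy of the per-task-marker pattern using a thread-local (the kernel field is real; the stub value and helper here are invented):

#include <stdio.h>

static __thread void *journal_info;	/* stands in for the task field */
#define SEND_TRANS_STUB ((void *)1)

static void deep_helper(void)
{
	if (journal_info == SEND_TRANS_STUB)
		printf("called from send: take the no-transaction path\n");
}

int main(void)
{
	journal_info = SEND_TRANS_STUB;	/* enter send */
	deep_helper();
	journal_info = NULL;		/* leave send */
	return 0;
}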
| @@ -5477,6 +5727,16 @@ out: | |||
| 5477 | kfree(dm); | 5727 | kfree(dm); |
| 5478 | } | 5728 | } |
| 5479 | 5729 | ||
| 5730 | WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs)); | ||
| 5731 | while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) { | ||
| 5732 | struct rb_node *n; | ||
| 5733 | struct orphan_dir_info *odi; | ||
| 5734 | |||
| 5735 | n = rb_first(&sctx->orphan_dirs); | ||
| 5736 | odi = rb_entry(n, struct orphan_dir_info, node); | ||
| 5737 | free_orphan_dir_info(sctx, odi); | ||
| 5738 | } | ||
| 5739 | |||
| 5480 | if (sort_clone_roots) { | 5740 | if (sort_clone_roots) { |
| 5481 | for (i = 0; i < sctx->clone_roots_cnt; i++) | 5741 | for (i = 0; i < sctx->clone_roots_cnt; i++) |
| 5482 | btrfs_root_dec_send_in_progress( | 5742 | btrfs_root_dec_send_in_progress( |
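Note on the orphan_dirs teardown above: it is the standard rbtree drain idiom, repeatedly take rb_first() and dispose of that entry until the tree is empty, so removal never invalidates an iterator. A self-contained sketch of the idiom with a hypothetical payload type (odi_example is invented for illustration; in the patch, free_orphan_dir_info() plays the unlink-and-free role):

    #include <linux/rbtree.h>
    #include <linux/slab.h>
    #include <linux/types.h>

    struct odi_example {                    /* hypothetical payload */
            struct rb_node node;
            u64 ino;
    };

    static void drain_orphan_tree(struct rb_root *root)
    {
            struct rb_node *n;

            while ((n = rb_first(root))) {
                    struct odi_example *e =
                            rb_entry(n, struct odi_example, node);

                    rb_erase(n, root);      /* unlink before freeing */
                    kfree(e);
            }
    }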
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d04db817be5c..5011aadacab8 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -66,6 +66,8 @@ | |||
| 66 | static const struct super_operations btrfs_super_ops; | 66 | static const struct super_operations btrfs_super_ops; |
| 67 | static struct file_system_type btrfs_fs_type; | 67 | static struct file_system_type btrfs_fs_type; |
| 68 | 68 | ||
| 69 | static int btrfs_remount(struct super_block *sb, int *flags, char *data); | ||
| 70 | |||
| 69 | static const char *btrfs_decode_error(int errno) | 71 | static const char *btrfs_decode_error(int errno) |
| 70 | { | 72 | { |
| 71 | char *errstr = "unknown"; | 73 | char *errstr = "unknown"; |
| @@ -1185,6 +1187,26 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags, | |||
| 1185 | mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, | 1187 | mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, |
| 1186 | newargs); | 1188 | newargs); |
| 1187 | kfree(newargs); | 1189 | kfree(newargs); |
| 1190 | |||
| 1191 | if (PTR_RET(mnt) == -EBUSY) { | ||
| 1192 | if (flags & MS_RDONLY) { | ||
| 1193 | mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY, device_name, | ||
| 1194 | newargs); | ||
| 1195 | } else { | ||
| 1196 | int r; | ||
| 1197 | mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name, | ||
| 1198 | newargs); | ||
| 1199 | if (IS_ERR(mnt)) | ||
| 1200 | return ERR_CAST(mnt); | ||
| 1201 | |||
| 1202 | r = btrfs_remount(mnt->mnt_sb, &flags, NULL); | ||
| 1203 | if (r < 0) { | ||
| 1204 | /* FIXME: do we need to release vfsmount mnt here? */ | ||
| 1205 | return ERR_PTR(r); | ||
| 1206 | } | ||
| 1207 | } | ||
| 1208 | } | ||
| 1209 | |||
| 1188 | if (IS_ERR(mnt)) | 1210 | if (IS_ERR(mnt)) |
| 1189 | return ERR_CAST(mnt); | 1211 | return ERR_CAST(mnt); |
| 1190 | 1212 | ||
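The -EBUSY branch above covers a device that is already mounted with the opposite read-only mode: retry with the MS_RDONLY bit flipped and, when the caller wanted read-write, remount the read-only result back to the requested flags. Note that the hunk reuses newargs after the kfree() above it; a sketch of the same fallback that keeps the argument string alive across both attempts (mount_ro_rw_fallback is an invented name, the remount-to-rw step is omitted for brevity, and PTR_RET() is the helper the patch itself uses to fold an ERR_PTR into an errno):

    static struct vfsmount *mount_ro_rw_fallback(struct file_system_type *type,
                                                 int flags, const char *dev,
                                                 char *args)
    {
            struct vfsmount *mnt;

            mnt = vfs_kern_mount(type, flags, dev, args);
            if (PTR_RET(mnt) != -EBUSY)
                    return mnt;             /* success, or a real error */

            /* Already mounted in the other ro/rw mode: flip and retry. */
            if (flags & MS_RDONLY)
                    return vfs_kern_mount(type, flags & ~MS_RDONLY, dev, args);
            return vfs_kern_mount(type, flags | MS_RDONLY, dev, args);
    }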
| @@ -1305,13 +1327,6 @@ error_fs_info: | |||
| 1305 | return ERR_PTR(error); | 1327 | return ERR_PTR(error); |
| 1306 | } | 1328 | } |
| 1307 | 1329 | ||
| 1308 | static void btrfs_set_max_workers(struct btrfs_workers *workers, int new_limit) | ||
| 1309 | { | ||
| 1310 | spin_lock_irq(&workers->lock); | ||
| 1311 | workers->max_workers = new_limit; | ||
| 1312 | spin_unlock_irq(&workers->lock); | ||
| 1313 | } | ||
| 1314 | |||
| 1315 | static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, | 1330 | static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, |
| 1316 | int new_pool_size, int old_pool_size) | 1331 | int new_pool_size, int old_pool_size) |
| 1317 | { | 1332 | { |
| @@ -1323,21 +1338,20 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, | |||
| 1323 | btrfs_info(fs_info, "resize thread pool %d -> %d", | 1338 | btrfs_info(fs_info, "resize thread pool %d -> %d", |
| 1324 | old_pool_size, new_pool_size); | 1339 | old_pool_size, new_pool_size); |
| 1325 | 1340 | ||
| 1326 | btrfs_set_max_workers(&fs_info->generic_worker, new_pool_size); | 1341 | btrfs_workqueue_set_max(fs_info->workers, new_pool_size); |
| 1327 | btrfs_set_max_workers(&fs_info->workers, new_pool_size); | 1342 | btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size); |
| 1328 | btrfs_set_max_workers(&fs_info->delalloc_workers, new_pool_size); | 1343 | btrfs_workqueue_set_max(fs_info->submit_workers, new_pool_size); |
| 1329 | btrfs_set_max_workers(&fs_info->submit_workers, new_pool_size); | 1344 | btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size); |
| 1330 | btrfs_set_max_workers(&fs_info->caching_workers, new_pool_size); | 1345 | btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size); |
| 1331 | btrfs_set_max_workers(&fs_info->fixup_workers, new_pool_size); | 1346 | btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size); |
| 1332 | btrfs_set_max_workers(&fs_info->endio_workers, new_pool_size); | 1347 | btrfs_workqueue_set_max(fs_info->endio_meta_write_workers, |
| 1333 | btrfs_set_max_workers(&fs_info->endio_meta_workers, new_pool_size); | 1348 | new_pool_size); |
| 1334 | btrfs_set_max_workers(&fs_info->endio_meta_write_workers, new_pool_size); | 1349 | btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size); |
| 1335 | btrfs_set_max_workers(&fs_info->endio_write_workers, new_pool_size); | 1350 | btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size); |
| 1336 | btrfs_set_max_workers(&fs_info->endio_freespace_worker, new_pool_size); | 1351 | btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size); |
| 1337 | btrfs_set_max_workers(&fs_info->delayed_workers, new_pool_size); | 1352 | btrfs_workqueue_set_max(fs_info->readahead_workers, new_pool_size); |
| 1338 | btrfs_set_max_workers(&fs_info->readahead_workers, new_pool_size); | 1353 | btrfs_workqueue_set_max(fs_info->scrub_wr_completion_workers, |
| 1339 | btrfs_set_max_workers(&fs_info->scrub_wr_completion_workers, | 1354 | new_pool_size); |
| 1340 | new_pool_size); | ||
| 1341 | } | 1355 | } |
| 1342 | 1356 | ||
| 1343 | static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info) | 1357 | static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info) |
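Every per-pool limit now funnels through btrfs_workqueue_set_max(), whose body is not visible in this part of the diff. A plausible shape, given that each btrfs_workqueue wraps a normal queue plus an optional high-priority one (all example_ names are invented, and the locking detail is an assumption, not the patch's code):

    static void __example_set_max(struct __btrfs_workqueue *wq, int max)
    {
            spin_lock(&wq->thres_lock);
            wq->max_active = max;           /* picked up by threshold logic */
            spin_unlock(&wq->thres_lock);
    }

    static void example_workqueue_set_max(struct btrfs_workqueue *wq, int max)
    {
            __example_set_max(wq->normal, max);
            if (wq->high)
                    __example_set_max(wq->high, max);
    }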
| @@ -1388,6 +1402,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 1388 | unsigned int old_metadata_ratio = fs_info->metadata_ratio; | 1402 | unsigned int old_metadata_ratio = fs_info->metadata_ratio; |
| 1389 | int ret; | 1403 | int ret; |
| 1390 | 1404 | ||
| 1405 | sync_filesystem(sb); | ||
| 1391 | btrfs_remount_prepare(fs_info); | 1406 | btrfs_remount_prepare(fs_info); |
| 1392 | 1407 | ||
| 1393 | ret = btrfs_parse_options(root, data); | 1408 | ret = btrfs_parse_options(root, data); |
| @@ -1479,6 +1494,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 1479 | sb->s_flags &= ~MS_RDONLY; | 1494 | sb->s_flags &= ~MS_RDONLY; |
| 1480 | } | 1495 | } |
| 1481 | out: | 1496 | out: |
| 1497 | wake_up_process(fs_info->transaction_kthread); | ||
| 1482 | btrfs_remount_cleanup(fs_info, old_opts); | 1498 | btrfs_remount_cleanup(fs_info, old_opts); |
| 1483 | return 0; | 1499 | return 0; |
| 1484 | 1500 | ||
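Two small remount behaviors also land in this file: the handler flushes the filesystem before parsing new options (a tree-wide change around this series pushed sync_filesystem() into remount_fs implementations), and it wakes the transaction kthread on exit so settings such as a changed commit interval take effect immediately instead of after the old timer fires. The resulting skeleton, with the btrfs-specific steps elided:

    static int example_remount(struct super_block *sb, int *flags, char *data)
    {
            struct btrfs_fs_info *fs_info = btrfs_sb(sb);

            sync_filesystem(sb);            /* flush under the old options */

            /* ... parse options, handle ro <-> rw transitions ... */

            wake_up_process(fs_info->transaction_kthread);
            return 0;
    }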
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 865f4cf9a769..c5eb2143dc66 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include <linux/kobject.h> | 24 | #include <linux/kobject.h> |
| 25 | #include <linux/bug.h> | 25 | #include <linux/bug.h> |
| 26 | #include <linux/genhd.h> | 26 | #include <linux/genhd.h> |
| 27 | #include <linux/debugfs.h> | ||
| 27 | 28 | ||
| 28 | #include "ctree.h" | 29 | #include "ctree.h" |
| 29 | #include "disk-io.h" | 30 | #include "disk-io.h" |
| @@ -599,6 +600,12 @@ static int add_device_membership(struct btrfs_fs_info *fs_info) | |||
| 599 | /* /sys/fs/btrfs/ entry */ | 600 | /* /sys/fs/btrfs/ entry */ |
| 600 | static struct kset *btrfs_kset; | 601 | static struct kset *btrfs_kset; |
| 601 | 602 | ||
| 603 | /* /sys/kernel/debug/btrfs */ | ||
| 604 | static struct dentry *btrfs_debugfs_root_dentry; | ||
| 605 | |||
| 606 | /* Debugging tunables and exported data */ | ||
| 607 | u64 btrfs_debugfs_test; | ||
| 608 | |||
| 602 | int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info) | 609 | int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info) |
| 603 | { | 610 | { |
| 604 | int error; | 611 | int error; |
| @@ -642,27 +649,41 @@ failure: | |||
| 642 | return error; | 649 | return error; |
| 643 | } | 650 | } |
| 644 | 651 | ||
| 652 | static int btrfs_init_debugfs(void) | ||
| 653 | { | ||
| 654 | #ifdef CONFIG_DEBUG_FS | ||
| 655 | btrfs_debugfs_root_dentry = debugfs_create_dir("btrfs", NULL); | ||
| 656 | if (!btrfs_debugfs_root_dentry) | ||
| 657 | return -ENOMEM; | ||
| 658 | |||
| 659 | debugfs_create_u64("test", S_IRUGO | S_IWUGO, btrfs_debugfs_root_dentry, | ||
| 660 | &btrfs_debugfs_test); | ||
| 661 | #endif | ||
| 662 | return 0; | ||
| 663 | } | ||
| 664 | |||
| 645 | int btrfs_init_sysfs(void) | 665 | int btrfs_init_sysfs(void) |
| 646 | { | 666 | { |
| 647 | int ret; | 667 | int ret; |
| 668 | |||
| 648 | btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj); | 669 | btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj); |
| 649 | if (!btrfs_kset) | 670 | if (!btrfs_kset) |
| 650 | return -ENOMEM; | 671 | return -ENOMEM; |
| 651 | 672 | ||
| 652 | init_feature_attrs(); | 673 | ret = btrfs_init_debugfs(); |
| 674 | if (ret) | ||
| 675 | return ret; | ||
| 653 | 676 | ||
| 677 | init_feature_attrs(); | ||
| 654 | ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); | 678 | ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); |
| 655 | if (ret) { | ||
| 656 | kset_unregister(btrfs_kset); | ||
| 657 | return ret; | ||
| 658 | } | ||
| 659 | 679 | ||
| 660 | return 0; | 680 | return ret; |
| 661 | } | 681 | } |
| 662 | 682 | ||
| 663 | void btrfs_exit_sysfs(void) | 683 | void btrfs_exit_sysfs(void) |
| 664 | { | 684 | { |
| 665 | sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); | 685 | sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); |
| 666 | kset_unregister(btrfs_kset); | 686 | kset_unregister(btrfs_kset); |
| 687 | debugfs_remove_recursive(btrfs_debugfs_root_dentry); | ||
| 667 | } | 688 | } |
| 668 | 689 | ||
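The debugfs cleanup is intentionally a single call: debugfs_remove_recursive() on the directory reclaims the u64 file beneath it, is NULL-safe, and compiles to a stub without CONFIG_DEBUG_FS, which is why btrfs_exit_sysfs() can invoke it unconditionally. A condensed sketch of the pairing (example_ names invented); once registered, the tunable is visible from userspace as /sys/kernel/debug/btrfs/test:

    static struct dentry *example_dir;
    static u64 example_val;

    static int example_debugfs_init(void)
    {
            example_dir = debugfs_create_dir("btrfs", NULL);
            if (!example_dir)
                    return -ENOMEM;
            /* No error check: the file is best-effort, as in the patch. */
            debugfs_create_u64("test", S_IRUGO | S_IWUGO,
                               example_dir, &example_val);
            return 0;
    }

    static void example_debugfs_exit(void)
    {
            debugfs_remove_recursive(example_dir);  /* NULL-safe */
    }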
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h index f3cea3710d44..9ab576318a84 100644 --- a/fs/btrfs/sysfs.h +++ b/fs/btrfs/sysfs.h | |||
| @@ -1,6 +1,11 @@ | |||
| 1 | #ifndef _BTRFS_SYSFS_H_ | 1 | #ifndef _BTRFS_SYSFS_H_ |
| 2 | #define _BTRFS_SYSFS_H_ | 2 | #define _BTRFS_SYSFS_H_ |
| 3 | 3 | ||
| 4 | /* | ||
| 5 | * Data exported through sysfs | ||
| 6 | */ | ||
| 7 | extern u64 btrfs_debugfs_test; | ||
| 8 | |||
| 4 | enum btrfs_feature_set { | 9 | enum btrfs_feature_set { |
| 5 | FEAT_COMPAT, | 10 | FEAT_COMPAT, |
| 6 | FEAT_COMPAT_RO, | 11 | FEAT_COMPAT_RO, |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 34cd83184c4a..7579f6d0b854 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -75,10 +75,21 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) | |||
| 75 | } | 75 | } |
| 76 | } | 76 | } |
| 77 | 77 | ||
| 78 | static noinline void switch_commit_root(struct btrfs_root *root) | 78 | static noinline void switch_commit_roots(struct btrfs_transaction *trans, |
| 79 | struct btrfs_fs_info *fs_info) | ||
| 79 | { | 80 | { |
| 80 | free_extent_buffer(root->commit_root); | 81 | struct btrfs_root *root, *tmp; |
| 81 | root->commit_root = btrfs_root_node(root); | 82 | |
| 83 | down_write(&fs_info->commit_root_sem); | ||
| 84 | list_for_each_entry_safe(root, tmp, &trans->switch_commits, | ||
| 85 | dirty_list) { | ||
| 86 | list_del_init(&root->dirty_list); | ||
| 87 | free_extent_buffer(root->commit_root); | ||
| 88 | root->commit_root = btrfs_root_node(root); | ||
| 89 | if (is_fstree(root->objectid)) | ||
| 90 | btrfs_unpin_free_ino(root); | ||
| 91 | } | ||
| 92 | up_write(&fs_info->commit_root_sem); | ||
| 82 | } | 93 | } |
| 83 | 94 | ||
| 84 | static inline void extwriter_counter_inc(struct btrfs_transaction *trans, | 95 | static inline void extwriter_counter_inc(struct btrfs_transaction *trans, |
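switch_commit_roots() above is the consumer half of a producer/consumer pair: later hunks in this file stop switching each root's commit root on the spot and instead queue the root on the transaction, so the whole batch flips under one write-locked commit_root_sem (the per-root fs_commit_mutex and the open-coded unpin call disappear from commit_fs_roots() as a result). The producer side reduces to a single list operation, sketched here; the queue sites appear below in commit_cowonly_roots(), commit_fs_roots() and btrfs_commit_transaction():

    /* Sketch: defer the commit-root switch of @root to commit time. */
    static void queue_commit_root_switch(struct btrfs_trans_handle *trans,
                                         struct btrfs_root *root)
    {
            /* dirty_list doubles as the link; switch_commit_roots()
             * detaches it again with list_del_init(). */
            list_add_tail(&root->dirty_list,
                          &trans->transaction->switch_commits);
    }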
| @@ -208,6 +219,7 @@ loop: | |||
| 208 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | 219 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); |
| 209 | INIT_LIST_HEAD(&cur_trans->ordered_operations); | 220 | INIT_LIST_HEAD(&cur_trans->ordered_operations); |
| 210 | INIT_LIST_HEAD(&cur_trans->pending_chunks); | 221 | INIT_LIST_HEAD(&cur_trans->pending_chunks); |
| 222 | INIT_LIST_HEAD(&cur_trans->switch_commits); | ||
| 211 | list_add_tail(&cur_trans->list, &fs_info->trans_list); | 223 | list_add_tail(&cur_trans->list, &fs_info->trans_list); |
| 212 | extent_io_tree_init(&cur_trans->dirty_pages, | 224 | extent_io_tree_init(&cur_trans->dirty_pages, |
| 213 | fs_info->btree_inode->i_mapping); | 225 | fs_info->btree_inode->i_mapping); |
| @@ -375,7 +387,8 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, | |||
| 375 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) | 387 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) |
| 376 | return ERR_PTR(-EROFS); | 388 | return ERR_PTR(-EROFS); |
| 377 | 389 | ||
| 378 | if (current->journal_info) { | 390 | if (current->journal_info && |
| 391 | current->journal_info != (void *)BTRFS_SEND_TRANS_STUB) { | ||
| 379 | WARN_ON(type & TRANS_EXTWRITERS); | 392 | WARN_ON(type & TRANS_EXTWRITERS); |
| 380 | h = current->journal_info; | 393 | h = current->journal_info; |
| 381 | h->use_count++; | 394 | h->use_count++; |
| @@ -683,7 +696,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 683 | int lock = (trans->type != TRANS_JOIN_NOLOCK); | 696 | int lock = (trans->type != TRANS_JOIN_NOLOCK); |
| 684 | int err = 0; | 697 | int err = 0; |
| 685 | 698 | ||
| 686 | if (--trans->use_count) { | 699 | if (trans->use_count > 1) { |
| 700 | trans->use_count--; | ||
| 687 | trans->block_rsv = trans->orig_rsv; | 701 | trans->block_rsv = trans->orig_rsv; |
| 688 | return 0; | 702 | return 0; |
| 689 | } | 703 | } |
| @@ -731,17 +745,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 731 | } | 745 | } |
| 732 | 746 | ||
| 733 | if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { | 747 | if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { |
| 734 | if (throttle) { | 748 | if (throttle) |
| 735 | /* | ||
| 736 | * We may race with somebody else here so end up having | ||
| 737 | * to call end_transaction on ourselves again, so inc | ||
| 738 | * our use_count. | ||
| 739 | */ | ||
| 740 | trans->use_count++; | ||
| 741 | return btrfs_commit_transaction(trans, root); | 749 | return btrfs_commit_transaction(trans, root); |
| 742 | } else { | 750 | else |
| 743 | wake_up_process(info->transaction_kthread); | 751 | wake_up_process(info->transaction_kthread); |
| 744 | } | ||
| 745 | } | 752 | } |
| 746 | 753 | ||
| 747 | if (trans->type & __TRANS_FREEZABLE) | 754 | if (trans->type & __TRANS_FREEZABLE) |
| @@ -925,9 +932,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
| 925 | return ret; | 932 | return ret; |
| 926 | } | 933 | } |
| 927 | 934 | ||
| 928 | if (root != root->fs_info->extent_root) | ||
| 929 | switch_commit_root(root); | ||
| 930 | |||
| 931 | return 0; | 935 | return 0; |
| 932 | } | 936 | } |
| 933 | 937 | ||
| @@ -983,15 +987,16 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
| 983 | list_del_init(next); | 987 | list_del_init(next); |
| 984 | root = list_entry(next, struct btrfs_root, dirty_list); | 988 | root = list_entry(next, struct btrfs_root, dirty_list); |
| 985 | 989 | ||
| 990 | if (root != fs_info->extent_root) | ||
| 991 | list_add_tail(&root->dirty_list, | ||
| 992 | &trans->transaction->switch_commits); | ||
| 986 | ret = update_cowonly_root(trans, root); | 993 | ret = update_cowonly_root(trans, root); |
| 987 | if (ret) | 994 | if (ret) |
| 988 | return ret; | 995 | return ret; |
| 989 | } | 996 | } |
| 990 | 997 | ||
| 991 | down_write(&fs_info->extent_commit_sem); | 998 | list_add_tail(&fs_info->extent_root->dirty_list, |
| 992 | switch_commit_root(fs_info->extent_root); | 999 | &trans->transaction->switch_commits); |
| 993 | up_write(&fs_info->extent_commit_sem); | ||
| 994 | |||
| 995 | btrfs_after_dev_replace_commit(fs_info); | 1000 | btrfs_after_dev_replace_commit(fs_info); |
| 996 | 1001 | ||
| 997 | return 0; | 1002 | return 0; |
| @@ -1048,11 +1053,8 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
| 1048 | smp_wmb(); | 1053 | smp_wmb(); |
| 1049 | 1054 | ||
| 1050 | if (root->commit_root != root->node) { | 1055 | if (root->commit_root != root->node) { |
| 1051 | mutex_lock(&root->fs_commit_mutex); | 1056 | list_add_tail(&root->dirty_list, |
| 1052 | switch_commit_root(root); | 1057 | &trans->transaction->switch_commits); |
| 1053 | btrfs_unpin_free_ino(root); | ||
| 1054 | mutex_unlock(&root->fs_commit_mutex); | ||
| 1055 | |||
| 1056 | btrfs_set_root_node(&root->root_item, | 1058 | btrfs_set_root_node(&root->root_item, |
| 1057 | root->node); | 1059 | root->node); |
| 1058 | } | 1060 | } |
| @@ -1578,10 +1580,9 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, | |||
| 1578 | 1580 | ||
| 1579 | trace_btrfs_transaction_commit(root); | 1581 | trace_btrfs_transaction_commit(root); |
| 1580 | 1582 | ||
| 1581 | btrfs_scrub_continue(root); | ||
| 1582 | |||
| 1583 | if (current->journal_info == trans) | 1583 | if (current->journal_info == trans) |
| 1584 | current->journal_info = NULL; | 1584 | current->journal_info = NULL; |
| 1585 | btrfs_scrub_cancel(root->fs_info); | ||
| 1585 | 1586 | ||
| 1586 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 1587 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
| 1587 | } | 1588 | } |
| @@ -1621,7 +1622,7 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, | |||
| 1621 | static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) | 1622 | static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) |
| 1622 | { | 1623 | { |
| 1623 | if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) | 1624 | if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) |
| 1624 | return btrfs_start_delalloc_roots(fs_info, 1); | 1625 | return btrfs_start_delalloc_roots(fs_info, 1, -1); |
| 1625 | return 0; | 1626 | return 0; |
| 1626 | } | 1627 | } |
| 1627 | 1628 | ||
| @@ -1754,7 +1755,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1754 | /* ->aborted might be set after the previous check, so check it */ | 1755 | /* ->aborted might be set after the previous check, so check it */ |
| 1755 | if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { | 1756 | if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { |
| 1756 | ret = cur_trans->aborted; | 1757 | ret = cur_trans->aborted; |
| 1757 | goto cleanup_transaction; | 1758 | goto scrub_continue; |
| 1758 | } | 1759 | } |
| 1759 | /* | 1760 | /* |
| 1760 | * the reloc mutex makes sure that we stop | 1761 | * the reloc mutex makes sure that we stop |
| @@ -1771,7 +1772,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1771 | ret = create_pending_snapshots(trans, root->fs_info); | 1772 | ret = create_pending_snapshots(trans, root->fs_info); |
| 1772 | if (ret) { | 1773 | if (ret) { |
| 1773 | mutex_unlock(&root->fs_info->reloc_mutex); | 1774 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1774 | goto cleanup_transaction; | 1775 | goto scrub_continue; |
| 1775 | } | 1776 | } |
| 1776 | 1777 | ||
| 1777 | /* | 1778 | /* |
| @@ -1787,13 +1788,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1787 | ret = btrfs_run_delayed_items(trans, root); | 1788 | ret = btrfs_run_delayed_items(trans, root); |
| 1788 | if (ret) { | 1789 | if (ret) { |
| 1789 | mutex_unlock(&root->fs_info->reloc_mutex); | 1790 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1790 | goto cleanup_transaction; | 1791 | goto scrub_continue; |
| 1791 | } | 1792 | } |
| 1792 | 1793 | ||
| 1793 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | 1794 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); |
| 1794 | if (ret) { | 1795 | if (ret) { |
| 1795 | mutex_unlock(&root->fs_info->reloc_mutex); | 1796 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1796 | goto cleanup_transaction; | 1797 | goto scrub_continue; |
| 1797 | } | 1798 | } |
| 1798 | 1799 | ||
| 1799 | /* | 1800 | /* |
| @@ -1823,7 +1824,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1823 | if (ret) { | 1824 | if (ret) { |
| 1824 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1825 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 1825 | mutex_unlock(&root->fs_info->reloc_mutex); | 1826 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1826 | goto cleanup_transaction; | 1827 | goto scrub_continue; |
| 1827 | } | 1828 | } |
| 1828 | 1829 | ||
| 1829 | /* | 1830 | /* |
| @@ -1844,7 +1845,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1844 | if (ret) { | 1845 | if (ret) { |
| 1845 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1846 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 1846 | mutex_unlock(&root->fs_info->reloc_mutex); | 1847 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1847 | goto cleanup_transaction; | 1848 | goto scrub_continue; |
| 1848 | } | 1849 | } |
| 1849 | 1850 | ||
| 1850 | /* | 1851 | /* |
| @@ -1855,7 +1856,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1855 | ret = cur_trans->aborted; | 1856 | ret = cur_trans->aborted; |
| 1856 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1857 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 1857 | mutex_unlock(&root->fs_info->reloc_mutex); | 1858 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1858 | goto cleanup_transaction; | 1859 | goto scrub_continue; |
| 1859 | } | 1860 | } |
| 1860 | 1861 | ||
| 1861 | btrfs_prepare_extent_commit(trans, root); | 1862 | btrfs_prepare_extent_commit(trans, root); |
| @@ -1864,11 +1865,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1864 | 1865 | ||
| 1865 | btrfs_set_root_node(&root->fs_info->tree_root->root_item, | 1866 | btrfs_set_root_node(&root->fs_info->tree_root->root_item, |
| 1866 | root->fs_info->tree_root->node); | 1867 | root->fs_info->tree_root->node); |
| 1867 | switch_commit_root(root->fs_info->tree_root); | 1868 | list_add_tail(&root->fs_info->tree_root->dirty_list, |
| 1869 | &cur_trans->switch_commits); | ||
| 1868 | 1870 | ||
| 1869 | btrfs_set_root_node(&root->fs_info->chunk_root->root_item, | 1871 | btrfs_set_root_node(&root->fs_info->chunk_root->root_item, |
| 1870 | root->fs_info->chunk_root->node); | 1872 | root->fs_info->chunk_root->node); |
| 1871 | switch_commit_root(root->fs_info->chunk_root); | 1873 | list_add_tail(&root->fs_info->chunk_root->dirty_list, |
| 1874 | &cur_trans->switch_commits); | ||
| 1875 | |||
| 1876 | switch_commit_roots(cur_trans, root->fs_info); | ||
| 1872 | 1877 | ||
| 1873 | assert_qgroups_uptodate(trans); | 1878 | assert_qgroups_uptodate(trans); |
| 1874 | update_super_roots(root); | 1879 | update_super_roots(root); |
| @@ -1891,13 +1896,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1891 | btrfs_error(root->fs_info, ret, | 1896 | btrfs_error(root->fs_info, ret, |
| 1892 | "Error while writing out transaction"); | 1897 | "Error while writing out transaction"); |
| 1893 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1898 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 1894 | goto cleanup_transaction; | 1899 | goto scrub_continue; |
| 1895 | } | 1900 | } |
| 1896 | 1901 | ||
| 1897 | ret = write_ctree_super(trans, root, 0); | 1902 | ret = write_ctree_super(trans, root, 0); |
| 1898 | if (ret) { | 1903 | if (ret) { |
| 1899 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1904 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 1900 | goto cleanup_transaction; | 1905 | goto scrub_continue; |
| 1901 | } | 1906 | } |
| 1902 | 1907 | ||
| 1903 | /* | 1908 | /* |
| @@ -1940,6 +1945,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1940 | 1945 | ||
| 1941 | return ret; | 1946 | return ret; |
| 1942 | 1947 | ||
| 1948 | scrub_continue: | ||
| 1949 | btrfs_scrub_continue(root); | ||
| 1943 | cleanup_transaction: | 1950 | cleanup_transaction: |
| 1944 | btrfs_trans_release_metadata(trans, root); | 1951 | btrfs_trans_release_metadata(trans, root); |
| 1945 | trans->block_rsv = NULL; | 1952 | trans->block_rsv = NULL; |
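The new scrub_continue label stacks on top of cleanup_transaction so that any failure occurring after the scrub was paused resumes it exactly once before the shared cleanup, while earlier failures still jump straight to cleanup_transaction (whose own btrfs_scrub_continue() call moved out, replaced by btrfs_scrub_cancel() for the abort case). The general unwind shape, with hypothetical step names:

    static int example_commit(void)
    {
            int ret;

            ret = early_step();             /* hypothetical */
            if (ret)
                    goto cleanup;           /* pause not done yet */

            pause_side_effect();            /* hypothetical, must be undone */

            ret = late_step();              /* hypothetical */
            if (ret)
                    goto undo_pause;        /* undo before common cleanup */

            return 0;

    undo_pause:
            undo_pause_side_effect();
    cleanup:
            common_cleanup();
            return ret;
    }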
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 6ac037e9f9f0..b57b924e8e03 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
| @@ -57,6 +57,7 @@ struct btrfs_transaction { | |||
| 57 | struct list_head pending_snapshots; | 57 | struct list_head pending_snapshots; |
| 58 | struct list_head ordered_operations; | 58 | struct list_head ordered_operations; |
| 59 | struct list_head pending_chunks; | 59 | struct list_head pending_chunks; |
| 60 | struct list_head switch_commits; | ||
| 60 | struct btrfs_delayed_ref_root delayed_refs; | 61 | struct btrfs_delayed_ref_root delayed_refs; |
| 61 | int aborted; | 62 | int aborted; |
| 62 | }; | 63 | }; |
| @@ -78,6 +79,8 @@ struct btrfs_transaction { | |||
| 78 | #define TRANS_EXTWRITERS (__TRANS_USERSPACE | __TRANS_START | \ | 79 | #define TRANS_EXTWRITERS (__TRANS_USERSPACE | __TRANS_START | \ |
| 79 | __TRANS_ATTACH) | 80 | __TRANS_ATTACH) |
| 80 | 81 | ||
| 82 | #define BTRFS_SEND_TRANS_STUB 1 | ||
| 83 | |||
| 81 | struct btrfs_trans_handle { | 84 | struct btrfs_trans_handle { |
| 82 | u64 transid; | 85 | u64 transid; |
| 83 | u64 bytes_reserved; | 86 | u64 bytes_reserved; |
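BTRFS_SEND_TRANS_STUB ties the pieces of this series together: send.c parks the stub in current->journal_info around send_subvol(), and start_transaction() (changed earlier in this file's diff) must treat that value as "no handle held" rather than joining it. The test it performs reduces to the following sketch:

    /* Sketch: a real nested handle is any journal_info value other
     * than NULL or the send stub. */
    static bool has_real_trans_handle(void)
    {
            void *ji = current->journal_info;

            return ji && ji != (void *)BTRFS_SEND_TRANS_STUB;
    }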
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 39d83da03e03..e2f45fc02610 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
| @@ -136,13 +136,20 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, | |||
| 136 | * syncing the tree wait for us to finish | 136 | * syncing the tree wait for us to finish |
| 137 | */ | 137 | */ |
| 138 | static int start_log_trans(struct btrfs_trans_handle *trans, | 138 | static int start_log_trans(struct btrfs_trans_handle *trans, |
| 139 | struct btrfs_root *root) | 139 | struct btrfs_root *root, |
| 140 | struct btrfs_log_ctx *ctx) | ||
| 140 | { | 141 | { |
| 142 | int index; | ||
| 141 | int ret; | 143 | int ret; |
| 142 | int err = 0; | ||
| 143 | 144 | ||
| 144 | mutex_lock(&root->log_mutex); | 145 | mutex_lock(&root->log_mutex); |
| 145 | if (root->log_root) { | 146 | if (root->log_root) { |
| 147 | if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == | ||
| 148 | trans->transid) { | ||
| 149 | ret = -EAGAIN; | ||
| 150 | goto out; | ||
| 151 | } | ||
| 152 | |||
| 146 | if (!root->log_start_pid) { | 153 | if (!root->log_start_pid) { |
| 147 | root->log_start_pid = current->pid; | 154 | root->log_start_pid = current->pid; |
| 148 | root->log_multiple_pids = false; | 155 | root->log_multiple_pids = false; |
| @@ -152,27 +159,40 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
| 152 | 159 | ||
| 153 | atomic_inc(&root->log_batch); | 160 | atomic_inc(&root->log_batch); |
| 154 | atomic_inc(&root->log_writers); | 161 | atomic_inc(&root->log_writers); |
| 162 | if (ctx) { | ||
| 163 | index = root->log_transid % 2; | ||
| 164 | list_add_tail(&ctx->list, &root->log_ctxs[index]); | ||
| 165 | ctx->log_transid = root->log_transid; | ||
| 166 | } | ||
| 155 | mutex_unlock(&root->log_mutex); | 167 | mutex_unlock(&root->log_mutex); |
| 156 | return 0; | 168 | return 0; |
| 157 | } | 169 | } |
| 158 | root->log_multiple_pids = false; | 170 | |
| 159 | root->log_start_pid = current->pid; | 171 | ret = 0; |
| 160 | mutex_lock(&root->fs_info->tree_log_mutex); | 172 | mutex_lock(&root->fs_info->tree_log_mutex); |
| 161 | if (!root->fs_info->log_root_tree) { | 173 | if (!root->fs_info->log_root_tree) |
| 162 | ret = btrfs_init_log_root_tree(trans, root->fs_info); | 174 | ret = btrfs_init_log_root_tree(trans, root->fs_info); |
| 163 | if (ret) | 175 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 164 | err = ret; | 176 | if (ret) |
| 165 | } | 177 | goto out; |
| 166 | if (err == 0 && !root->log_root) { | 178 | |
| 179 | if (!root->log_root) { | ||
| 167 | ret = btrfs_add_log_tree(trans, root); | 180 | ret = btrfs_add_log_tree(trans, root); |
| 168 | if (ret) | 181 | if (ret) |
| 169 | err = ret; | 182 | goto out; |
| 170 | } | 183 | } |
| 171 | mutex_unlock(&root->fs_info->tree_log_mutex); | 184 | root->log_multiple_pids = false; |
| 185 | root->log_start_pid = current->pid; | ||
| 172 | atomic_inc(&root->log_batch); | 186 | atomic_inc(&root->log_batch); |
| 173 | atomic_inc(&root->log_writers); | 187 | atomic_inc(&root->log_writers); |
| 188 | if (ctx) { | ||
| 189 | index = root->log_transid % 2; | ||
| 190 | list_add_tail(&ctx->list, &root->log_ctxs[index]); | ||
| 191 | ctx->log_transid = root->log_transid; | ||
| 192 | } | ||
| 193 | out: | ||
| 174 | mutex_unlock(&root->log_mutex); | 194 | mutex_unlock(&root->log_mutex); |
| 175 | return err; | 195 | return ret; |
| 176 | } | 196 | } |
| 177 | 197 | ||
| 178 | /* | 198 | /* |
| @@ -2359,8 +2379,8 @@ static int update_log_root(struct btrfs_trans_handle *trans, | |||
| 2359 | return ret; | 2379 | return ret; |
| 2360 | } | 2380 | } |
| 2361 | 2381 | ||
| 2362 | static int wait_log_commit(struct btrfs_trans_handle *trans, | 2382 | static void wait_log_commit(struct btrfs_trans_handle *trans, |
| 2363 | struct btrfs_root *root, unsigned long transid) | 2383 | struct btrfs_root *root, int transid) |
| 2364 | { | 2384 | { |
| 2365 | DEFINE_WAIT(wait); | 2385 | DEFINE_WAIT(wait); |
| 2366 | int index = transid % 2; | 2386 | int index = transid % 2; |
| @@ -2375,36 +2395,63 @@ static int wait_log_commit(struct btrfs_trans_handle *trans, | |||
| 2375 | &wait, TASK_UNINTERRUPTIBLE); | 2395 | &wait, TASK_UNINTERRUPTIBLE); |
| 2376 | mutex_unlock(&root->log_mutex); | 2396 | mutex_unlock(&root->log_mutex); |
| 2377 | 2397 | ||
| 2378 | if (root->fs_info->last_trans_log_full_commit != | 2398 | if (root->log_transid_committed < transid && |
| 2379 | trans->transid && root->log_transid < transid + 2 && | ||
| 2380 | atomic_read(&root->log_commit[index])) | 2399 | atomic_read(&root->log_commit[index])) |
| 2381 | schedule(); | 2400 | schedule(); |
| 2382 | 2401 | ||
| 2383 | finish_wait(&root->log_commit_wait[index], &wait); | 2402 | finish_wait(&root->log_commit_wait[index], &wait); |
| 2384 | mutex_lock(&root->log_mutex); | 2403 | mutex_lock(&root->log_mutex); |
| 2385 | } while (root->fs_info->last_trans_log_full_commit != | 2404 | } while (root->log_transid_committed < transid && |
| 2386 | trans->transid && root->log_transid < transid + 2 && | ||
| 2387 | atomic_read(&root->log_commit[index])); | 2405 | atomic_read(&root->log_commit[index])); |
| 2388 | return 0; | ||
| 2389 | } | 2406 | } |
| 2390 | 2407 | ||
| 2391 | static void wait_for_writer(struct btrfs_trans_handle *trans, | 2408 | static void wait_for_writer(struct btrfs_trans_handle *trans, |
| 2392 | struct btrfs_root *root) | 2409 | struct btrfs_root *root) |
| 2393 | { | 2410 | { |
| 2394 | DEFINE_WAIT(wait); | 2411 | DEFINE_WAIT(wait); |
| 2395 | while (root->fs_info->last_trans_log_full_commit != | 2412 | |
| 2396 | trans->transid && atomic_read(&root->log_writers)) { | 2413 | while (atomic_read(&root->log_writers)) { |
| 2397 | prepare_to_wait(&root->log_writer_wait, | 2414 | prepare_to_wait(&root->log_writer_wait, |
| 2398 | &wait, TASK_UNINTERRUPTIBLE); | 2415 | &wait, TASK_UNINTERRUPTIBLE); |
| 2399 | mutex_unlock(&root->log_mutex); | 2416 | mutex_unlock(&root->log_mutex); |
| 2400 | if (root->fs_info->last_trans_log_full_commit != | 2417 | if (atomic_read(&root->log_writers)) |
| 2401 | trans->transid && atomic_read(&root->log_writers)) | ||
| 2402 | schedule(); | 2418 | schedule(); |
| 2403 | mutex_lock(&root->log_mutex); | 2419 | mutex_lock(&root->log_mutex); |
| 2404 | finish_wait(&root->log_writer_wait, &wait); | 2420 | finish_wait(&root->log_writer_wait, &wait); |
| 2405 | } | 2421 | } |
| 2406 | } | 2422 | } |
| 2407 | 2423 | ||
| 2424 | static inline void btrfs_remove_log_ctx(struct btrfs_root *root, | ||
| 2425 | struct btrfs_log_ctx *ctx) | ||
| 2426 | { | ||
| 2427 | if (!ctx) | ||
| 2428 | return; | ||
| 2429 | |||
| 2430 | mutex_lock(&root->log_mutex); | ||
| 2431 | list_del_init(&ctx->list); | ||
| 2432 | mutex_unlock(&root->log_mutex); | ||
| 2433 | } | ||
| 2434 | |||
| 2435 | /* | ||
| 2436 | * Invoked under the log mutex, or when we are sure that no other task | ||
| 2437 | * can access the list. | ||
| 2438 | */ | ||
| 2439 | static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root, | ||
| 2440 | int index, int error) | ||
| 2441 | { | ||
| 2442 | struct btrfs_log_ctx *ctx; | ||
| 2443 | |||
| 2444 | if (!error) { | ||
| 2445 | INIT_LIST_HEAD(&root->log_ctxs[index]); | ||
| 2446 | return; | ||
| 2447 | } | ||
| 2448 | |||
| 2449 | list_for_each_entry(ctx, &root->log_ctxs[index], list) | ||
| 2450 | ctx->log_ret = error; | ||
| 2451 | |||
| 2452 | INIT_LIST_HEAD(&root->log_ctxs[index]); | ||
| 2453 | } | ||
| 2454 | |||
| 2408 | /* | 2455 | /* |
| 2409 | * btrfs_sync_log sends a given tree log down to the disk and | 2456 |
| 2410 | * updates the super blocks to record it. When this call is done, | 2457 | * updates the super blocks to record it. When this call is done, |
| @@ -2418,7 +2465,7 @@ static void wait_for_writer(struct btrfs_trans_handle *trans, | |||
| 2418 | * that has happened. | 2465 | * that has happened. |
| 2419 | */ | 2466 | */ |
| 2420 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 2467 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
| 2421 | struct btrfs_root *root) | 2468 | struct btrfs_root *root, struct btrfs_log_ctx *ctx) |
| 2422 | { | 2469 | { |
| 2423 | int index1; | 2470 | int index1; |
| 2424 | int index2; | 2471 | int index2; |
| @@ -2426,22 +2473,30 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2426 | int ret; | 2473 | int ret; |
| 2427 | struct btrfs_root *log = root->log_root; | 2474 | struct btrfs_root *log = root->log_root; |
| 2428 | struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; | 2475 | struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; |
| 2429 | unsigned long log_transid = 0; | 2476 | int log_transid = 0; |
| 2477 | struct btrfs_log_ctx root_log_ctx; | ||
| 2430 | struct blk_plug plug; | 2478 | struct blk_plug plug; |
| 2431 | 2479 | ||
| 2432 | mutex_lock(&root->log_mutex); | 2480 | mutex_lock(&root->log_mutex); |
| 2433 | log_transid = root->log_transid; | 2481 | log_transid = ctx->log_transid; |
| 2434 | index1 = root->log_transid % 2; | 2482 | if (root->log_transid_committed >= log_transid) { |
| 2483 | mutex_unlock(&root->log_mutex); | ||
| 2484 | return ctx->log_ret; | ||
| 2485 | } | ||
| 2486 | |||
| 2487 | index1 = log_transid % 2; | ||
| 2435 | if (atomic_read(&root->log_commit[index1])) { | 2488 | if (atomic_read(&root->log_commit[index1])) { |
| 2436 | wait_log_commit(trans, root, root->log_transid); | 2489 | wait_log_commit(trans, root, log_transid); |
| 2437 | mutex_unlock(&root->log_mutex); | 2490 | mutex_unlock(&root->log_mutex); |
| 2438 | return 0; | 2491 | return ctx->log_ret; |
| 2439 | } | 2492 | } |
| 2493 | ASSERT(log_transid == root->log_transid); | ||
| 2440 | atomic_set(&root->log_commit[index1], 1); | 2494 | atomic_set(&root->log_commit[index1], 1); |
| 2441 | 2495 | ||
| 2442 | /* wait for previous tree log sync to complete */ | 2496 | /* wait for previous tree log sync to complete */ |
| 2443 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) | 2497 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) |
| 2444 | wait_log_commit(trans, root, root->log_transid - 1); | 2498 | wait_log_commit(trans, root, log_transid - 1); |
| 2499 | |||
| 2445 | while (1) { | 2500 | while (1) { |
| 2446 | int batch = atomic_read(&root->log_batch); | 2501 | int batch = atomic_read(&root->log_batch); |
| 2447 | /* when we're on an ssd, just kick the log commit out */ | 2502 | /* when we're on an ssd, just kick the log commit out */ |
| @@ -2456,7 +2511,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2456 | } | 2511 | } |
| 2457 | 2512 | ||
| 2458 | /* bail out if we need to do a full commit */ | 2513 | /* bail out if we need to do a full commit */ |
| 2459 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | 2514 | if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == |
| 2515 | trans->transid) { | ||
| 2460 | ret = -EAGAIN; | 2516 | ret = -EAGAIN; |
| 2461 | btrfs_free_logged_extents(log, log_transid); | 2517 | btrfs_free_logged_extents(log, log_transid); |
| 2462 | mutex_unlock(&root->log_mutex); | 2518 | mutex_unlock(&root->log_mutex); |
| @@ -2477,6 +2533,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2477 | blk_finish_plug(&plug); | 2533 | blk_finish_plug(&plug); |
| 2478 | btrfs_abort_transaction(trans, root, ret); | 2534 | btrfs_abort_transaction(trans, root, ret); |
| 2479 | btrfs_free_logged_extents(log, log_transid); | 2535 | btrfs_free_logged_extents(log, log_transid); |
| 2536 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | ||
| 2537 | trans->transid; | ||
| 2480 | mutex_unlock(&root->log_mutex); | 2538 | mutex_unlock(&root->log_mutex); |
| 2481 | goto out; | 2539 | goto out; |
| 2482 | } | 2540 | } |
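From this hunk on, every read of last_trans_log_full_commit goes through ACCESS_ONCE(), and each error path that previously left the decision to its caller now publishes trans->transid into the flag itself before unlocking. The handshake, condensed into helpers (a sketch; the patch open-codes both sides):

    /* Writer: force the next log sync onto the full-commit slow path. */
    static void demand_full_commit(struct btrfs_fs_info *fs_info,
                                   struct btrfs_trans_handle *trans)
    {
            ACCESS_ONCE(fs_info->last_trans_log_full_commit) = trans->transid;
    }

    /* Reader: sample once so the compiler cannot cache the load across
     * the sleeps in btrfs_sync_log(). */
    static bool full_commit_demanded(struct btrfs_fs_info *fs_info,
                                     struct btrfs_trans_handle *trans)
    {
            return ACCESS_ONCE(fs_info->last_trans_log_full_commit) ==
                   trans->transid;
    }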
| @@ -2486,7 +2544,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2486 | root->log_transid++; | 2544 | root->log_transid++; |
| 2487 | log->log_transid = root->log_transid; | 2545 | log->log_transid = root->log_transid; |
| 2488 | root->log_start_pid = 0; | 2546 | root->log_start_pid = 0; |
| 2489 | smp_mb(); | ||
| 2490 | /* | 2547 | /* |
| 2491 | * IO has been started, blocks of the log tree have WRITTEN flag set | 2548 | * IO has been started, blocks of the log tree have WRITTEN flag set |
| 2492 | * in their headers. new modifications of the log will be written to | 2549 | * in their headers. new modifications of the log will be written to |
| @@ -2494,9 +2551,16 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2494 | */ | 2551 | */ |
| 2495 | mutex_unlock(&root->log_mutex); | 2552 | mutex_unlock(&root->log_mutex); |
| 2496 | 2553 | ||
| 2554 | btrfs_init_log_ctx(&root_log_ctx); | ||
| 2555 | |||
| 2497 | mutex_lock(&log_root_tree->log_mutex); | 2556 | mutex_lock(&log_root_tree->log_mutex); |
| 2498 | atomic_inc(&log_root_tree->log_batch); | 2557 | atomic_inc(&log_root_tree->log_batch); |
| 2499 | atomic_inc(&log_root_tree->log_writers); | 2558 | atomic_inc(&log_root_tree->log_writers); |
| 2559 | |||
| 2560 | index2 = log_root_tree->log_transid % 2; | ||
| 2561 | list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]); | ||
| 2562 | root_log_ctx.log_transid = log_root_tree->log_transid; | ||
| 2563 | |||
| 2500 | mutex_unlock(&log_root_tree->log_mutex); | 2564 | mutex_unlock(&log_root_tree->log_mutex); |
| 2501 | 2565 | ||
| 2502 | ret = update_log_root(trans, log); | 2566 | ret = update_log_root(trans, log); |
| @@ -2509,13 +2573,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2509 | } | 2573 | } |
| 2510 | 2574 | ||
| 2511 | if (ret) { | 2575 | if (ret) { |
| 2576 | if (!list_empty(&root_log_ctx.list)) | ||
| 2577 | list_del_init(&root_log_ctx.list); | ||
| 2578 | |||
| 2512 | blk_finish_plug(&plug); | 2579 | blk_finish_plug(&plug); |
| 2580 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | ||
| 2581 | trans->transid; | ||
| 2513 | if (ret != -ENOSPC) { | 2582 | if (ret != -ENOSPC) { |
| 2514 | btrfs_abort_transaction(trans, root, ret); | 2583 | btrfs_abort_transaction(trans, root, ret); |
| 2515 | mutex_unlock(&log_root_tree->log_mutex); | 2584 | mutex_unlock(&log_root_tree->log_mutex); |
| 2516 | goto out; | 2585 | goto out; |
| 2517 | } | 2586 | } |
| 2518 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
| 2519 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2587 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
| 2520 | btrfs_free_logged_extents(log, log_transid); | 2588 | btrfs_free_logged_extents(log, log_transid); |
| 2521 | mutex_unlock(&log_root_tree->log_mutex); | 2589 | mutex_unlock(&log_root_tree->log_mutex); |
| @@ -2523,22 +2591,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2523 | goto out; | 2591 | goto out; |
| 2524 | } | 2592 | } |
| 2525 | 2593 | ||
| 2526 | index2 = log_root_tree->log_transid % 2; | 2594 | if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) { |
| 2595 | mutex_unlock(&log_root_tree->log_mutex); | ||
| 2596 | ret = root_log_ctx.log_ret; | ||
| 2597 | goto out; | ||
| 2598 | } | ||
| 2599 | |||
| 2600 | index2 = root_log_ctx.log_transid % 2; | ||
| 2527 | if (atomic_read(&log_root_tree->log_commit[index2])) { | 2601 | if (atomic_read(&log_root_tree->log_commit[index2])) { |
| 2528 | blk_finish_plug(&plug); | 2602 | blk_finish_plug(&plug); |
| 2529 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2603 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
| 2530 | wait_log_commit(trans, log_root_tree, | 2604 | wait_log_commit(trans, log_root_tree, |
| 2531 | log_root_tree->log_transid); | 2605 | root_log_ctx.log_transid); |
| 2532 | btrfs_free_logged_extents(log, log_transid); | 2606 | btrfs_free_logged_extents(log, log_transid); |
| 2533 | mutex_unlock(&log_root_tree->log_mutex); | 2607 | mutex_unlock(&log_root_tree->log_mutex); |
| 2534 | ret = 0; | 2608 | ret = root_log_ctx.log_ret; |
| 2535 | goto out; | 2609 | goto out; |
| 2536 | } | 2610 | } |
| 2611 | ASSERT(root_log_ctx.log_transid == log_root_tree->log_transid); | ||
| 2537 | atomic_set(&log_root_tree->log_commit[index2], 1); | 2612 | atomic_set(&log_root_tree->log_commit[index2], 1); |
| 2538 | 2613 | ||
| 2539 | if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) { | 2614 | if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) { |
| 2540 | wait_log_commit(trans, log_root_tree, | 2615 | wait_log_commit(trans, log_root_tree, |
| 2541 | log_root_tree->log_transid - 1); | 2616 | root_log_ctx.log_transid - 1); |
| 2542 | } | 2617 | } |
| 2543 | 2618 | ||
| 2544 | wait_for_writer(trans, log_root_tree); | 2619 | wait_for_writer(trans, log_root_tree); |
| @@ -2547,7 +2622,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2547 | * now that we've moved on to the tree of log tree roots, | 2622 | * now that we've moved on to the tree of log tree roots, |
| 2548 | * check the full commit flag again | 2623 | * check the full commit flag again |
| 2549 | */ | 2624 | */ |
| 2550 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | 2625 | if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == |
| 2626 | trans->transid) { | ||
| 2551 | blk_finish_plug(&plug); | 2627 | blk_finish_plug(&plug); |
| 2552 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2628 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
| 2553 | btrfs_free_logged_extents(log, log_transid); | 2629 | btrfs_free_logged_extents(log, log_transid); |
| @@ -2561,6 +2637,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2561 | EXTENT_DIRTY | EXTENT_NEW); | 2637 | EXTENT_DIRTY | EXTENT_NEW); |
| 2562 | blk_finish_plug(&plug); | 2638 | blk_finish_plug(&plug); |
| 2563 | if (ret) { | 2639 | if (ret) { |
| 2640 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | ||
| 2641 | trans->transid; | ||
| 2564 | btrfs_abort_transaction(trans, root, ret); | 2642 | btrfs_abort_transaction(trans, root, ret); |
| 2565 | btrfs_free_logged_extents(log, log_transid); | 2643 | btrfs_free_logged_extents(log, log_transid); |
| 2566 | mutex_unlock(&log_root_tree->log_mutex); | 2644 | mutex_unlock(&log_root_tree->log_mutex); |
| @@ -2578,8 +2656,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2578 | btrfs_header_level(log_root_tree->node)); | 2656 | btrfs_header_level(log_root_tree->node)); |
| 2579 | 2657 | ||
| 2580 | log_root_tree->log_transid++; | 2658 | log_root_tree->log_transid++; |
| 2581 | smp_mb(); | ||
| 2582 | |||
| 2583 | mutex_unlock(&log_root_tree->log_mutex); | 2659 | mutex_unlock(&log_root_tree->log_mutex); |
| 2584 | 2660 | ||
| 2585 | /* | 2661 | /* |
| @@ -2591,6 +2667,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2591 | */ | 2667 | */ |
| 2592 | ret = write_ctree_super(trans, root->fs_info->tree_root, 1); | 2668 | ret = write_ctree_super(trans, root->fs_info->tree_root, 1); |
| 2593 | if (ret) { | 2669 | if (ret) { |
| 2670 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | ||
| 2671 | trans->transid; | ||
| 2594 | btrfs_abort_transaction(trans, root, ret); | 2672 | btrfs_abort_transaction(trans, root, ret); |
| 2595 | goto out_wake_log_root; | 2673 | goto out_wake_log_root; |
| 2596 | } | 2674 | } |
| @@ -2601,13 +2679,28 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2601 | mutex_unlock(&root->log_mutex); | 2679 | mutex_unlock(&root->log_mutex); |
| 2602 | 2680 | ||
| 2603 | out_wake_log_root: | 2681 | out_wake_log_root: |
| 2682 | /* | ||
| 2683 | * We don't need to take log_mutex here because we are sure all | ||
| 2684 | * the other tasks are blocked. | ||
| 2685 | */ | ||
| 2686 | btrfs_remove_all_log_ctxs(log_root_tree, index2, ret); | ||
| 2687 | |||
| 2688 | mutex_lock(&log_root_tree->log_mutex); | ||
| 2689 | log_root_tree->log_transid_committed++; | ||
| 2604 | atomic_set(&log_root_tree->log_commit[index2], 0); | 2690 | atomic_set(&log_root_tree->log_commit[index2], 0); |
| 2605 | smp_mb(); | 2691 | mutex_unlock(&log_root_tree->log_mutex); |
| 2692 | |||
| 2606 | if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) | 2693 | if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) |
| 2607 | wake_up(&log_root_tree->log_commit_wait[index2]); | 2694 | wake_up(&log_root_tree->log_commit_wait[index2]); |
| 2608 | out: | 2695 | out: |
| 2696 | /* See above. */ | ||
| 2697 | btrfs_remove_all_log_ctxs(root, index1, ret); | ||
| 2698 | |||
| 2699 | mutex_lock(&root->log_mutex); | ||
| 2700 | root->log_transid_committed++; | ||
| 2609 | atomic_set(&root->log_commit[index1], 0); | 2701 | atomic_set(&root->log_commit[index1], 0); |
| 2610 | smp_mb(); | 2702 | mutex_unlock(&root->log_mutex); |
| 2703 | |||
| 2611 | if (waitqueue_active(&root->log_commit_wait[index1])) | 2704 | if (waitqueue_active(&root->log_commit_wait[index1])) |
| 2612 | wake_up(&root->log_commit_wait[index1]); | 2705 | wake_up(&root->log_commit_wait[index1]); |
| 2613 | return ret; | 2706 | return ret; |
| @@ -3479,7 +3572,8 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b) | |||
| 3479 | 3572 | ||
| 3480 | static int log_one_extent(struct btrfs_trans_handle *trans, | 3573 | static int log_one_extent(struct btrfs_trans_handle *trans, |
| 3481 | struct inode *inode, struct btrfs_root *root, | 3574 | struct inode *inode, struct btrfs_root *root, |
| 3482 | struct extent_map *em, struct btrfs_path *path) | 3575 | struct extent_map *em, struct btrfs_path *path, |
| 3576 | struct list_head *logged_list) | ||
| 3483 | { | 3577 | { |
| 3484 | struct btrfs_root *log = root->log_root; | 3578 | struct btrfs_root *log = root->log_root; |
| 3485 | struct btrfs_file_extent_item *fi; | 3579 | struct btrfs_file_extent_item *fi; |
| @@ -3495,7 +3589,6 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
| 3495 | u64 extent_offset = em->start - em->orig_start; | 3589 | u64 extent_offset = em->start - em->orig_start; |
| 3496 | u64 block_len; | 3590 | u64 block_len; |
| 3497 | int ret; | 3591 | int ret; |
| 3498 | int index = log->log_transid % 2; | ||
| 3499 | bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 3592 | bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
| 3500 | int extent_inserted = 0; | 3593 | int extent_inserted = 0; |
| 3501 | 3594 | ||
| @@ -3579,17 +3672,12 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
| 3579 | * First check and see if our csums are on our outstanding ordered | 3672 | * First check and see if our csums are on our outstanding ordered |
| 3580 | * extents. | 3673 | * extents. |
| 3581 | */ | 3674 | */ |
| 3582 | again: | 3675 | list_for_each_entry(ordered, logged_list, log_list) { |
| 3583 | spin_lock_irq(&log->log_extents_lock[index]); | ||
| 3584 | list_for_each_entry(ordered, &log->logged_list[index], log_list) { | ||
| 3585 | struct btrfs_ordered_sum *sum; | 3676 | struct btrfs_ordered_sum *sum; |
| 3586 | 3677 | ||
| 3587 | if (!mod_len) | 3678 | if (!mod_len) |
| 3588 | break; | 3679 | break; |
| 3589 | 3680 | ||
| 3590 | if (ordered->inode != inode) | ||
| 3591 | continue; | ||
| 3592 | |||
| 3593 | if (ordered->file_offset + ordered->len <= mod_start || | 3681 | if (ordered->file_offset + ordered->len <= mod_start || |
| 3594 | mod_start + mod_len <= ordered->file_offset) | 3682 | mod_start + mod_len <= ordered->file_offset) |
| 3595 | continue; | 3683 | continue; |
| @@ -3632,12 +3720,6 @@ again: | |||
| 3632 | if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM, | 3720 | if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM, |
| 3633 | &ordered->flags)) | 3721 | &ordered->flags)) |
| 3634 | continue; | 3722 | continue; |
| 3635 | atomic_inc(&ordered->refs); | ||
| 3636 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
| 3637 | /* | ||
| 3638 | * we've dropped the lock, we must either break or | ||
| 3639 | * start over after this. | ||
| 3640 | */ | ||
| 3641 | 3723 | ||
| 3642 | if (ordered->csum_bytes_left) { | 3724 | if (ordered->csum_bytes_left) { |
| 3643 | btrfs_start_ordered_extent(inode, ordered, 0); | 3725 | btrfs_start_ordered_extent(inode, ordered, 0); |
| @@ -3647,16 +3729,11 @@ again: | |||
| 3647 | 3729 | ||
| 3648 | list_for_each_entry(sum, &ordered->list, list) { | 3730 | list_for_each_entry(sum, &ordered->list, list) { |
| 3649 | ret = btrfs_csum_file_blocks(trans, log, sum); | 3731 | ret = btrfs_csum_file_blocks(trans, log, sum); |
| 3650 | if (ret) { | 3732 | if (ret) |
| 3651 | btrfs_put_ordered_extent(ordered); | ||
| 3652 | goto unlocked; | 3733 | goto unlocked; |
| 3653 | } | ||
| 3654 | } | 3734 | } |
| 3655 | btrfs_put_ordered_extent(ordered); | ||
| 3656 | goto again; | ||
| 3657 | 3735 | ||
| 3658 | } | 3736 | } |
| 3659 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
| 3660 | unlocked: | 3737 | unlocked: |
| 3661 | 3738 | ||
| 3662 | if (!mod_len || ret) | 3739 | if (!mod_len || ret) |
| @@ -3694,7 +3771,8 @@ unlocked: | |||
| 3694 | static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | 3771 | static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, |
| 3695 | struct btrfs_root *root, | 3772 | struct btrfs_root *root, |
| 3696 | struct inode *inode, | 3773 | struct inode *inode, |
| 3697 | struct btrfs_path *path) | 3774 | struct btrfs_path *path, |
| 3775 | struct list_head *logged_list) | ||
| 3698 | { | 3776 | { |
| 3699 | struct extent_map *em, *n; | 3777 | struct extent_map *em, *n; |
| 3700 | struct list_head extents; | 3778 | struct list_head extents; |
| @@ -3752,7 +3830,7 @@ process: | |||
| 3752 | 3830 | ||
| 3753 | write_unlock(&tree->lock); | 3831 | write_unlock(&tree->lock); |
| 3754 | 3832 | ||
| 3755 | ret = log_one_extent(trans, inode, root, em, path); | 3833 | ret = log_one_extent(trans, inode, root, em, path, logged_list); |
| 3756 | write_lock(&tree->lock); | 3834 | write_lock(&tree->lock); |
| 3757 | clear_em_logging(tree, em); | 3835 | clear_em_logging(tree, em); |
| 3758 | free_extent_map(em); | 3836 | free_extent_map(em); |
| @@ -3788,6 +3866,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 3788 | struct btrfs_key max_key; | 3866 | struct btrfs_key max_key; |
| 3789 | struct btrfs_root *log = root->log_root; | 3867 | struct btrfs_root *log = root->log_root; |
| 3790 | struct extent_buffer *src = NULL; | 3868 | struct extent_buffer *src = NULL; |
| 3869 | LIST_HEAD(logged_list); | ||
| 3791 | u64 last_extent = 0; | 3870 | u64 last_extent = 0; |
| 3792 | int err = 0; | 3871 | int err = 0; |
| 3793 | int ret; | 3872 | int ret; |
| @@ -3836,7 +3915,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 3836 | 3915 | ||
| 3837 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 3916 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
| 3838 | 3917 | ||
| 3839 | btrfs_get_logged_extents(log, inode); | 3918 | btrfs_get_logged_extents(inode, &logged_list); |
| 3840 | 3919 | ||
| 3841 | /* | 3920 | /* |
| 3842 | * a brute force approach to making sure we get the most uptodate | 3921 | * a brute force approach to making sure we get the most uptodate |
| @@ -3962,7 +4041,8 @@ log_extents: | |||
| 3962 | btrfs_release_path(path); | 4041 | btrfs_release_path(path); |
| 3963 | btrfs_release_path(dst_path); | 4042 | btrfs_release_path(dst_path); |
| 3964 | if (fast_search) { | 4043 | if (fast_search) { |
| 3965 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path); | 4044 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path, |
| 4045 | &logged_list); | ||
| 3966 | if (ret) { | 4046 | if (ret) { |
| 3967 | err = ret; | 4047 | err = ret; |
| 3968 | goto out_unlock; | 4048 | goto out_unlock; |
| @@ -3987,8 +4067,10 @@ log_extents: | |||
| 3987 | BTRFS_I(inode)->logged_trans = trans->transid; | 4067 | BTRFS_I(inode)->logged_trans = trans->transid; |
| 3988 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; | 4068 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; |
| 3989 | out_unlock: | 4069 | out_unlock: |
| 3990 | if (err) | 4070 | if (unlikely(err)) |
| 3991 | btrfs_free_logged_extents(log, log->log_transid); | 4071 | btrfs_put_logged_extents(&logged_list); |
| 4072 | else | ||
| 4073 | btrfs_submit_logged_extents(&logged_list, log); | ||
| 3992 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 4074 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
| 3993 | 4075 | ||
| 3994 | btrfs_free_path(path); | 4076 | btrfs_free_path(path); |
| @@ -4079,7 +4161,8 @@ out: | |||
| 4079 | */ | 4161 | */ |
| 4080 | static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | 4162 | static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, |
| 4081 | struct btrfs_root *root, struct inode *inode, | 4163 | struct btrfs_root *root, struct inode *inode, |
| 4082 | struct dentry *parent, int exists_only) | 4164 | struct dentry *parent, int exists_only, |
| 4165 | struct btrfs_log_ctx *ctx) | ||
| 4083 | { | 4166 | { |
| 4084 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; | 4167 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; |
| 4085 | struct super_block *sb; | 4168 | struct super_block *sb; |
| @@ -4116,9 +4199,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
| 4116 | goto end_no_trans; | 4199 | goto end_no_trans; |
| 4117 | } | 4200 | } |
| 4118 | 4201 | ||
| 4119 | ret = start_log_trans(trans, root); | 4202 | ret = start_log_trans(trans, root, ctx); |
| 4120 | if (ret) | 4203 | if (ret) |
| 4121 | goto end_trans; | 4204 | goto end_no_trans; |
| 4122 | 4205 | ||
| 4123 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 4206 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
| 4124 | if (ret) | 4207 | if (ret) |
| @@ -4166,6 +4249,9 @@ end_trans: | |||
| 4166 | root->fs_info->last_trans_log_full_commit = trans->transid; | 4249 | root->fs_info->last_trans_log_full_commit = trans->transid; |
| 4167 | ret = 1; | 4250 | ret = 1; |
| 4168 | } | 4251 | } |
| 4252 | |||
| 4253 | if (ret) | ||
| 4254 | btrfs_remove_log_ctx(root, ctx); | ||
| 4169 | btrfs_end_log_trans(root); | 4255 | btrfs_end_log_trans(root); |
| 4170 | end_no_trans: | 4256 | end_no_trans: |
| 4171 | return ret; | 4257 | return ret; |
| @@ -4178,12 +4264,14 @@ end_no_trans: | |||
| 4178 | * data on disk. | 4264 | * data on disk. |
| 4179 | */ | 4265 | */ |
| 4180 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 4266 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
| 4181 | struct btrfs_root *root, struct dentry *dentry) | 4267 | struct btrfs_root *root, struct dentry *dentry, |
| 4268 | struct btrfs_log_ctx *ctx) | ||
| 4182 | { | 4269 | { |
| 4183 | struct dentry *parent = dget_parent(dentry); | 4270 | struct dentry *parent = dget_parent(dentry); |
| 4184 | int ret; | 4271 | int ret; |
| 4185 | 4272 | ||
| 4186 | ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0); | 4273 | ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, |
| 4274 | 0, ctx); | ||
| 4187 | dput(parent); | 4275 | dput(parent); |
| 4188 | 4276 | ||
| 4189 | return ret; | 4277 | return ret; |
| @@ -4420,6 +4508,6 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans, | |||
| 4420 | root->fs_info->last_trans_committed)) | 4508 | root->fs_info->last_trans_committed)) |
| 4421 | return 0; | 4509 | return 0; |
| 4422 | 4510 | ||
| 4423 | return btrfs_log_inode_parent(trans, root, inode, parent, 1); | 4511 | return btrfs_log_inode_parent(trans, root, inode, parent, 1, NULL); |
| 4424 | } | 4512 | } |
| 4425 | 4513 | ||
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 1d4ae0d15a70..91b145fce333 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
| @@ -22,14 +22,28 @@ | |||
| 22 | /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */ | 22 | /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */ |
| 23 | #define BTRFS_NO_LOG_SYNC 256 | 23 | #define BTRFS_NO_LOG_SYNC 256 |
| 24 | 24 | ||
| 25 | struct btrfs_log_ctx { | ||
| 26 | int log_ret; | ||
| 27 | int log_transid; | ||
| 28 | struct list_head list; | ||
| 29 | }; | ||
| 30 | |||
| 31 | static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) | ||
| 32 | { | ||
| 33 | ctx->log_ret = 0; | ||
| 34 | ctx->log_transid = 0; | ||
| 35 | INIT_LIST_HEAD(&ctx->list); | ||
| 36 | } | ||
| 37 | |||
| 25 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 38 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
| 26 | struct btrfs_root *root); | 39 | struct btrfs_root *root, struct btrfs_log_ctx *ctx); |
| 27 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); | 40 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); |
| 28 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | 41 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, |
| 29 | struct btrfs_fs_info *fs_info); | 42 | struct btrfs_fs_info *fs_info); |
| 30 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); | 43 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); |
| 31 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 44 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
| 32 | struct btrfs_root *root, struct dentry *dentry); | 45 | struct btrfs_root *root, struct dentry *dentry, |
| 46 | struct btrfs_log_ctx *ctx); | ||
| 33 | int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | 47 | int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, |
| 34 | struct btrfs_root *root, | 48 | struct btrfs_root *root, |
| 35 | const char *name, int name_len, | 49 | const char *name, int name_len, |
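A btrfs_log_ctx is intended to live on the caller's stack for a single log attempt, so the waiter list needs no allocation. A hedged usage sketch against the prototypes above (error handling trimmed; the BTRFS_NO_LOG_SYNC shortcut is ignored for brevity):

	struct btrfs_log_ctx ctx;
	int ret;

	btrfs_init_log_ctx(&ctx);

	/* logs the inode and queues ctx on the root's waiter list */
	ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx);
	if (ret == 0)
		/* waits for, or performs, the log commit for ctx->log_transid */
		ret = btrfs_sync_log(trans, root, &ctx);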
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bab0b84d8f80..49d7fab73360 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -415,7 +415,8 @@ loop_lock: | |||
| 415 | device->running_pending = 1; | 415 | device->running_pending = 1; |
| 416 | 416 | ||
| 417 | spin_unlock(&device->io_lock); | 417 | spin_unlock(&device->io_lock); |
| 418 | btrfs_requeue_work(&device->work); | 418 | btrfs_queue_work(fs_info->submit_workers, |
| 419 | &device->work); | ||
| 419 | goto done; | 420 | goto done; |
| 420 | } | 421 | } |
| 421 | /* unplug every 64 requests just for good measure */ | 422 | /* unplug every 64 requests just for good measure */ |
| @@ -447,6 +448,14 @@ static void pending_bios_fn(struct btrfs_work *work) | |||
| 447 | run_scheduled_bios(device); | 448 | run_scheduled_bios(device); |
| 448 | } | 449 | } |
| 449 | 450 | ||
| 451 | /* | ||
| 452 | * Add new device to list of registered devices | ||
| 453 | * | ||
| 454 | * Returns: | ||
| 455 | * 1 - first time device is seen | ||
| 456 | * 0 - device already known | ||
| 457 | * < 0 - error | ||
| 458 | */ | ||
| 450 | static noinline int device_list_add(const char *path, | 459 | static noinline int device_list_add(const char *path, |
| 451 | struct btrfs_super_block *disk_super, | 460 | struct btrfs_super_block *disk_super, |
| 452 | u64 devid, struct btrfs_fs_devices **fs_devices_ret) | 461 | u64 devid, struct btrfs_fs_devices **fs_devices_ret) |
| @@ -454,6 +463,7 @@ static noinline int device_list_add(const char *path, | |||
| 454 | struct btrfs_device *device; | 463 | struct btrfs_device *device; |
| 455 | struct btrfs_fs_devices *fs_devices; | 464 | struct btrfs_fs_devices *fs_devices; |
| 456 | struct rcu_string *name; | 465 | struct rcu_string *name; |
| 466 | int ret = 0; | ||
| 457 | u64 found_transid = btrfs_super_generation(disk_super); | 467 | u64 found_transid = btrfs_super_generation(disk_super); |
| 458 | 468 | ||
| 459 | fs_devices = find_fsid(disk_super->fsid); | 469 | fs_devices = find_fsid(disk_super->fsid); |
| @@ -494,6 +504,7 @@ static noinline int device_list_add(const char *path, | |||
| 494 | fs_devices->num_devices++; | 504 | fs_devices->num_devices++; |
| 495 | mutex_unlock(&fs_devices->device_list_mutex); | 505 | mutex_unlock(&fs_devices->device_list_mutex); |
| 496 | 506 | ||
| 507 | ret = 1; | ||
| 497 | device->fs_devices = fs_devices; | 508 | device->fs_devices = fs_devices; |
| 498 | } else if (!device->name || strcmp(device->name->str, path)) { | 509 | } else if (!device->name || strcmp(device->name->str, path)) { |
| 499 | name = rcu_string_strdup(path, GFP_NOFS); | 510 | name = rcu_string_strdup(path, GFP_NOFS); |
| @@ -512,7 +523,8 @@ static noinline int device_list_add(const char *path, | |||
| 512 | fs_devices->latest_trans = found_transid; | 523 | fs_devices->latest_trans = found_transid; |
| 513 | } | 524 | } |
| 514 | *fs_devices_ret = fs_devices; | 525 | *fs_devices_ret = fs_devices; |
| 515 | return 0; | 526 | |
| 527 | return ret; | ||
| 516 | } | 528 | } |
| 517 | 529 | ||
| 518 | static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | 530 | static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) |
| @@ -909,17 +921,19 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
| 909 | transid = btrfs_super_generation(disk_super); | 921 | transid = btrfs_super_generation(disk_super); |
| 910 | total_devices = btrfs_super_num_devices(disk_super); | 922 | total_devices = btrfs_super_num_devices(disk_super); |
| 911 | 923 | ||
| 912 | if (disk_super->label[0]) { | ||
| 913 | if (disk_super->label[BTRFS_LABEL_SIZE - 1]) | ||
| 914 | disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; | ||
| 915 | printk(KERN_INFO "BTRFS: device label %s ", disk_super->label); | ||
| 916 | } else { | ||
| 917 | printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid); | ||
| 918 | } | ||
| 919 | |||
| 920 | printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path); | ||
| 921 | |||
| 922 | ret = device_list_add(path, disk_super, devid, fs_devices_ret); | 924 | ret = device_list_add(path, disk_super, devid, fs_devices_ret); |
| 925 | if (ret > 0) { | ||
| 926 | if (disk_super->label[0]) { | ||
| 927 | if (disk_super->label[BTRFS_LABEL_SIZE - 1]) | ||
| 928 | disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; | ||
| 929 | printk(KERN_INFO "BTRFS: device label %s ", disk_super->label); | ||
| 930 | } else { | ||
| 931 | printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid); | ||
| 932 | } | ||
| 933 | |||
| 934 | printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path); | ||
| 935 | ret = 0; | ||
| 936 | } | ||
| 923 | if (!ret && fs_devices_ret) | 937 | if (!ret && fs_devices_ret) |
| 924 | (*fs_devices_ret)->total_devices = total_devices; | 938 | (*fs_devices_ret)->total_devices = total_devices; |
| 925 | 939 | ||
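device_list_add() used to return 0 both when a device was first registered and when it was already known; the tri-state return documented above lets btrfs_scan_one_device() print the device label/fsid line only on first sight instead of on every scan. Condensed (the label-vs-fsid branching of the real hunk is collapsed here), the caller-side pattern is:

	ret = device_list_add(path, disk_super, devid, fs_devices_ret);
	if (ret > 0) {
		/* first time this device was seen: announce it once */
		printk(KERN_INFO "BTRFS: device fsid %pU devid %llu transid %llu %s\n",
		       disk_super->fsid, devid, transid, path);
		ret = 0;	/* an already-known device is not an error */
	}
	/* ret < 0 still propagates allocation/lookup failures */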
| @@ -5263,6 +5277,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
| 5263 | static void btrfs_end_bio(struct bio *bio, int err) | 5277 | static void btrfs_end_bio(struct bio *bio, int err) |
| 5264 | { | 5278 | { |
| 5265 | struct btrfs_bio *bbio = bio->bi_private; | 5279 | struct btrfs_bio *bbio = bio->bi_private; |
| 5280 | struct btrfs_device *dev = bbio->stripes[0].dev; | ||
| 5266 | int is_orig_bio = 0; | 5281 | int is_orig_bio = 0; |
| 5267 | 5282 | ||
| 5268 | if (err) { | 5283 | if (err) { |
| @@ -5270,7 +5285,6 @@ static void btrfs_end_bio(struct bio *bio, int err) | |||
| 5270 | if (err == -EIO || err == -EREMOTEIO) { | 5285 | if (err == -EIO || err == -EREMOTEIO) { |
| 5271 | unsigned int stripe_index = | 5286 | unsigned int stripe_index = |
| 5272 | btrfs_io_bio(bio)->stripe_index; | 5287 | btrfs_io_bio(bio)->stripe_index; |
| 5273 | struct btrfs_device *dev; | ||
| 5274 | 5288 | ||
| 5275 | BUG_ON(stripe_index >= bbio->num_stripes); | 5289 | BUG_ON(stripe_index >= bbio->num_stripes); |
| 5276 | dev = bbio->stripes[stripe_index].dev; | 5290 | dev = bbio->stripes[stripe_index].dev; |
| @@ -5292,6 +5306,8 @@ static void btrfs_end_bio(struct bio *bio, int err) | |||
| 5292 | if (bio == bbio->orig_bio) | 5306 | if (bio == bbio->orig_bio) |
| 5293 | is_orig_bio = 1; | 5307 | is_orig_bio = 1; |
| 5294 | 5308 | ||
| 5309 | btrfs_bio_counter_dec(bbio->fs_info); | ||
| 5310 | |||
| 5295 | if (atomic_dec_and_test(&bbio->stripes_pending)) { | 5311 | if (atomic_dec_and_test(&bbio->stripes_pending)) { |
| 5296 | if (!is_orig_bio) { | 5312 | if (!is_orig_bio) { |
| 5297 | bio_put(bio); | 5313 | bio_put(bio); |
| @@ -5328,13 +5344,6 @@ static void btrfs_end_bio(struct bio *bio, int err) | |||
| 5328 | } | 5344 | } |
| 5329 | } | 5345 | } |
| 5330 | 5346 | ||
| 5331 | struct async_sched { | ||
| 5332 | struct bio *bio; | ||
| 5333 | int rw; | ||
| 5334 | struct btrfs_fs_info *info; | ||
| 5335 | struct btrfs_work work; | ||
| 5336 | }; | ||
| 5337 | |||
| 5338 | /* | 5347 | /* |
| 5339 | * see run_scheduled_bios for a description of why bios are collected for | 5348 | * see run_scheduled_bios for a description of why bios are collected for |
| 5340 | * async submit. | 5349 | * async submit. |
| @@ -5391,8 +5400,8 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root, | |||
| 5391 | spin_unlock(&device->io_lock); | 5400 | spin_unlock(&device->io_lock); |
| 5392 | 5401 | ||
| 5393 | if (should_queue) | 5402 | if (should_queue) |
| 5394 | btrfs_queue_worker(&root->fs_info->submit_workers, | 5403 | btrfs_queue_work(root->fs_info->submit_workers, |
| 5395 | &device->work); | 5404 | &device->work); |
| 5396 | } | 5405 | } |
| 5397 | 5406 | ||
| 5398 | static int bio_size_ok(struct block_device *bdev, struct bio *bio, | 5407 | static int bio_size_ok(struct block_device *bdev, struct bio *bio, |
| @@ -5447,6 +5456,9 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, | |||
| 5447 | } | 5456 | } |
| 5448 | #endif | 5457 | #endif |
| 5449 | bio->bi_bdev = dev->bdev; | 5458 | bio->bi_bdev = dev->bdev; |
| 5459 | |||
| 5460 | btrfs_bio_counter_inc_noblocked(root->fs_info); | ||
| 5461 | |||
| 5450 | if (async) | 5462 | if (async) |
| 5451 | btrfs_schedule_bio(root, dev, rw, bio); | 5463 | btrfs_schedule_bio(root, dev, rw, bio); |
| 5452 | else | 5464 | else |
| @@ -5515,28 +5527,38 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
| 5515 | length = bio->bi_iter.bi_size; | 5527 | length = bio->bi_iter.bi_size; |
| 5516 | map_length = length; | 5528 | map_length = length; |
| 5517 | 5529 | ||
| 5530 | btrfs_bio_counter_inc_blocked(root->fs_info); | ||
| 5518 | ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, | 5531 | ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, |
| 5519 | mirror_num, &raid_map); | 5532 | mirror_num, &raid_map); |
| 5520 | if (ret) /* -ENOMEM */ | 5533 | if (ret) { |
| 5534 | btrfs_bio_counter_dec(root->fs_info); | ||
| 5521 | return ret; | 5535 | return ret; |
| 5536 | } | ||
| 5522 | 5537 | ||
| 5523 | total_devs = bbio->num_stripes; | 5538 | total_devs = bbio->num_stripes; |
| 5524 | bbio->orig_bio = first_bio; | 5539 | bbio->orig_bio = first_bio; |
| 5525 | bbio->private = first_bio->bi_private; | 5540 | bbio->private = first_bio->bi_private; |
| 5526 | bbio->end_io = first_bio->bi_end_io; | 5541 | bbio->end_io = first_bio->bi_end_io; |
| 5542 | bbio->fs_info = root->fs_info; | ||
| 5527 | atomic_set(&bbio->stripes_pending, bbio->num_stripes); | 5543 | atomic_set(&bbio->stripes_pending, bbio->num_stripes); |
| 5528 | 5544 | ||
| 5529 | if (raid_map) { | 5545 | if (raid_map) { |
| 5530 | /* In this case, map_length has been set to the length of | 5546 | /* In this case, map_length has been set to the length of |
| 5531 | a single stripe; not the whole write */ | 5547 | a single stripe; not the whole write */ |
| 5532 | if (rw & WRITE) { | 5548 | if (rw & WRITE) { |
| 5533 | return raid56_parity_write(root, bio, bbio, | 5549 | ret = raid56_parity_write(root, bio, bbio, |
| 5534 | raid_map, map_length); | 5550 | raid_map, map_length); |
| 5535 | } else { | 5551 | } else { |
| 5536 | return raid56_parity_recover(root, bio, bbio, | 5552 | ret = raid56_parity_recover(root, bio, bbio, |
| 5537 | raid_map, map_length, | 5553 | raid_map, map_length, |
| 5538 | mirror_num); | 5554 | mirror_num); |
| 5539 | } | 5555 | } |
| 5556 | /* | ||
| 5557 | * FIXME, replace doesn't support raid56 yet, please fix | ||
| 5558 | * it in the future. | ||
| 5559 | */ | ||
| 5560 | btrfs_bio_counter_dec(root->fs_info); | ||
| 5561 | return ret; | ||
| 5540 | } | 5562 | } |
| 5541 | 5563 | ||
| 5542 | if (map_length < length) { | 5564 | if (map_length < length) { |
| @@ -5578,6 +5600,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
| 5578 | async_submit); | 5600 | async_submit); |
| 5579 | dev_nr++; | 5601 | dev_nr++; |
| 5580 | } | 5602 | } |
| 5603 | btrfs_bio_counter_dec(root->fs_info); | ||
| 5581 | return 0; | 5604 | return 0; |
| 5582 | } | 5605 | } |
| 5583 | 5606 | ||
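The btrfs_bio_counter_* calls in the hunks above pair up across submission and completion: btrfs_map_bio() holds one blocking reference while stripes are mapped and submitted, submit_stripe_bio() takes one non-blocking reference per in-flight stripe bio, and btrfs_end_bio() drops one reference as each stripe completes (which is why bbio->fs_info is now stashed on the btrfs_bio). The flow, reduced to a sketch using the helper names from the diff:

	btrfs_bio_counter_inc_blocked(fs_info);	/* may block; covers the mapping */
	ret = __btrfs_map_block(fs_info, rw, logical, &map_length, &bbio,
				mirror_num, &raid_map);
	if (ret) {
		btrfs_bio_counter_dec(fs_info);	/* nothing in flight, drop it */
		return ret;
	}

	for (dev_nr = 0; dev_nr < bbio->num_stripes; dev_nr++) {
		/* submit_stripe_bio() takes one ref per bio it sends down... */
		btrfs_bio_counter_inc_noblocked(fs_info);
		/* ...and btrfs_end_bio() drops it when that bio completes */
	}

	btrfs_bio_counter_dec(fs_info);	/* mapping/submission phase done */
	return 0;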
| @@ -5666,7 +5689,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, | |||
| 5666 | else | 5689 | else |
| 5667 | generate_random_uuid(dev->uuid); | 5690 | generate_random_uuid(dev->uuid); |
| 5668 | 5691 | ||
| 5669 | dev->work.func = pending_bios_fn; | 5692 | btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL); |
| 5670 | 5693 | ||
| 5671 | return dev; | 5694 | return dev; |
| 5672 | } | 5695 | } |
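The final volumes.c hunk migrates the per-device work item to the new workqueue API: the bare function-pointer assignment becomes btrfs_init_work(), and submission switches from btrfs_queue_worker() to btrfs_queue_work() (matching the run_scheduled_bios requeue earlier in this file). The two NULL arguments are presumably the ordered-completion callbacks, unused here because pending device bios need no ordered phase. Old and new styles side by side:

	/* old API: assign the handler field directly */
	dev->work.func = pending_bios_fn;
	btrfs_queue_worker(&root->fs_info->submit_workers, &device->work);

	/* new API: initialize once at device allocation, queue later */
	btrfs_init_work(&dev->work, pending_bios_fn,
			NULL /* ordered func, assumed */,
			NULL /* ordered free, assumed */);
	btrfs_queue_work(root->fs_info->submit_workers, &device->work);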
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 8b3cd142b373..80754f9dd3df 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
| @@ -192,6 +192,7 @@ typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); | |||
| 192 | 192 | ||
| 193 | struct btrfs_bio { | 193 | struct btrfs_bio { |
| 194 | atomic_t stripes_pending; | 194 | atomic_t stripes_pending; |
| 195 | struct btrfs_fs_info *fs_info; | ||
| 195 | bio_end_io_t *end_io; | 196 | bio_end_io_t *end_io; |
| 196 | struct bio *orig_bio; | 197 | struct bio *orig_bio; |
| 197 | void *private; | 198 | void *private; |
